{ "best_global_step": 80000, "best_metric": 0.09788688, "best_model_checkpoint": "/home/fit02/dien_workspace/output/v6-20260117-103936/checkpoint-80000", "epoch": 1.3834726893844411, "eval_steps": 40000, "global_step": 160000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.646704308652757e-06, "grad_norm": 30.69170415356784, "learning_rate": 5.9999999997232855e-06, "loss": 0.505859375, "step": 1 }, { "epoch": 4.323352154326379e-05, "grad_norm": 22.980350177112115, "learning_rate": 5.999999993082132e-06, "loss": 0.31048583984375, "step": 5 }, { "epoch": 8.646704308652757e-05, "grad_norm": 29.166429649875333, "learning_rate": 5.999999972328531e-06, "loss": 0.32708740234375, "step": 10 }, { "epoch": 0.00012970056462979136, "grad_norm": 25.540202001742195, "learning_rate": 5.999999937739193e-06, "loss": 0.257080078125, "step": 15 }, { "epoch": 0.00017293408617305515, "grad_norm": 19.16529881734912, "learning_rate": 5.999999889314121e-06, "loss": 0.2935546875, "step": 20 }, { "epoch": 0.00021616760771631894, "grad_norm": 17.634208453632606, "learning_rate": 5.999999827053315e-06, "loss": 0.13309326171875, "step": 25 }, { "epoch": 0.0002594011292595827, "grad_norm": 42.13036139216939, "learning_rate": 5.999999750956776e-06, "loss": 0.20992431640625, "step": 30 }, { "epoch": 0.0003026346508028465, "grad_norm": 23.019797090648016, "learning_rate": 5.9999996610245e-06, "loss": 0.19864501953125, "step": 35 }, { "epoch": 0.0003458681723461103, "grad_norm": 15.708704613434696, "learning_rate": 5.9999995572564936e-06, "loss": 0.2221435546875, "step": 40 }, { "epoch": 0.00038910169388937406, "grad_norm": 7.986770170379777, "learning_rate": 5.999999439652753e-06, "loss": 0.06697998046875, "step": 45 }, { "epoch": 0.0004323352154326379, "grad_norm": 59.808383045400916, "learning_rate": 5.99999930821328e-06, "loss": 0.191656494140625, "step": 50 }, { "epoch": 0.00047556873697590164, "grad_norm": 50.73213652704851, "learning_rate": 5.9999991629380766e-06, "loss": 0.386651611328125, "step": 55 }, { "epoch": 0.0005188022585191654, "grad_norm": 5.384840576299853, "learning_rate": 5.999999003827141e-06, "loss": 0.2929443359375, "step": 60 }, { "epoch": 0.0005620357800624292, "grad_norm": 43.321363821301375, "learning_rate": 5.999998830880475e-06, "loss": 0.216986083984375, "step": 65 }, { "epoch": 0.000605269301605693, "grad_norm": 58.35276226004044, "learning_rate": 5.99999864409808e-06, "loss": 0.4148681640625, "step": 70 }, { "epoch": 0.0006485028231489568, "grad_norm": 2.863501468177139, "learning_rate": 5.999998443479956e-06, "loss": 0.278558349609375, "step": 75 }, { "epoch": 0.0006917363446922206, "grad_norm": 17.264981633685537, "learning_rate": 5.999998229026104e-06, "loss": 0.14821014404296876, "step": 80 }, { "epoch": 0.0007349698662354843, "grad_norm": 2.093651587846957, "learning_rate": 5.999998000736526e-06, "loss": 0.185540771484375, "step": 85 }, { "epoch": 0.0007782033877787481, "grad_norm": 79.22963800616513, "learning_rate": 5.999997758611221e-06, "loss": 0.47708587646484374, "step": 90 }, { "epoch": 0.0008214369093220119, "grad_norm": 6.427005753081516, "learning_rate": 5.999997502650193e-06, "loss": 0.219439697265625, "step": 95 }, { "epoch": 0.0008646704308652757, "grad_norm": 64.43567867823378, "learning_rate": 5.999997232853442e-06, "loss": 0.28521728515625, "step": 100 }, { "epoch": 0.0009079039524085395, "grad_norm": 11.067257610006422, "learning_rate": 5.999996949220967e-06, "loss": 0.306146240234375, "step": 105 }, { "epoch": 0.0009511374739518033, "grad_norm": 34.743812499121766, "learning_rate": 5.999996651752772e-06, "loss": 0.14287109375, "step": 110 }, { "epoch": 0.000994370995495067, "grad_norm": 83.73033951086502, "learning_rate": 5.999996340448857e-06, "loss": 0.140740966796875, "step": 115 }, { "epoch": 0.001037604517038331, "grad_norm": 44.038674453882734, "learning_rate": 5.999996015309224e-06, "loss": 0.220672607421875, "step": 120 }, { "epoch": 0.0010808380385815947, "grad_norm": 43.2638646220819, "learning_rate": 5.999995676333876e-06, "loss": 0.191485595703125, "step": 125 }, { "epoch": 0.0011240715601248583, "grad_norm": 49.620883431180424, "learning_rate": 5.999995323522812e-06, "loss": 0.4044708251953125, "step": 130 }, { "epoch": 0.0011673050816681221, "grad_norm": 4.829138713687337, "learning_rate": 5.9999949568760346e-06, "loss": 0.109930419921875, "step": 135 }, { "epoch": 0.001210538603211386, "grad_norm": 41.413917854690894, "learning_rate": 5.999994576393546e-06, "loss": 0.19189300537109374, "step": 140 }, { "epoch": 0.0012537721247546498, "grad_norm": 20.57569994155041, "learning_rate": 5.999994182075346e-06, "loss": 0.08933563232421875, "step": 145 }, { "epoch": 0.0012970056462979136, "grad_norm": 30.895044938584874, "learning_rate": 5.999993773921439e-06, "loss": 0.24969482421875, "step": 150 }, { "epoch": 0.0013402391678411774, "grad_norm": 71.1664966365371, "learning_rate": 5.9999933519318255e-06, "loss": 0.14020614624023436, "step": 155 }, { "epoch": 0.0013834726893844412, "grad_norm": 54.46453245776618, "learning_rate": 5.999992916106509e-06, "loss": 0.40771484375, "step": 160 }, { "epoch": 0.0014267062109277048, "grad_norm": 5.672531312473166, "learning_rate": 5.999992466445488e-06, "loss": 0.0408233642578125, "step": 165 }, { "epoch": 0.0014699397324709686, "grad_norm": 11.395618852789942, "learning_rate": 5.999992002948768e-06, "loss": 0.4129852294921875, "step": 170 }, { "epoch": 0.0015131732540142324, "grad_norm": 11.485529243034025, "learning_rate": 5.999991525616351e-06, "loss": 0.22171173095703126, "step": 175 }, { "epoch": 0.0015564067755574962, "grad_norm": 14.686943348690539, "learning_rate": 5.999991034448237e-06, "loss": 0.4372314453125, "step": 180 }, { "epoch": 0.00159964029710076, "grad_norm": 57.39095628738474, "learning_rate": 5.99999052944443e-06, "loss": 0.517047119140625, "step": 185 }, { "epoch": 0.0016428738186440239, "grad_norm": 14.13548483872567, "learning_rate": 5.999990010604931e-06, "loss": 0.36998291015625, "step": 190 }, { "epoch": 0.0016861073401872877, "grad_norm": 22.16445332165216, "learning_rate": 5.999989477929744e-06, "loss": 0.1513214111328125, "step": 195 }, { "epoch": 0.0017293408617305515, "grad_norm": 8.781857891244496, "learning_rate": 5.9999889314188696e-06, "loss": 0.16379547119140625, "step": 200 }, { "epoch": 0.001772574383273815, "grad_norm": 23.81075639926766, "learning_rate": 5.999988371072312e-06, "loss": 0.7389820098876954, "step": 205 }, { "epoch": 0.001815807904817079, "grad_norm": 7.547940479565619, "learning_rate": 5.999987796890073e-06, "loss": 0.2939453125, "step": 210 }, { "epoch": 0.0018590414263603427, "grad_norm": 124.21452023636606, "learning_rate": 5.9999872088721554e-06, "loss": 0.3075439453125, "step": 215 }, { "epoch": 0.0019022749479036065, "grad_norm": 16.918210419501165, "learning_rate": 5.999986607018561e-06, "loss": 0.096478271484375, "step": 220 }, { "epoch": 0.0019455084694468704, "grad_norm": 46.28089864798654, "learning_rate": 5.999985991329295e-06, "loss": 0.43441162109375, "step": 225 }, { "epoch": 0.001988741990990134, "grad_norm": 2.4646719744439065, "learning_rate": 5.999985361804358e-06, "loss": 0.17896728515625, "step": 230 }, { "epoch": 0.0020319755125333978, "grad_norm": 33.56566476498144, "learning_rate": 5.999984718443753e-06, "loss": 0.142181396484375, "step": 235 }, { "epoch": 0.002075209034076662, "grad_norm": 72.94922586819679, "learning_rate": 5.999984061247485e-06, "loss": 0.373876953125, "step": 240 }, { "epoch": 0.0021184425556199254, "grad_norm": 9.187099491791454, "learning_rate": 5.999983390215554e-06, "loss": 0.103594970703125, "step": 245 }, { "epoch": 0.0021616760771631894, "grad_norm": 12.75558723984392, "learning_rate": 5.999982705347966e-06, "loss": 0.1081939697265625, "step": 250 }, { "epoch": 0.002204909598706453, "grad_norm": 3.7277787985900415, "learning_rate": 5.999982006644723e-06, "loss": 0.2233367919921875, "step": 255 }, { "epoch": 0.0022481431202497166, "grad_norm": 52.35047885562998, "learning_rate": 5.999981294105827e-06, "loss": 0.31395263671875, "step": 260 }, { "epoch": 0.0022913766417929807, "grad_norm": 51.31902652752893, "learning_rate": 5.999980567731282e-06, "loss": 0.5368408203125, "step": 265 }, { "epoch": 0.0023346101633362443, "grad_norm": 3.6058958839020856, "learning_rate": 5.999979827521092e-06, "loss": 0.27401123046875, "step": 270 }, { "epoch": 0.0023778436848795083, "grad_norm": 4.942259379887434, "learning_rate": 5.999979073475261e-06, "loss": 0.4035919189453125, "step": 275 }, { "epoch": 0.002421077206422772, "grad_norm": 21.78749595605452, "learning_rate": 5.999978305593791e-06, "loss": 0.35301513671875, "step": 280 }, { "epoch": 0.002464310727966036, "grad_norm": 32.31384584652121, "learning_rate": 5.999977523876686e-06, "loss": 0.221697998046875, "step": 285 }, { "epoch": 0.0025075442495092995, "grad_norm": 12.4228521160173, "learning_rate": 5.99997672832395e-06, "loss": 0.3292724609375, "step": 290 }, { "epoch": 0.002550777771052563, "grad_norm": 15.745172736244191, "learning_rate": 5.999975918935587e-06, "loss": 0.09034347534179688, "step": 295 }, { "epoch": 0.002594011292595827, "grad_norm": 2.6214674889334706, "learning_rate": 5.999975095711599e-06, "loss": 0.07076263427734375, "step": 300 }, { "epoch": 0.0026372448141390907, "grad_norm": 12.793173250259834, "learning_rate": 5.999974258651991e-06, "loss": 0.06512451171875, "step": 305 }, { "epoch": 0.0026804783356823548, "grad_norm": 4.831093065909661, "learning_rate": 5.999973407756767e-06, "loss": 0.5762359619140625, "step": 310 }, { "epoch": 0.0027237118572256184, "grad_norm": 31.90614887106872, "learning_rate": 5.999972543025932e-06, "loss": 0.32931060791015626, "step": 315 }, { "epoch": 0.0027669453787688824, "grad_norm": 27.423380205763962, "learning_rate": 5.999971664459487e-06, "loss": 0.22844696044921875, "step": 320 }, { "epoch": 0.002810178900312146, "grad_norm": 19.73334312165792, "learning_rate": 5.9999707720574394e-06, "loss": 0.175164794921875, "step": 325 }, { "epoch": 0.0028534124218554096, "grad_norm": 172.6882312223815, "learning_rate": 5.99996986581979e-06, "loss": 0.4881591796875, "step": 330 }, { "epoch": 0.0028966459433986736, "grad_norm": 15.8953437719488, "learning_rate": 5.999968945746546e-06, "loss": 0.25614013671875, "step": 335 }, { "epoch": 0.0029398794649419372, "grad_norm": 5.985020754629737, "learning_rate": 5.999968011837709e-06, "loss": 0.3195068359375, "step": 340 }, { "epoch": 0.0029831129864852013, "grad_norm": 24.417583069972004, "learning_rate": 5.999967064093286e-06, "loss": 0.13448715209960938, "step": 345 }, { "epoch": 0.003026346508028465, "grad_norm": 8.508531029632286, "learning_rate": 5.9999661025132785e-06, "loss": 0.36274261474609376, "step": 350 }, { "epoch": 0.003069580029571729, "grad_norm": 89.15089036281591, "learning_rate": 5.999965127097694e-06, "loss": 0.458349609375, "step": 355 }, { "epoch": 0.0031128135511149925, "grad_norm": 23.51598237167435, "learning_rate": 5.999964137846535e-06, "loss": 0.16612548828125, "step": 360 }, { "epoch": 0.0031560470726582565, "grad_norm": 38.905739566655186, "learning_rate": 5.999963134759806e-06, "loss": 0.2647216796875, "step": 365 }, { "epoch": 0.00319928059420152, "grad_norm": 19.04730139017457, "learning_rate": 5.999962117837512e-06, "loss": 0.1453857421875, "step": 370 }, { "epoch": 0.0032425141157447837, "grad_norm": 11.111614214191018, "learning_rate": 5.9999610870796585e-06, "loss": 0.28504638671875, "step": 375 }, { "epoch": 0.0032857476372880477, "grad_norm": 22.8135530514771, "learning_rate": 5.999960042486248e-06, "loss": 0.2598423004150391, "step": 380 }, { "epoch": 0.0033289811588313113, "grad_norm": 41.89682021345395, "learning_rate": 5.999958984057289e-06, "loss": 0.485400390625, "step": 385 }, { "epoch": 0.0033722146803745754, "grad_norm": 45.914598586876295, "learning_rate": 5.999957911792782e-06, "loss": 0.29040069580078126, "step": 390 }, { "epoch": 0.003415448201917839, "grad_norm": 39.63056754751017, "learning_rate": 5.999956825692736e-06, "loss": 0.39171600341796875, "step": 395 }, { "epoch": 0.003458681723461103, "grad_norm": 4.707189182427223, "learning_rate": 5.999955725757154e-06, "loss": 0.1468475341796875, "step": 400 }, { "epoch": 0.0035019152450043666, "grad_norm": 26.445785277572213, "learning_rate": 5.99995461198604e-06, "loss": 0.2367889404296875, "step": 405 }, { "epoch": 0.00354514876654763, "grad_norm": 12.109686624679865, "learning_rate": 5.999953484379402e-06, "loss": 0.4776611328125, "step": 410 }, { "epoch": 0.0035883822880908942, "grad_norm": 12.170216333017896, "learning_rate": 5.999952342937243e-06, "loss": 0.138092041015625, "step": 415 }, { "epoch": 0.003631615809634158, "grad_norm": 38.601119410712485, "learning_rate": 5.999951187659568e-06, "loss": 0.22474365234375, "step": 420 }, { "epoch": 0.003674849331177422, "grad_norm": 25.054794974028642, "learning_rate": 5.9999500185463844e-06, "loss": 0.4101806640625, "step": 425 }, { "epoch": 0.0037180828527206855, "grad_norm": 59.21161038143297, "learning_rate": 5.999948835597696e-06, "loss": 0.3397705078125, "step": 430 }, { "epoch": 0.0037613163742639495, "grad_norm": 35.23166003766818, "learning_rate": 5.999947638813508e-06, "loss": 0.3279052734375, "step": 435 }, { "epoch": 0.003804549895807213, "grad_norm": 62.17118732230262, "learning_rate": 5.999946428193828e-06, "loss": 0.514801025390625, "step": 440 }, { "epoch": 0.0038477834173504767, "grad_norm": 5.970692363049215, "learning_rate": 5.999945203738658e-06, "loss": 0.1433807373046875, "step": 445 }, { "epoch": 0.0038910169388937407, "grad_norm": 54.50492844096543, "learning_rate": 5.999943965448008e-06, "loss": 0.2858154296875, "step": 450 }, { "epoch": 0.003934250460437005, "grad_norm": 100.32593503048905, "learning_rate": 5.99994271332188e-06, "loss": 0.1806488037109375, "step": 455 }, { "epoch": 0.003977483981980268, "grad_norm": 1.3462781791096388, "learning_rate": 5.999941447360282e-06, "loss": 0.2369781494140625, "step": 460 }, { "epoch": 0.004020717503523532, "grad_norm": 7.007631765906755, "learning_rate": 5.999940167563219e-06, "loss": 0.208935546875, "step": 465 }, { "epoch": 0.0040639510250667955, "grad_norm": 16.28267382630778, "learning_rate": 5.999938873930698e-06, "loss": 0.404833984375, "step": 470 }, { "epoch": 0.00410718454661006, "grad_norm": 14.976406890743995, "learning_rate": 5.999937566462722e-06, "loss": 0.291845703125, "step": 475 }, { "epoch": 0.004150418068153324, "grad_norm": 26.025086042114182, "learning_rate": 5.9999362451593e-06, "loss": 0.35698928833007815, "step": 480 }, { "epoch": 0.004193651589696587, "grad_norm": 27.66939981816912, "learning_rate": 5.999934910020438e-06, "loss": 0.167236328125, "step": 485 }, { "epoch": 0.004236885111239851, "grad_norm": 7.592517524850562, "learning_rate": 5.9999335610461404e-06, "loss": 0.20546875, "step": 490 }, { "epoch": 0.004280118632783114, "grad_norm": 18.41087947579556, "learning_rate": 5.999932198236414e-06, "loss": 0.43670654296875, "step": 495 }, { "epoch": 0.004323352154326379, "grad_norm": 24.5694551896633, "learning_rate": 5.999930821591265e-06, "loss": 0.30594482421875, "step": 500 }, { "epoch": 0.0043665856758696425, "grad_norm": 8.095371086322404, "learning_rate": 5.999929431110702e-06, "loss": 0.28787841796875, "step": 505 }, { "epoch": 0.004409819197412906, "grad_norm": 20.065422620793658, "learning_rate": 5.9999280267947276e-06, "loss": 0.1177276611328125, "step": 510 }, { "epoch": 0.00445305271895617, "grad_norm": 55.29587224543418, "learning_rate": 5.999926608643351e-06, "loss": 0.28187026977539065, "step": 515 }, { "epoch": 0.004496286240499433, "grad_norm": 7.05125009919743, "learning_rate": 5.999925176656577e-06, "loss": 0.1441436767578125, "step": 520 }, { "epoch": 0.004539519762042698, "grad_norm": 2.640319149265586, "learning_rate": 5.999923730834416e-06, "loss": 0.184912109375, "step": 525 }, { "epoch": 0.004582753283585961, "grad_norm": 104.68263246045126, "learning_rate": 5.999922271176869e-06, "loss": 0.38912353515625, "step": 530 }, { "epoch": 0.004625986805129225, "grad_norm": 94.72144136028274, "learning_rate": 5.999920797683947e-06, "loss": 0.39127197265625, "step": 535 }, { "epoch": 0.0046692203266724885, "grad_norm": 75.7092843557183, "learning_rate": 5.999919310355655e-06, "loss": 0.4608001708984375, "step": 540 }, { "epoch": 0.004712453848215753, "grad_norm": 22.266064727334527, "learning_rate": 5.999917809191999e-06, "loss": 0.15302734375, "step": 545 }, { "epoch": 0.004755687369759017, "grad_norm": 45.11106371094184, "learning_rate": 5.999916294192989e-06, "loss": 0.259820556640625, "step": 550 }, { "epoch": 0.00479892089130228, "grad_norm": 4.909265146104176, "learning_rate": 5.9999147653586295e-06, "loss": 0.3109893798828125, "step": 555 }, { "epoch": 0.004842154412845544, "grad_norm": 1.0672665376974728, "learning_rate": 5.999913222688928e-06, "loss": 0.051959228515625, "step": 560 }, { "epoch": 0.004885387934388807, "grad_norm": 45.683059746935946, "learning_rate": 5.999911666183891e-06, "loss": 0.18974609375, "step": 565 }, { "epoch": 0.004928621455932072, "grad_norm": 24.025690453444195, "learning_rate": 5.999910095843528e-06, "loss": 0.2554901123046875, "step": 570 }, { "epoch": 0.004971854977475335, "grad_norm": 43.25082123641289, "learning_rate": 5.999908511667844e-06, "loss": 0.2219635009765625, "step": 575 }, { "epoch": 0.005015088499018599, "grad_norm": 146.18675890665943, "learning_rate": 5.999906913656847e-06, "loss": 0.3737396240234375, "step": 580 }, { "epoch": 0.005058322020561863, "grad_norm": 2.5878316041822926, "learning_rate": 5.999905301810545e-06, "loss": 0.11121063232421875, "step": 585 }, { "epoch": 0.005101555542105126, "grad_norm": 2.4215333324042754, "learning_rate": 5.999903676128943e-06, "loss": 0.58470458984375, "step": 590 }, { "epoch": 0.005144789063648391, "grad_norm": 5.32583396343559, "learning_rate": 5.9999020366120526e-06, "loss": 0.477655029296875, "step": 595 }, { "epoch": 0.005188022585191654, "grad_norm": 67.80778601240252, "learning_rate": 5.999900383259878e-06, "loss": 0.19234619140625, "step": 600 }, { "epoch": 0.005231256106734918, "grad_norm": 0.7418958391673961, "learning_rate": 5.999898716072428e-06, "loss": 0.3938690185546875, "step": 605 }, { "epoch": 0.0052744896282781815, "grad_norm": 98.0219089287339, "learning_rate": 5.99989703504971e-06, "loss": 0.41383056640625, "step": 610 }, { "epoch": 0.005317723149821446, "grad_norm": 17.363445059207415, "learning_rate": 5.999895340191732e-06, "loss": 0.369244384765625, "step": 615 }, { "epoch": 0.0053609566713647095, "grad_norm": 4.342789793823146, "learning_rate": 5.999893631498502e-06, "loss": 0.28660888671875, "step": 620 }, { "epoch": 0.005404190192907973, "grad_norm": 26.04147071053806, "learning_rate": 5.999891908970028e-06, "loss": 0.14121551513671876, "step": 625 }, { "epoch": 0.005447423714451237, "grad_norm": 5.65839544643318, "learning_rate": 5.999890172606317e-06, "loss": 0.5223907470703125, "step": 630 }, { "epoch": 0.0054906572359945, "grad_norm": 7.828919738853862, "learning_rate": 5.999888422407378e-06, "loss": 0.333868408203125, "step": 635 }, { "epoch": 0.005533890757537765, "grad_norm": 72.75611269735138, "learning_rate": 5.9998866583732176e-06, "loss": 0.337396240234375, "step": 640 }, { "epoch": 0.005577124279081028, "grad_norm": 35.186674621256515, "learning_rate": 5.999884880503846e-06, "loss": 0.368280029296875, "step": 645 }, { "epoch": 0.005620357800624292, "grad_norm": 5.20784279506819, "learning_rate": 5.99988308879927e-06, "loss": 0.360693359375, "step": 650 }, { "epoch": 0.005663591322167556, "grad_norm": 15.782033584653481, "learning_rate": 5.999881283259499e-06, "loss": 0.39125213623046873, "step": 655 }, { "epoch": 0.005706824843710819, "grad_norm": 53.369411713990644, "learning_rate": 5.999879463884539e-06, "loss": 0.1732421875, "step": 660 }, { "epoch": 0.005750058365254084, "grad_norm": 12.451639573425085, "learning_rate": 5.999877630674402e-06, "loss": 0.44468841552734373, "step": 665 }, { "epoch": 0.005793291886797347, "grad_norm": 35.45567170416376, "learning_rate": 5.999875783629094e-06, "loss": 0.243359375, "step": 670 }, { "epoch": 0.005836525408340611, "grad_norm": 3.551875681913648, "learning_rate": 5.999873922748623e-06, "loss": 0.28390960693359374, "step": 675 }, { "epoch": 0.0058797589298838744, "grad_norm": 71.72780152014747, "learning_rate": 5.999872048032999e-06, "loss": 0.4650390625, "step": 680 }, { "epoch": 0.005922992451427139, "grad_norm": 28.439718243153028, "learning_rate": 5.99987015948223e-06, "loss": 0.525933837890625, "step": 685 }, { "epoch": 0.0059662259729704025, "grad_norm": 9.308143929796302, "learning_rate": 5.999868257096325e-06, "loss": 0.2978759765625, "step": 690 }, { "epoch": 0.006009459494513666, "grad_norm": 66.1218945792235, "learning_rate": 5.999866340875293e-06, "loss": 0.2255157470703125, "step": 695 }, { "epoch": 0.00605269301605693, "grad_norm": 0.6193870996660518, "learning_rate": 5.999864410819143e-06, "loss": 0.253424072265625, "step": 700 }, { "epoch": 0.006095926537600193, "grad_norm": 3.1761570674527007, "learning_rate": 5.999862466927882e-06, "loss": 0.07955322265625, "step": 705 }, { "epoch": 0.006139160059143458, "grad_norm": 5.042210254693577, "learning_rate": 5.999860509201521e-06, "loss": 0.0719482421875, "step": 710 }, { "epoch": 0.006182393580686721, "grad_norm": 6.131410208880404, "learning_rate": 5.999858537640068e-06, "loss": 0.2712646484375, "step": 715 }, { "epoch": 0.006225627102229985, "grad_norm": 1.8233141375774047, "learning_rate": 5.999856552243534e-06, "loss": 0.2665771484375, "step": 720 }, { "epoch": 0.0062688606237732486, "grad_norm": 76.74344934360947, "learning_rate": 5.999854553011924e-06, "loss": 0.2696441650390625, "step": 725 }, { "epoch": 0.006312094145316513, "grad_norm": 13.025206750127396, "learning_rate": 5.999852539945252e-06, "loss": 0.28521080017089845, "step": 730 }, { "epoch": 0.006355327666859777, "grad_norm": 16.161631126502122, "learning_rate": 5.999850513043525e-06, "loss": 0.27386474609375, "step": 735 }, { "epoch": 0.00639856118840304, "grad_norm": 31.503007878965455, "learning_rate": 5.999848472306752e-06, "loss": 0.36317138671875, "step": 740 }, { "epoch": 0.006441794709946304, "grad_norm": 4.948330129607184, "learning_rate": 5.999846417734942e-06, "loss": 0.095733642578125, "step": 745 }, { "epoch": 0.006485028231489567, "grad_norm": 2.114668662787215, "learning_rate": 5.999844349328107e-06, "loss": 0.6122505187988281, "step": 750 }, { "epoch": 0.006528261753032832, "grad_norm": 20.166554245920192, "learning_rate": 5.999842267086255e-06, "loss": 0.241546630859375, "step": 755 }, { "epoch": 0.0065714952745760955, "grad_norm": 40.43616428222124, "learning_rate": 5.999840171009395e-06, "loss": 0.2141693115234375, "step": 760 }, { "epoch": 0.006614728796119359, "grad_norm": 24.586387374951368, "learning_rate": 5.999838061097536e-06, "loss": 0.5022216796875, "step": 765 }, { "epoch": 0.006657962317662623, "grad_norm": 49.84216913762198, "learning_rate": 5.999835937350691e-06, "loss": 0.15997314453125, "step": 770 }, { "epoch": 0.006701195839205886, "grad_norm": 11.00765055184327, "learning_rate": 5.999833799768866e-06, "loss": 0.248175048828125, "step": 775 }, { "epoch": 0.006744429360749151, "grad_norm": 4.735034113145377, "learning_rate": 5.999831648352074e-06, "loss": 0.2687744140625, "step": 780 }, { "epoch": 0.006787662882292414, "grad_norm": 31.405379607172698, "learning_rate": 5.999829483100323e-06, "loss": 0.2655517578125, "step": 785 }, { "epoch": 0.006830896403835678, "grad_norm": 74.39410325947077, "learning_rate": 5.999827304013623e-06, "loss": 0.34338226318359377, "step": 790 }, { "epoch": 0.0068741299253789415, "grad_norm": 2.4621660609169136, "learning_rate": 5.999825111091984e-06, "loss": 0.2324462890625, "step": 795 }, { "epoch": 0.006917363446922206, "grad_norm": 107.31925187423163, "learning_rate": 5.999822904335418e-06, "loss": 0.1255157470703125, "step": 800 }, { "epoch": 0.00696059696846547, "grad_norm": 32.164245582533155, "learning_rate": 5.999820683743934e-06, "loss": 0.23878173828125, "step": 805 }, { "epoch": 0.007003830490008733, "grad_norm": 22.078191205559047, "learning_rate": 5.999818449317542e-06, "loss": 0.13090972900390624, "step": 810 }, { "epoch": 0.007047064011551997, "grad_norm": 72.31241491889341, "learning_rate": 5.999816201056252e-06, "loss": 0.14386978149414062, "step": 815 }, { "epoch": 0.00709029753309526, "grad_norm": 4.651251594342594, "learning_rate": 5.999813938960075e-06, "loss": 0.2696044921875, "step": 820 }, { "epoch": 0.007133531054638525, "grad_norm": 6.883180393041758, "learning_rate": 5.99981166302902e-06, "loss": 0.272430419921875, "step": 825 }, { "epoch": 0.0071767645761817885, "grad_norm": 14.08762748212726, "learning_rate": 5.999809373263099e-06, "loss": 0.2283477783203125, "step": 830 }, { "epoch": 0.007219998097725052, "grad_norm": 41.51844055302396, "learning_rate": 5.999807069662322e-06, "loss": 0.38051605224609375, "step": 835 }, { "epoch": 0.007263231619268316, "grad_norm": 12.905600081627105, "learning_rate": 5.999804752226702e-06, "loss": 0.32807350158691406, "step": 840 }, { "epoch": 0.007306465140811579, "grad_norm": 0.4320694357815857, "learning_rate": 5.999802420956245e-06, "loss": 0.302911376953125, "step": 845 }, { "epoch": 0.007349698662354844, "grad_norm": 18.930970394887773, "learning_rate": 5.999800075850966e-06, "loss": 0.305194091796875, "step": 850 }, { "epoch": 0.007392932183898107, "grad_norm": 34.91647231204405, "learning_rate": 5.999797716910873e-06, "loss": 0.3130706787109375, "step": 855 }, { "epoch": 0.007436165705441371, "grad_norm": 22.406638809889063, "learning_rate": 5.999795344135979e-06, "loss": 0.1224456787109375, "step": 860 }, { "epoch": 0.0074793992269846345, "grad_norm": 33.15621391923392, "learning_rate": 5.999792957526293e-06, "loss": 0.2252716064453125, "step": 865 }, { "epoch": 0.007522632748527899, "grad_norm": 4.424353936747384, "learning_rate": 5.999790557081828e-06, "loss": 0.26458740234375, "step": 870 }, { "epoch": 0.007565866270071163, "grad_norm": 5.889522150933046, "learning_rate": 5.999788142802593e-06, "loss": 0.1656982421875, "step": 875 }, { "epoch": 0.007609099791614426, "grad_norm": 30.56322408039023, "learning_rate": 5.999785714688601e-06, "loss": 0.093170166015625, "step": 880 }, { "epoch": 0.00765233331315769, "grad_norm": 20.59309019090293, "learning_rate": 5.999783272739862e-06, "loss": 0.09834136962890624, "step": 885 }, { "epoch": 0.007695566834700953, "grad_norm": 18.239529853311605, "learning_rate": 5.999780816956386e-06, "loss": 0.5489242553710938, "step": 890 }, { "epoch": 0.007738800356244218, "grad_norm": 16.08066098653268, "learning_rate": 5.999778347338189e-06, "loss": 0.15268402099609374, "step": 895 }, { "epoch": 0.007782033877787481, "grad_norm": 14.425071495842806, "learning_rate": 5.999775863885277e-06, "loss": 0.112255859375, "step": 900 }, { "epoch": 0.007825267399330746, "grad_norm": 23.24070537034975, "learning_rate": 5.999773366597665e-06, "loss": 0.51697998046875, "step": 905 }, { "epoch": 0.00786850092087401, "grad_norm": 59.54881762752419, "learning_rate": 5.999770855475363e-06, "loss": 0.49131011962890625, "step": 910 }, { "epoch": 0.007911734442417273, "grad_norm": 15.961179791448544, "learning_rate": 5.999768330518382e-06, "loss": 0.4754913330078125, "step": 915 }, { "epoch": 0.007954967963960537, "grad_norm": 72.57133358847081, "learning_rate": 5.9997657917267355e-06, "loss": 0.2086944580078125, "step": 920 }, { "epoch": 0.0079982014855038, "grad_norm": 28.634895707558478, "learning_rate": 5.999763239100434e-06, "loss": 0.16082763671875, "step": 925 }, { "epoch": 0.008041435007047064, "grad_norm": 0.584014889120324, "learning_rate": 5.99976067263949e-06, "loss": 0.08046035766601563, "step": 930 }, { "epoch": 0.008084668528590327, "grad_norm": 97.30786171180542, "learning_rate": 5.999758092343914e-06, "loss": 0.3278076171875, "step": 935 }, { "epoch": 0.008127902050133591, "grad_norm": 59.904623132945, "learning_rate": 5.999755498213719e-06, "loss": 0.38942947387695315, "step": 940 }, { "epoch": 0.008171135571676855, "grad_norm": 30.578488057608535, "learning_rate": 5.9997528902489164e-06, "loss": 0.196563720703125, "step": 945 }, { "epoch": 0.00821436909322012, "grad_norm": 14.266176267286063, "learning_rate": 5.99975026844952e-06, "loss": 0.1796142578125, "step": 950 }, { "epoch": 0.008257602614763384, "grad_norm": 22.992400191787212, "learning_rate": 5.999747632815539e-06, "loss": 0.27050094604492186, "step": 955 }, { "epoch": 0.008300836136306647, "grad_norm": 2.955753172032549, "learning_rate": 5.999744983346987e-06, "loss": 0.23094482421875, "step": 960 }, { "epoch": 0.00834406965784991, "grad_norm": 7.600958240839907, "learning_rate": 5.999742320043877e-06, "loss": 0.2449951171875, "step": 965 }, { "epoch": 0.008387303179393174, "grad_norm": 58.64480918015712, "learning_rate": 5.99973964290622e-06, "loss": 0.402081298828125, "step": 970 }, { "epoch": 0.008430536700936438, "grad_norm": 44.01683141941139, "learning_rate": 5.9997369519340285e-06, "loss": 0.281561279296875, "step": 975 }, { "epoch": 0.008473770222479702, "grad_norm": 46.166152068100416, "learning_rate": 5.9997342471273156e-06, "loss": 0.25382080078125, "step": 980 }, { "epoch": 0.008517003744022965, "grad_norm": 42.0060586563442, "learning_rate": 5.999731528486093e-06, "loss": 0.57188720703125, "step": 985 }, { "epoch": 0.008560237265566229, "grad_norm": 3.095408707900013, "learning_rate": 5.999728796010375e-06, "loss": 0.133538818359375, "step": 990 }, { "epoch": 0.008603470787109492, "grad_norm": 3.0507165717705815, "learning_rate": 5.999726049700171e-06, "loss": 0.098370361328125, "step": 995 }, { "epoch": 0.008646704308652758, "grad_norm": 17.947139622426086, "learning_rate": 5.999723289555497e-06, "loss": 0.160003662109375, "step": 1000 }, { "epoch": 0.008689937830196021, "grad_norm": 84.67710548030593, "learning_rate": 5.999720515576363e-06, "loss": 0.2561004638671875, "step": 1005 }, { "epoch": 0.008733171351739285, "grad_norm": 23.074629184408998, "learning_rate": 5.999717727762784e-06, "loss": 0.136962890625, "step": 1010 }, { "epoch": 0.008776404873282549, "grad_norm": 26.382286122496822, "learning_rate": 5.999714926114772e-06, "loss": 0.32681884765625, "step": 1015 }, { "epoch": 0.008819638394825812, "grad_norm": 3.0141095094232293, "learning_rate": 5.999712110632339e-06, "loss": 0.2332916259765625, "step": 1020 }, { "epoch": 0.008862871916369076, "grad_norm": 20.689290495989372, "learning_rate": 5.999709281315499e-06, "loss": 0.20615386962890625, "step": 1025 }, { "epoch": 0.00890610543791234, "grad_norm": 11.20497205622592, "learning_rate": 5.999706438164265e-06, "loss": 0.4220489501953125, "step": 1030 }, { "epoch": 0.008949338959455603, "grad_norm": 3.347052669388772, "learning_rate": 5.999703581178651e-06, "loss": 0.20501708984375, "step": 1035 }, { "epoch": 0.008992572480998866, "grad_norm": 70.06106735098851, "learning_rate": 5.999700710358667e-06, "loss": 0.55771484375, "step": 1040 }, { "epoch": 0.009035806002542132, "grad_norm": 55.89595688096393, "learning_rate": 5.99969782570433e-06, "loss": 0.46756591796875, "step": 1045 }, { "epoch": 0.009079039524085395, "grad_norm": 24.83565225010966, "learning_rate": 5.999694927215651e-06, "loss": 0.15169525146484375, "step": 1050 }, { "epoch": 0.009122273045628659, "grad_norm": 33.24935292598076, "learning_rate": 5.999692014892644e-06, "loss": 0.205999755859375, "step": 1055 }, { "epoch": 0.009165506567171923, "grad_norm": 7.421645167608678, "learning_rate": 5.999689088735323e-06, "loss": 0.2932830810546875, "step": 1060 }, { "epoch": 0.009208740088715186, "grad_norm": 15.726335377261643, "learning_rate": 5.9996861487437005e-06, "loss": 0.137652587890625, "step": 1065 }, { "epoch": 0.00925197361025845, "grad_norm": 24.614470965819542, "learning_rate": 5.999683194917791e-06, "loss": 0.195965576171875, "step": 1070 }, { "epoch": 0.009295207131801713, "grad_norm": 64.48784353053716, "learning_rate": 5.999680227257608e-06, "loss": 0.4311767578125, "step": 1075 }, { "epoch": 0.009338440653344977, "grad_norm": 8.751097579774013, "learning_rate": 5.999677245763164e-06, "loss": 0.250701904296875, "step": 1080 }, { "epoch": 0.00938167417488824, "grad_norm": 55.35934817811069, "learning_rate": 5.9996742504344744e-06, "loss": 0.3984375, "step": 1085 }, { "epoch": 0.009424907696431506, "grad_norm": 123.79157951459815, "learning_rate": 5.999671241271552e-06, "loss": 0.1496034622192383, "step": 1090 }, { "epoch": 0.00946814121797477, "grad_norm": 3.53469356066154, "learning_rate": 5.99966821827441e-06, "loss": 0.2462371826171875, "step": 1095 }, { "epoch": 0.009511374739518033, "grad_norm": 21.993612565870535, "learning_rate": 5.999665181443064e-06, "loss": 0.2023406982421875, "step": 1100 }, { "epoch": 0.009554608261061297, "grad_norm": 26.38205228527881, "learning_rate": 5.999662130777527e-06, "loss": 0.4201263427734375, "step": 1105 }, { "epoch": 0.00959784178260456, "grad_norm": 14.474197160523794, "learning_rate": 5.999659066277813e-06, "loss": 0.22752685546875, "step": 1110 }, { "epoch": 0.009641075304147824, "grad_norm": 2.8823871419252396, "learning_rate": 5.999655987943937e-06, "loss": 0.398052978515625, "step": 1115 }, { "epoch": 0.009684308825691088, "grad_norm": 14.747047255785882, "learning_rate": 5.999652895775913e-06, "loss": 0.115374755859375, "step": 1120 }, { "epoch": 0.009727542347234351, "grad_norm": 31.789122397574193, "learning_rate": 5.999649789773754e-06, "loss": 0.3745361328125, "step": 1125 }, { "epoch": 0.009770775868777615, "grad_norm": 7.0892270854232144, "learning_rate": 5.999646669937476e-06, "loss": 0.2119873046875, "step": 1130 }, { "epoch": 0.00981400939032088, "grad_norm": 4.638194919715848, "learning_rate": 5.999643536267092e-06, "loss": 0.200213623046875, "step": 1135 }, { "epoch": 0.009857242911864144, "grad_norm": 26.63062906754843, "learning_rate": 5.999640388762617e-06, "loss": 0.15408935546875, "step": 1140 }, { "epoch": 0.009900476433407407, "grad_norm": 1.4540781065775639, "learning_rate": 5.999637227424066e-06, "loss": 0.06665191650390626, "step": 1145 }, { "epoch": 0.00994370995495067, "grad_norm": 10.42894276512875, "learning_rate": 5.999634052251453e-06, "loss": 0.2409271240234375, "step": 1150 }, { "epoch": 0.009986943476493934, "grad_norm": 58.643651340084205, "learning_rate": 5.999630863244792e-06, "loss": 0.232476806640625, "step": 1155 }, { "epoch": 0.010030176998037198, "grad_norm": 15.568473533143043, "learning_rate": 5.999627660404099e-06, "loss": 0.3025848388671875, "step": 1160 }, { "epoch": 0.010073410519580462, "grad_norm": 11.25329402852256, "learning_rate": 5.999624443729389e-06, "loss": 0.539898681640625, "step": 1165 }, { "epoch": 0.010116644041123725, "grad_norm": 11.455207655728318, "learning_rate": 5.9996212132206765e-06, "loss": 0.0753448486328125, "step": 1170 }, { "epoch": 0.010159877562666989, "grad_norm": 5.202348763866147, "learning_rate": 5.999617968877975e-06, "loss": 0.2951515197753906, "step": 1175 }, { "epoch": 0.010203111084210252, "grad_norm": 23.992006705525387, "learning_rate": 5.9996147107013e-06, "loss": 0.10251846313476562, "step": 1180 }, { "epoch": 0.010246344605753518, "grad_norm": 24.188142249027837, "learning_rate": 5.9996114386906684e-06, "loss": 0.3426727294921875, "step": 1185 }, { "epoch": 0.010289578127296781, "grad_norm": 1.3848949914325466, "learning_rate": 5.999608152846093e-06, "loss": 0.084564208984375, "step": 1190 }, { "epoch": 0.010332811648840045, "grad_norm": 21.52213518324404, "learning_rate": 5.99960485316759e-06, "loss": 0.3713134765625, "step": 1195 }, { "epoch": 0.010376045170383309, "grad_norm": 30.147500882163293, "learning_rate": 5.999601539655174e-06, "loss": 0.33199462890625, "step": 1200 }, { "epoch": 0.010419278691926572, "grad_norm": 5.689513818134866, "learning_rate": 5.999598212308862e-06, "loss": 0.6667667388916015, "step": 1205 }, { "epoch": 0.010462512213469836, "grad_norm": 77.15368182019488, "learning_rate": 5.9995948711286675e-06, "loss": 0.760540771484375, "step": 1210 }, { "epoch": 0.0105057457350131, "grad_norm": 26.857963176765427, "learning_rate": 5.999591516114606e-06, "loss": 0.22756423950195312, "step": 1215 }, { "epoch": 0.010548979256556363, "grad_norm": 5.326991064879421, "learning_rate": 5.999588147266693e-06, "loss": 0.159930419921875, "step": 1220 }, { "epoch": 0.010592212778099627, "grad_norm": 25.940460627457906, "learning_rate": 5.999584764584947e-06, "loss": 0.13883132934570314, "step": 1225 }, { "epoch": 0.010635446299642892, "grad_norm": 21.2218639266948, "learning_rate": 5.999581368069378e-06, "loss": 0.23306884765625, "step": 1230 }, { "epoch": 0.010678679821186155, "grad_norm": 3.3248386950993662, "learning_rate": 5.999577957720007e-06, "loss": 0.2746124267578125, "step": 1235 }, { "epoch": 0.010721913342729419, "grad_norm": 57.441754710386185, "learning_rate": 5.999574533536847e-06, "loss": 0.364013671875, "step": 1240 }, { "epoch": 0.010765146864272683, "grad_norm": 2.317924901922467, "learning_rate": 5.999571095519913e-06, "loss": 0.4121917724609375, "step": 1245 }, { "epoch": 0.010808380385815946, "grad_norm": 105.0256909676317, "learning_rate": 5.999567643669224e-06, "loss": 0.24161376953125, "step": 1250 }, { "epoch": 0.01085161390735921, "grad_norm": 4.933614262057416, "learning_rate": 5.999564177984793e-06, "loss": 0.4084381103515625, "step": 1255 }, { "epoch": 0.010894847428902473, "grad_norm": 42.47078758186598, "learning_rate": 5.999560698466638e-06, "loss": 0.3037139892578125, "step": 1260 }, { "epoch": 0.010938080950445737, "grad_norm": 22.676071434019466, "learning_rate": 5.999557205114773e-06, "loss": 0.155126953125, "step": 1265 }, { "epoch": 0.010981314471989, "grad_norm": 12.93025308932436, "learning_rate": 5.999553697929216e-06, "loss": 0.136474609375, "step": 1270 }, { "epoch": 0.011024547993532266, "grad_norm": 88.38540384253825, "learning_rate": 5.999550176909981e-06, "loss": 0.306298828125, "step": 1275 }, { "epoch": 0.01106778151507553, "grad_norm": 35.247314053869005, "learning_rate": 5.999546642057087e-06, "loss": 0.19305419921875, "step": 1280 }, { "epoch": 0.011111015036618793, "grad_norm": 2.825338997381401, "learning_rate": 5.999543093370549e-06, "loss": 0.381365966796875, "step": 1285 }, { "epoch": 0.011154248558162057, "grad_norm": 36.909515291655325, "learning_rate": 5.999539530850383e-06, "loss": 0.811151123046875, "step": 1290 }, { "epoch": 0.01119748207970532, "grad_norm": 12.032874838542854, "learning_rate": 5.999535954496605e-06, "loss": 0.1037689208984375, "step": 1295 }, { "epoch": 0.011240715601248584, "grad_norm": 51.657222412012196, "learning_rate": 5.999532364309233e-06, "loss": 0.20723876953125, "step": 1300 }, { "epoch": 0.011283949122791848, "grad_norm": 12.078982188905123, "learning_rate": 5.999528760288283e-06, "loss": 0.4170379638671875, "step": 1305 }, { "epoch": 0.011327182644335111, "grad_norm": 25.519914793630104, "learning_rate": 5.9995251424337705e-06, "loss": 0.351129150390625, "step": 1310 }, { "epoch": 0.011370416165878375, "grad_norm": 7.840290709011876, "learning_rate": 5.999521510745714e-06, "loss": 0.384619140625, "step": 1315 }, { "epoch": 0.011413649687421638, "grad_norm": 20.05810237777458, "learning_rate": 5.999517865224129e-06, "loss": 0.705126953125, "step": 1320 }, { "epoch": 0.011456883208964904, "grad_norm": 48.40040019562467, "learning_rate": 5.999514205869033e-06, "loss": 0.44560546875, "step": 1325 }, { "epoch": 0.011500116730508167, "grad_norm": 28.309230688075115, "learning_rate": 5.9995105326804415e-06, "loss": 0.206243896484375, "step": 1330 }, { "epoch": 0.011543350252051431, "grad_norm": 13.659472569059936, "learning_rate": 5.999506845658373e-06, "loss": 0.11526336669921874, "step": 1335 }, { "epoch": 0.011586583773594695, "grad_norm": 56.89671115002555, "learning_rate": 5.999503144802844e-06, "loss": 0.5080780029296875, "step": 1340 }, { "epoch": 0.011629817295137958, "grad_norm": 35.22261332115719, "learning_rate": 5.999499430113872e-06, "loss": 0.34613189697265623, "step": 1345 }, { "epoch": 0.011673050816681222, "grad_norm": 24.32819250226551, "learning_rate": 5.999495701591473e-06, "loss": 0.21846389770507812, "step": 1350 }, { "epoch": 0.011716284338224485, "grad_norm": 31.2321954749799, "learning_rate": 5.999491959235665e-06, "loss": 0.2014404296875, "step": 1355 }, { "epoch": 0.011759517859767749, "grad_norm": 68.0263778951928, "learning_rate": 5.999488203046465e-06, "loss": 0.3137451171875, "step": 1360 }, { "epoch": 0.011802751381311012, "grad_norm": 0.6227645172110449, "learning_rate": 5.999484433023891e-06, "loss": 0.25821533203125, "step": 1365 }, { "epoch": 0.011845984902854278, "grad_norm": 71.20676324942261, "learning_rate": 5.999480649167959e-06, "loss": 0.4734375, "step": 1370 }, { "epoch": 0.011889218424397541, "grad_norm": 46.183961318065926, "learning_rate": 5.999476851478689e-06, "loss": 0.382086181640625, "step": 1375 }, { "epoch": 0.011932451945940805, "grad_norm": 5.211399232143838, "learning_rate": 5.999473039956095e-06, "loss": 0.08114089965820312, "step": 1380 }, { "epoch": 0.011975685467484069, "grad_norm": 10.172037890255705, "learning_rate": 5.999469214600197e-06, "loss": 0.15668487548828125, "step": 1385 }, { "epoch": 0.012018918989027332, "grad_norm": 45.40136800270573, "learning_rate": 5.999465375411012e-06, "loss": 0.429058837890625, "step": 1390 }, { "epoch": 0.012062152510570596, "grad_norm": 20.200383536425857, "learning_rate": 5.999461522388557e-06, "loss": 0.5821792602539062, "step": 1395 }, { "epoch": 0.01210538603211386, "grad_norm": 43.148643126756404, "learning_rate": 5.9994576555328525e-06, "loss": 0.156640625, "step": 1400 }, { "epoch": 0.012148619553657123, "grad_norm": 59.79055074526543, "learning_rate": 5.999453774843913e-06, "loss": 0.547137451171875, "step": 1405 }, { "epoch": 0.012191853075200387, "grad_norm": 25.872721083744807, "learning_rate": 5.9994498803217575e-06, "loss": 0.2706451416015625, "step": 1410 }, { "epoch": 0.012235086596743652, "grad_norm": 29.021814496554757, "learning_rate": 5.999445971966404e-06, "loss": 0.095794677734375, "step": 1415 }, { "epoch": 0.012278320118286916, "grad_norm": 23.323128297290822, "learning_rate": 5.999442049777871e-06, "loss": 0.0719268798828125, "step": 1420 }, { "epoch": 0.01232155363983018, "grad_norm": 10.011526137223253, "learning_rate": 5.999438113756177e-06, "loss": 0.13036041259765624, "step": 1425 }, { "epoch": 0.012364787161373443, "grad_norm": 13.973036116382996, "learning_rate": 5.999434163901339e-06, "loss": 0.15806884765625, "step": 1430 }, { "epoch": 0.012408020682916706, "grad_norm": 46.56454081961037, "learning_rate": 5.999430200213376e-06, "loss": 0.3862548828125, "step": 1435 }, { "epoch": 0.01245125420445997, "grad_norm": 30.434816648433998, "learning_rate": 5.999426222692305e-06, "loss": 0.16959075927734374, "step": 1440 }, { "epoch": 0.012494487726003234, "grad_norm": 198.73846439017575, "learning_rate": 5.999422231338145e-06, "loss": 0.65189208984375, "step": 1445 }, { "epoch": 0.012537721247546497, "grad_norm": 7.494860301019546, "learning_rate": 5.999418226150916e-06, "loss": 0.271337890625, "step": 1450 }, { "epoch": 0.01258095476908976, "grad_norm": 26.112408709431822, "learning_rate": 5.999414207130635e-06, "loss": 0.12819671630859375, "step": 1455 }, { "epoch": 0.012624188290633026, "grad_norm": 60.82843291134381, "learning_rate": 5.99941017427732e-06, "loss": 0.5763145446777344, "step": 1460 }, { "epoch": 0.01266742181217629, "grad_norm": 31.719738946027874, "learning_rate": 5.9994061275909905e-06, "loss": 0.470343017578125, "step": 1465 }, { "epoch": 0.012710655333719553, "grad_norm": 15.191707972758195, "learning_rate": 5.999402067071666e-06, "loss": 0.2991943359375, "step": 1470 }, { "epoch": 0.012753888855262817, "grad_norm": 37.946862951208225, "learning_rate": 5.999397992719363e-06, "loss": 0.4806793212890625, "step": 1475 }, { "epoch": 0.01279712237680608, "grad_norm": 11.877547208534995, "learning_rate": 5.999393904534101e-06, "loss": 0.4177001953125, "step": 1480 }, { "epoch": 0.012840355898349344, "grad_norm": 49.39182082277611, "learning_rate": 5.999389802515901e-06, "loss": 0.238262939453125, "step": 1485 }, { "epoch": 0.012883589419892608, "grad_norm": 1.584622361009868, "learning_rate": 5.99938568666478e-06, "loss": 0.093353271484375, "step": 1490 }, { "epoch": 0.012926822941435871, "grad_norm": 71.47032228988076, "learning_rate": 5.999381556980757e-06, "loss": 0.241705322265625, "step": 1495 }, { "epoch": 0.012970056462979135, "grad_norm": 64.08257036496202, "learning_rate": 5.999377413463851e-06, "loss": 0.8442138671875, "step": 1500 }, { "epoch": 0.013013289984522398, "grad_norm": 12.201088378734607, "learning_rate": 5.999373256114081e-06, "loss": 0.23967056274414061, "step": 1505 }, { "epoch": 0.013056523506065664, "grad_norm": 18.07564651611917, "learning_rate": 5.999369084931467e-06, "loss": 0.21085205078125, "step": 1510 }, { "epoch": 0.013099757027608927, "grad_norm": 26.57657326282244, "learning_rate": 5.9993648999160275e-06, "loss": 0.604888916015625, "step": 1515 }, { "epoch": 0.013142990549152191, "grad_norm": 2.4276366194038332, "learning_rate": 5.999360701067782e-06, "loss": 0.45150146484375, "step": 1520 }, { "epoch": 0.013186224070695455, "grad_norm": 28.240685122343738, "learning_rate": 5.999356488386751e-06, "loss": 0.3083740234375, "step": 1525 }, { "epoch": 0.013229457592238718, "grad_norm": 0.2649914211248649, "learning_rate": 5.999352261872952e-06, "loss": 0.27402801513671876, "step": 1530 }, { "epoch": 0.013272691113781982, "grad_norm": 20.437262458537976, "learning_rate": 5.9993480215264055e-06, "loss": 0.50859375, "step": 1535 }, { "epoch": 0.013315924635325245, "grad_norm": 6.443658357164249, "learning_rate": 5.999343767347131e-06, "loss": 0.238726806640625, "step": 1540 }, { "epoch": 0.013359158156868509, "grad_norm": 97.1397286587378, "learning_rate": 5.999339499335149e-06, "loss": 0.3492767333984375, "step": 1545 }, { "epoch": 0.013402391678411773, "grad_norm": 57.908205657085865, "learning_rate": 5.999335217490477e-06, "loss": 0.302685546875, "step": 1550 }, { "epoch": 0.013445625199955038, "grad_norm": 1.7745527132130343, "learning_rate": 5.9993309218131365e-06, "loss": 0.42156982421875, "step": 1555 }, { "epoch": 0.013488858721498301, "grad_norm": 4.899025695628489, "learning_rate": 5.999326612303147e-06, "loss": 0.15135498046875, "step": 1560 }, { "epoch": 0.013532092243041565, "grad_norm": 19.408069257137747, "learning_rate": 5.999322288960528e-06, "loss": 0.19940185546875, "step": 1565 }, { "epoch": 0.013575325764584829, "grad_norm": 37.308392478212916, "learning_rate": 5.999317951785299e-06, "loss": 0.53533935546875, "step": 1570 }, { "epoch": 0.013618559286128092, "grad_norm": 136.34557198871343, "learning_rate": 5.999313600777482e-06, "loss": 0.4788787841796875, "step": 1575 }, { "epoch": 0.013661792807671356, "grad_norm": 12.991813517459219, "learning_rate": 5.999309235937095e-06, "loss": 0.21937255859375, "step": 1580 }, { "epoch": 0.01370502632921462, "grad_norm": 54.271894137363226, "learning_rate": 5.999304857264159e-06, "loss": 0.3357666015625, "step": 1585 }, { "epoch": 0.013748259850757883, "grad_norm": 7.948112658291266, "learning_rate": 5.999300464758693e-06, "loss": 0.4671630859375, "step": 1590 }, { "epoch": 0.013791493372301147, "grad_norm": 4.819279479348753, "learning_rate": 5.999296058420719e-06, "loss": 0.45455780029296877, "step": 1595 }, { "epoch": 0.013834726893844412, "grad_norm": 25.06533908615039, "learning_rate": 5.9992916382502566e-06, "loss": 0.258251953125, "step": 1600 }, { "epoch": 0.013877960415387676, "grad_norm": 14.441142254239793, "learning_rate": 5.999287204247326e-06, "loss": 0.13835906982421875, "step": 1605 }, { "epoch": 0.01392119393693094, "grad_norm": 12.983255109550702, "learning_rate": 5.9992827564119486e-06, "loss": 0.33604888916015624, "step": 1610 }, { "epoch": 0.013964427458474203, "grad_norm": 21.901279682598197, "learning_rate": 5.9992782947441436e-06, "loss": 0.27433929443359373, "step": 1615 }, { "epoch": 0.014007660980017466, "grad_norm": 8.316977554305007, "learning_rate": 5.999273819243932e-06, "loss": 0.17952423095703124, "step": 1620 }, { "epoch": 0.01405089450156073, "grad_norm": 3.419964267052973, "learning_rate": 5.999269329911335e-06, "loss": 0.1079742431640625, "step": 1625 }, { "epoch": 0.014094128023103994, "grad_norm": 12.30457801192187, "learning_rate": 5.999264826746373e-06, "loss": 0.2862525939941406, "step": 1630 }, { "epoch": 0.014137361544647257, "grad_norm": 8.927715849148413, "learning_rate": 5.999260309749065e-06, "loss": 0.2558837890625, "step": 1635 }, { "epoch": 0.01418059506619052, "grad_norm": 21.58665168509805, "learning_rate": 5.999255778919435e-06, "loss": 0.3130218505859375, "step": 1640 }, { "epoch": 0.014223828587733786, "grad_norm": 5.422588667932342, "learning_rate": 5.9992512342575025e-06, "loss": 0.19470367431640626, "step": 1645 }, { "epoch": 0.01426706210927705, "grad_norm": 5.8677780481254995, "learning_rate": 5.999246675763288e-06, "loss": 0.065972900390625, "step": 1650 }, { "epoch": 0.014310295630820313, "grad_norm": 23.34715500259292, "learning_rate": 5.999242103436812e-06, "loss": 0.259100341796875, "step": 1655 }, { "epoch": 0.014353529152363577, "grad_norm": 74.6023519486189, "learning_rate": 5.9992375172780976e-06, "loss": 0.363385009765625, "step": 1660 }, { "epoch": 0.01439676267390684, "grad_norm": 27.39536300968279, "learning_rate": 5.9992329172871645e-06, "loss": 0.26944580078125, "step": 1665 }, { "epoch": 0.014439996195450104, "grad_norm": 3.048722344783873, "learning_rate": 5.999228303464034e-06, "loss": 0.2505584716796875, "step": 1670 }, { "epoch": 0.014483229716993368, "grad_norm": 67.93374376130014, "learning_rate": 5.999223675808728e-06, "loss": 0.353765869140625, "step": 1675 }, { "epoch": 0.014526463238536631, "grad_norm": 5.13608907135736, "learning_rate": 5.999219034321266e-06, "loss": 0.401043701171875, "step": 1680 }, { "epoch": 0.014569696760079895, "grad_norm": 71.60251279429755, "learning_rate": 5.999214379001672e-06, "loss": 0.1449981689453125, "step": 1685 }, { "epoch": 0.014612930281623158, "grad_norm": 6.854786211441044, "learning_rate": 5.999209709849967e-06, "loss": 0.228125, "step": 1690 }, { "epoch": 0.014656163803166424, "grad_norm": 46.195665203243195, "learning_rate": 5.999205026866171e-06, "loss": 0.1592529296875, "step": 1695 }, { "epoch": 0.014699397324709687, "grad_norm": 45.89086859179107, "learning_rate": 5.999200330050306e-06, "loss": 0.47275390625, "step": 1700 }, { "epoch": 0.014742630846252951, "grad_norm": 4.9791201730107595, "learning_rate": 5.999195619402394e-06, "loss": 0.61473388671875, "step": 1705 }, { "epoch": 0.014785864367796215, "grad_norm": 56.86143857116347, "learning_rate": 5.999190894922457e-06, "loss": 0.1508258819580078, "step": 1710 }, { "epoch": 0.014829097889339478, "grad_norm": 131.82449215459474, "learning_rate": 5.999186156610518e-06, "loss": 0.24370880126953126, "step": 1715 }, { "epoch": 0.014872331410882742, "grad_norm": 38.3555778820072, "learning_rate": 5.9991814044665965e-06, "loss": 0.376373291015625, "step": 1720 }, { "epoch": 0.014915564932426005, "grad_norm": 15.005277584559494, "learning_rate": 5.999176638490715e-06, "loss": 0.210748291015625, "step": 1725 }, { "epoch": 0.014958798453969269, "grad_norm": 22.772169485578512, "learning_rate": 5.999171858682896e-06, "loss": 0.6682510375976562, "step": 1730 }, { "epoch": 0.015002031975512533, "grad_norm": 12.679661452313745, "learning_rate": 5.999167065043162e-06, "loss": 0.20877685546875, "step": 1735 }, { "epoch": 0.015045265497055798, "grad_norm": 12.790795151180843, "learning_rate": 5.999162257571534e-06, "loss": 0.1003265380859375, "step": 1740 }, { "epoch": 0.015088499018599062, "grad_norm": 41.331833518563556, "learning_rate": 5.9991574362680345e-06, "loss": 0.43896484375, "step": 1745 }, { "epoch": 0.015131732540142325, "grad_norm": 31.22779044770662, "learning_rate": 5.9991526011326864e-06, "loss": 0.133795166015625, "step": 1750 }, { "epoch": 0.015174966061685589, "grad_norm": 31.12329299001148, "learning_rate": 5.999147752165511e-06, "loss": 0.20987892150878906, "step": 1755 }, { "epoch": 0.015218199583228852, "grad_norm": 2.7964922742139273, "learning_rate": 5.999142889366531e-06, "loss": 0.14477920532226562, "step": 1760 }, { "epoch": 0.015261433104772116, "grad_norm": 0.9912607070141665, "learning_rate": 5.99913801273577e-06, "loss": 0.28494415283203123, "step": 1765 }, { "epoch": 0.01530466662631538, "grad_norm": 4.947096169118334, "learning_rate": 5.999133122273249e-06, "loss": 0.08752751350402832, "step": 1770 }, { "epoch": 0.015347900147858643, "grad_norm": 24.962497025994352, "learning_rate": 5.999128217978991e-06, "loss": 0.140960693359375, "step": 1775 }, { "epoch": 0.015391133669401907, "grad_norm": 2.9721810438442318, "learning_rate": 5.999123299853019e-06, "loss": 0.1611419677734375, "step": 1780 }, { "epoch": 0.015434367190945172, "grad_norm": 10.960363275105408, "learning_rate": 5.999118367895355e-06, "loss": 0.13330078125, "step": 1785 }, { "epoch": 0.015477600712488436, "grad_norm": 26.191300792739614, "learning_rate": 5.999113422106022e-06, "loss": 0.1732421875, "step": 1790 }, { "epoch": 0.0155208342340317, "grad_norm": 33.49451249748265, "learning_rate": 5.999108462485043e-06, "loss": 0.14404296875, "step": 1795 }, { "epoch": 0.015564067755574963, "grad_norm": 21.14786018269144, "learning_rate": 5.999103489032441e-06, "loss": 0.0987823486328125, "step": 1800 }, { "epoch": 0.015607301277118226, "grad_norm": 5.277079863300792, "learning_rate": 5.999098501748239e-06, "loss": 0.3642822265625, "step": 1805 }, { "epoch": 0.015650534798661492, "grad_norm": 69.38959531705747, "learning_rate": 5.999093500632458e-06, "loss": 0.28946762084960936, "step": 1810 }, { "epoch": 0.015693768320204755, "grad_norm": 18.777953997084126, "learning_rate": 5.999088485685124e-06, "loss": 0.2371368408203125, "step": 1815 }, { "epoch": 0.01573700184174802, "grad_norm": 6.987756300972136, "learning_rate": 5.999083456906259e-06, "loss": 0.3241855621337891, "step": 1820 }, { "epoch": 0.015780235363291283, "grad_norm": 24.33090315676269, "learning_rate": 5.999078414295886e-06, "loss": 0.06328125, "step": 1825 }, { "epoch": 0.015823468884834546, "grad_norm": 35.05030236932646, "learning_rate": 5.999073357854028e-06, "loss": 0.6345703125, "step": 1830 }, { "epoch": 0.01586670240637781, "grad_norm": 2.0153388207900016, "learning_rate": 5.999068287580709e-06, "loss": 0.24466552734375, "step": 1835 }, { "epoch": 0.015909935927921073, "grad_norm": 5.794174098225368, "learning_rate": 5.999063203475952e-06, "loss": 0.30667724609375, "step": 1840 }, { "epoch": 0.015953169449464337, "grad_norm": 5.011081796635994, "learning_rate": 5.999058105539781e-06, "loss": 0.09263916015625, "step": 1845 }, { "epoch": 0.0159964029710076, "grad_norm": 23.73950251881533, "learning_rate": 5.999052993772219e-06, "loss": 0.22084197998046876, "step": 1850 }, { "epoch": 0.016039636492550864, "grad_norm": 6.22754510878233, "learning_rate": 5.999047868173289e-06, "loss": 0.4479377746582031, "step": 1855 }, { "epoch": 0.016082870014094128, "grad_norm": 35.5942393607147, "learning_rate": 5.999042728743014e-06, "loss": 0.10564041137695312, "step": 1860 }, { "epoch": 0.01612610353563739, "grad_norm": 57.03514143169735, "learning_rate": 5.9990375754814215e-06, "loss": 0.4078765869140625, "step": 1865 }, { "epoch": 0.016169337057180655, "grad_norm": 20.62340697839167, "learning_rate": 5.999032408388532e-06, "loss": 0.21805801391601562, "step": 1870 }, { "epoch": 0.01621257057872392, "grad_norm": 3.134763167284692, "learning_rate": 5.999027227464368e-06, "loss": 0.28462982177734375, "step": 1875 }, { "epoch": 0.016255804100267182, "grad_norm": 25.403968989308364, "learning_rate": 5.999022032708958e-06, "loss": 0.106634521484375, "step": 1880 }, { "epoch": 0.016299037621810446, "grad_norm": 5.0596507441982626, "learning_rate": 5.999016824122322e-06, "loss": 0.194195556640625, "step": 1885 }, { "epoch": 0.01634227114335371, "grad_norm": 5.213536493818506, "learning_rate": 5.999011601704486e-06, "loss": 0.21570281982421874, "step": 1890 }, { "epoch": 0.016385504664896973, "grad_norm": 2.4620337544331226, "learning_rate": 5.999006365455474e-06, "loss": 0.2059539794921875, "step": 1895 }, { "epoch": 0.01642873818644024, "grad_norm": 68.62748031602169, "learning_rate": 5.999001115375308e-06, "loss": 0.182666015625, "step": 1900 }, { "epoch": 0.016471971707983504, "grad_norm": 1.4632640182149481, "learning_rate": 5.998995851464015e-06, "loss": 0.26763916015625, "step": 1905 }, { "epoch": 0.016515205229526767, "grad_norm": 2.966705729851531, "learning_rate": 5.998990573721619e-06, "loss": 0.2338470458984375, "step": 1910 }, { "epoch": 0.01655843875107003, "grad_norm": 25.91074711740251, "learning_rate": 5.998985282148142e-06, "loss": 0.1747802734375, "step": 1915 }, { "epoch": 0.016601672272613294, "grad_norm": 7.901563820151338, "learning_rate": 5.99897997674361e-06, "loss": 0.12189788818359375, "step": 1920 }, { "epoch": 0.016644905794156558, "grad_norm": 42.299459200449164, "learning_rate": 5.998974657508047e-06, "loss": 0.459716796875, "step": 1925 }, { "epoch": 0.01668813931569982, "grad_norm": 3.8889493990839146, "learning_rate": 5.998969324441479e-06, "loss": 0.12099761962890625, "step": 1930 }, { "epoch": 0.016731372837243085, "grad_norm": 4.801346358127452, "learning_rate": 5.998963977543929e-06, "loss": 0.15751953125, "step": 1935 }, { "epoch": 0.01677460635878635, "grad_norm": 74.55486743913696, "learning_rate": 5.998958616815422e-06, "loss": 0.3522125244140625, "step": 1940 }, { "epoch": 0.016817839880329612, "grad_norm": 13.071645626598812, "learning_rate": 5.998953242255983e-06, "loss": 0.4583984375, "step": 1945 }, { "epoch": 0.016861073401872876, "grad_norm": 61.35117552428944, "learning_rate": 5.9989478538656365e-06, "loss": 0.333538818359375, "step": 1950 }, { "epoch": 0.01690430692341614, "grad_norm": 9.377863896463753, "learning_rate": 5.998942451644408e-06, "loss": 0.142535400390625, "step": 1955 }, { "epoch": 0.016947540444959403, "grad_norm": 12.287238901039498, "learning_rate": 5.998937035592321e-06, "loss": 0.2520538330078125, "step": 1960 }, { "epoch": 0.016990773966502667, "grad_norm": 29.911927356302918, "learning_rate": 5.998931605709402e-06, "loss": 0.18187713623046875, "step": 1965 }, { "epoch": 0.01703400748804593, "grad_norm": 8.360146555998506, "learning_rate": 5.998926161995675e-06, "loss": 0.2646484375, "step": 1970 }, { "epoch": 0.017077241009589194, "grad_norm": 15.978095760188113, "learning_rate": 5.998920704451166e-06, "loss": 0.108587646484375, "step": 1975 }, { "epoch": 0.017120474531132458, "grad_norm": 11.449448187921563, "learning_rate": 5.998915233075899e-06, "loss": 0.158758544921875, "step": 1980 }, { "epoch": 0.01716370805267572, "grad_norm": 27.188977517583094, "learning_rate": 5.9989097478699005e-06, "loss": 0.27326507568359376, "step": 1985 }, { "epoch": 0.017206941574218985, "grad_norm": 45.58464269312311, "learning_rate": 5.998904248833195e-06, "loss": 0.214520263671875, "step": 1990 }, { "epoch": 0.017250175095762252, "grad_norm": 36.97020403999267, "learning_rate": 5.998898735965809e-06, "loss": 0.25567626953125, "step": 1995 }, { "epoch": 0.017293408617305515, "grad_norm": 14.416812631810012, "learning_rate": 5.9988932092677654e-06, "loss": 0.32529296875, "step": 2000 }, { "epoch": 0.01733664213884878, "grad_norm": 13.613209390229633, "learning_rate": 5.998887668739092e-06, "loss": 0.3499542236328125, "step": 2005 }, { "epoch": 0.017379875660392043, "grad_norm": 31.092349374325142, "learning_rate": 5.998882114379815e-06, "loss": 0.17557373046875, "step": 2010 }, { "epoch": 0.017423109181935306, "grad_norm": 6.816661072143682, "learning_rate": 5.998876546189957e-06, "loss": 0.10596923828125, "step": 2015 }, { "epoch": 0.01746634270347857, "grad_norm": 66.1128088676067, "learning_rate": 5.998870964169546e-06, "loss": 0.2763053894042969, "step": 2020 }, { "epoch": 0.017509576225021833, "grad_norm": 38.74481904161518, "learning_rate": 5.998865368318606e-06, "loss": 0.207183837890625, "step": 2025 }, { "epoch": 0.017552809746565097, "grad_norm": 3.3470689958668625, "learning_rate": 5.9988597586371655e-06, "loss": 0.358245849609375, "step": 2030 }, { "epoch": 0.01759604326810836, "grad_norm": 14.187902664170913, "learning_rate": 5.998854135125248e-06, "loss": 0.106488037109375, "step": 2035 }, { "epoch": 0.017639276789651624, "grad_norm": 1.9339702246256614, "learning_rate": 5.9988484977828805e-06, "loss": 0.40565185546875, "step": 2040 }, { "epoch": 0.017682510311194888, "grad_norm": 44.15329148955497, "learning_rate": 5.998842846610088e-06, "loss": 0.4584442138671875, "step": 2045 }, { "epoch": 0.01772574383273815, "grad_norm": 1.1141645801187603, "learning_rate": 5.998837181606898e-06, "loss": 0.2024932861328125, "step": 2050 }, { "epoch": 0.017768977354281415, "grad_norm": 1.013672845539717, "learning_rate": 5.998831502773335e-06, "loss": 0.15552215576171874, "step": 2055 }, { "epoch": 0.01781221087582468, "grad_norm": 11.596395553109538, "learning_rate": 5.998825810109428e-06, "loss": 0.28316497802734375, "step": 2060 }, { "epoch": 0.017855444397367942, "grad_norm": 78.64884578459387, "learning_rate": 5.9988201036152e-06, "loss": 0.355133056640625, "step": 2065 }, { "epoch": 0.017898677918911206, "grad_norm": 83.00490521104443, "learning_rate": 5.998814383290679e-06, "loss": 0.269061279296875, "step": 2070 }, { "epoch": 0.01794191144045447, "grad_norm": 1.8775004472519756, "learning_rate": 5.998808649135891e-06, "loss": 0.204351806640625, "step": 2075 }, { "epoch": 0.017985144961997733, "grad_norm": 22.157916594789686, "learning_rate": 5.998802901150862e-06, "loss": 0.207025146484375, "step": 2080 }, { "epoch": 0.018028378483541, "grad_norm": 19.81725714552153, "learning_rate": 5.998797139335619e-06, "loss": 0.557080078125, "step": 2085 }, { "epoch": 0.018071612005084264, "grad_norm": 2.165304538284595, "learning_rate": 5.998791363690189e-06, "loss": 0.5327606201171875, "step": 2090 }, { "epoch": 0.018114845526627527, "grad_norm": 5.085143482676793, "learning_rate": 5.998785574214598e-06, "loss": 0.156707763671875, "step": 2095 }, { "epoch": 0.01815807904817079, "grad_norm": 4.431099485474278, "learning_rate": 5.998779770908873e-06, "loss": 0.18433685302734376, "step": 2100 }, { "epoch": 0.018201312569714054, "grad_norm": 0.9797658032212556, "learning_rate": 5.998773953773041e-06, "loss": 0.17810516357421874, "step": 2105 }, { "epoch": 0.018244546091257318, "grad_norm": 6.707750967117144, "learning_rate": 5.998768122807129e-06, "loss": 0.16854782104492189, "step": 2110 }, { "epoch": 0.01828777961280058, "grad_norm": 2.3072983604404413, "learning_rate": 5.998762278011162e-06, "loss": 0.194140625, "step": 2115 }, { "epoch": 0.018331013134343845, "grad_norm": 10.935574523561115, "learning_rate": 5.998756419385168e-06, "loss": 0.31851806640625, "step": 2120 }, { "epoch": 0.01837424665588711, "grad_norm": 37.450091969769645, "learning_rate": 5.9987505469291765e-06, "loss": 0.2838623046875, "step": 2125 }, { "epoch": 0.018417480177430372, "grad_norm": 57.12321270838071, "learning_rate": 5.998744660643211e-06, "loss": 0.1112060546875, "step": 2130 }, { "epoch": 0.018460713698973636, "grad_norm": 18.157965830717092, "learning_rate": 5.9987387605273e-06, "loss": 0.5282470703125, "step": 2135 }, { "epoch": 0.0185039472205169, "grad_norm": 5.482891381327876, "learning_rate": 5.998732846581472e-06, "loss": 0.209771728515625, "step": 2140 }, { "epoch": 0.018547180742060163, "grad_norm": 24.88083272384327, "learning_rate": 5.998726918805752e-06, "loss": 0.2626007080078125, "step": 2145 }, { "epoch": 0.018590414263603427, "grad_norm": 29.187835960703385, "learning_rate": 5.998720977200168e-06, "loss": 0.264825439453125, "step": 2150 }, { "epoch": 0.01863364778514669, "grad_norm": 22.78371093213365, "learning_rate": 5.998715021764749e-06, "loss": 0.094744873046875, "step": 2155 }, { "epoch": 0.018676881306689954, "grad_norm": 38.32772017286862, "learning_rate": 5.99870905249952e-06, "loss": 0.327978515625, "step": 2160 }, { "epoch": 0.018720114828233218, "grad_norm": 18.92436375685499, "learning_rate": 5.9987030694045115e-06, "loss": 0.464910888671875, "step": 2165 }, { "epoch": 0.01876334834977648, "grad_norm": 41.32615827645853, "learning_rate": 5.998697072479748e-06, "loss": 0.1587921142578125, "step": 2170 }, { "epoch": 0.018806581871319745, "grad_norm": 17.229929223089087, "learning_rate": 5.99869106172526e-06, "loss": 0.5612357139587403, "step": 2175 }, { "epoch": 0.018849815392863012, "grad_norm": 119.1158538501811, "learning_rate": 5.998685037141072e-06, "loss": 0.54295654296875, "step": 2180 }, { "epoch": 0.018893048914406275, "grad_norm": 1.3782306047149762, "learning_rate": 5.998678998727215e-06, "loss": 0.495587158203125, "step": 2185 }, { "epoch": 0.01893628243594954, "grad_norm": 54.99120676194636, "learning_rate": 5.998672946483715e-06, "loss": 0.341229248046875, "step": 2190 }, { "epoch": 0.018979515957492803, "grad_norm": 95.32308723645251, "learning_rate": 5.9986668804106e-06, "loss": 0.3261932373046875, "step": 2195 }, { "epoch": 0.019022749479036066, "grad_norm": 9.59054502676677, "learning_rate": 5.998660800507898e-06, "loss": 0.12318763732910157, "step": 2200 }, { "epoch": 0.01906598300057933, "grad_norm": 1.5570070588773688, "learning_rate": 5.998654706775638e-06, "loss": 0.11727981567382813, "step": 2205 }, { "epoch": 0.019109216522122593, "grad_norm": 26.009106044427963, "learning_rate": 5.998648599213847e-06, "loss": 0.4603118896484375, "step": 2210 }, { "epoch": 0.019152450043665857, "grad_norm": 46.26931564002567, "learning_rate": 5.998642477822554e-06, "loss": 0.3288421630859375, "step": 2215 }, { "epoch": 0.01919568356520912, "grad_norm": 7.949564783907723, "learning_rate": 5.998636342601788e-06, "loss": 0.16673431396484376, "step": 2220 }, { "epoch": 0.019238917086752384, "grad_norm": 34.590496976408716, "learning_rate": 5.9986301935515745e-06, "loss": 0.3246002197265625, "step": 2225 }, { "epoch": 0.019282150608295648, "grad_norm": 42.98653328923751, "learning_rate": 5.998624030671944e-06, "loss": 0.303302001953125, "step": 2230 }, { "epoch": 0.01932538412983891, "grad_norm": 4.352570786135551, "learning_rate": 5.998617853962925e-06, "loss": 0.14111328125, "step": 2235 }, { "epoch": 0.019368617651382175, "grad_norm": 13.08065264324717, "learning_rate": 5.998611663424544e-06, "loss": 0.19739990234375, "step": 2240 }, { "epoch": 0.01941185117292544, "grad_norm": 16.230850242630975, "learning_rate": 5.9986054590568326e-06, "loss": 0.509991455078125, "step": 2245 }, { "epoch": 0.019455084694468702, "grad_norm": 28.22010100122366, "learning_rate": 5.998599240859817e-06, "loss": 0.3948699951171875, "step": 2250 }, { "epoch": 0.019498318216011966, "grad_norm": 8.079849232166206, "learning_rate": 5.998593008833527e-06, "loss": 0.183380126953125, "step": 2255 }, { "epoch": 0.01954155173755523, "grad_norm": 59.619322725171365, "learning_rate": 5.99858676297799e-06, "loss": 0.27463531494140625, "step": 2260 }, { "epoch": 0.019584785259098493, "grad_norm": 5.653709346733877, "learning_rate": 5.998580503293237e-06, "loss": 0.102459716796875, "step": 2265 }, { "epoch": 0.01962801878064176, "grad_norm": 23.56926356423226, "learning_rate": 5.998574229779295e-06, "loss": 0.20005416870117188, "step": 2270 }, { "epoch": 0.019671252302185024, "grad_norm": 15.304890169713657, "learning_rate": 5.998567942436193e-06, "loss": 0.190576171875, "step": 2275 }, { "epoch": 0.019714485823728287, "grad_norm": 51.57058828999472, "learning_rate": 5.9985616412639614e-06, "loss": 0.2981903076171875, "step": 2280 }, { "epoch": 0.01975771934527155, "grad_norm": 9.499988006311836, "learning_rate": 5.998555326262628e-06, "loss": 0.16576080322265624, "step": 2285 }, { "epoch": 0.019800952866814815, "grad_norm": 11.056508598349795, "learning_rate": 5.998548997432223e-06, "loss": 0.13953857421875, "step": 2290 }, { "epoch": 0.019844186388358078, "grad_norm": 5.173917743925908, "learning_rate": 5.998542654772775e-06, "loss": 0.1368133544921875, "step": 2295 }, { "epoch": 0.01988741990990134, "grad_norm": 17.41658515144147, "learning_rate": 5.998536298284313e-06, "loss": 0.422552490234375, "step": 2300 }, { "epoch": 0.019930653431444605, "grad_norm": 11.29413003563021, "learning_rate": 5.998529927966867e-06, "loss": 0.16658935546875, "step": 2305 }, { "epoch": 0.01997388695298787, "grad_norm": 11.302952703870117, "learning_rate": 5.9985235438204655e-06, "loss": 0.20975341796875, "step": 2310 }, { "epoch": 0.020017120474531132, "grad_norm": 11.386947322399136, "learning_rate": 5.998517145845138e-06, "loss": 0.2214569091796875, "step": 2315 }, { "epoch": 0.020060353996074396, "grad_norm": 3.0958309985100043, "learning_rate": 5.9985107340409155e-06, "loss": 0.222125244140625, "step": 2320 }, { "epoch": 0.02010358751761766, "grad_norm": 4.180842198511598, "learning_rate": 5.9985043084078245e-06, "loss": 0.1969970703125, "step": 2325 }, { "epoch": 0.020146821039160923, "grad_norm": 24.787498325511958, "learning_rate": 5.998497868945898e-06, "loss": 0.22794189453125, "step": 2330 }, { "epoch": 0.020190054560704187, "grad_norm": 55.22182645196355, "learning_rate": 5.998491415655165e-06, "loss": 0.51513671875, "step": 2335 }, { "epoch": 0.02023328808224745, "grad_norm": 20.309555916905794, "learning_rate": 5.998484948535653e-06, "loss": 0.4919921875, "step": 2340 }, { "epoch": 0.020276521603790714, "grad_norm": 6.292020531537329, "learning_rate": 5.998478467587395e-06, "loss": 0.11231689453125, "step": 2345 }, { "epoch": 0.020319755125333978, "grad_norm": 2.653688147569982, "learning_rate": 5.998471972810417e-06, "loss": 0.118292236328125, "step": 2350 }, { "epoch": 0.02036298864687724, "grad_norm": 0.690335954926542, "learning_rate": 5.998465464204754e-06, "loss": 0.1749725341796875, "step": 2355 }, { "epoch": 0.020406222168420505, "grad_norm": 1.9466003943414056, "learning_rate": 5.998458941770431e-06, "loss": 0.28659820556640625, "step": 2360 }, { "epoch": 0.020449455689963772, "grad_norm": 55.42128788046401, "learning_rate": 5.998452405507481e-06, "loss": 0.3239013671875, "step": 2365 }, { "epoch": 0.020492689211507036, "grad_norm": 111.56419692393244, "learning_rate": 5.998445855415933e-06, "loss": 0.4449127197265625, "step": 2370 }, { "epoch": 0.0205359227330503, "grad_norm": 4.85511463344986, "learning_rate": 5.998439291495819e-06, "loss": 0.17552490234375, "step": 2375 }, { "epoch": 0.020579156254593563, "grad_norm": 49.36745007002281, "learning_rate": 5.9984327137471665e-06, "loss": 0.41702880859375, "step": 2380 }, { "epoch": 0.020622389776136826, "grad_norm": 29.644581159629166, "learning_rate": 5.998426122170008e-06, "loss": 0.2177490234375, "step": 2385 }, { "epoch": 0.02066562329768009, "grad_norm": 23.028032245699073, "learning_rate": 5.998419516764372e-06, "loss": 0.17884597778320313, "step": 2390 }, { "epoch": 0.020708856819223354, "grad_norm": 1.8256143357771561, "learning_rate": 5.9984128975302915e-06, "loss": 0.095098876953125, "step": 2395 }, { "epoch": 0.020752090340766617, "grad_norm": 23.133187752985876, "learning_rate": 5.998406264467796e-06, "loss": 0.1510009765625, "step": 2400 }, { "epoch": 0.02079532386230988, "grad_norm": 94.9316849884995, "learning_rate": 5.998399617576915e-06, "loss": 0.308380126953125, "step": 2405 }, { "epoch": 0.020838557383853144, "grad_norm": 63.96154914339019, "learning_rate": 5.998392956857679e-06, "loss": 0.26883544921875, "step": 2410 }, { "epoch": 0.020881790905396408, "grad_norm": 1.7988698231709315, "learning_rate": 5.99838628231012e-06, "loss": 0.542218017578125, "step": 2415 }, { "epoch": 0.02092502442693967, "grad_norm": 3.9613718905120447, "learning_rate": 5.99837959393427e-06, "loss": 0.3076681137084961, "step": 2420 }, { "epoch": 0.020968257948482935, "grad_norm": 4.108601249769285, "learning_rate": 5.998372891730157e-06, "loss": 0.12229080200195312, "step": 2425 }, { "epoch": 0.0210114914700262, "grad_norm": 35.65529491429013, "learning_rate": 5.9983661756978135e-06, "loss": 0.2680816650390625, "step": 2430 }, { "epoch": 0.021054724991569462, "grad_norm": 1.6465718930585, "learning_rate": 5.998359445837269e-06, "loss": 0.20438232421875, "step": 2435 }, { "epoch": 0.021097958513112726, "grad_norm": 5.782053212215411, "learning_rate": 5.9983527021485575e-06, "loss": 0.351275634765625, "step": 2440 }, { "epoch": 0.02114119203465599, "grad_norm": 4.41386495480498, "learning_rate": 5.9983459446317075e-06, "loss": 0.1288818359375, "step": 2445 }, { "epoch": 0.021184425556199253, "grad_norm": 7.3173403468577884, "learning_rate": 5.9983391732867515e-06, "loss": 0.18839263916015625, "step": 2450 }, { "epoch": 0.02122765907774252, "grad_norm": 71.72176200452695, "learning_rate": 5.998332388113718e-06, "loss": 0.38502349853515627, "step": 2455 }, { "epoch": 0.021270892599285784, "grad_norm": 29.86732055001166, "learning_rate": 5.998325589112643e-06, "loss": 0.37933349609375, "step": 2460 }, { "epoch": 0.021314126120829047, "grad_norm": 20.16134563992899, "learning_rate": 5.998318776283555e-06, "loss": 0.110577392578125, "step": 2465 }, { "epoch": 0.02135735964237231, "grad_norm": 3.3257065744569094, "learning_rate": 5.998311949626485e-06, "loss": 0.2826171875, "step": 2470 }, { "epoch": 0.021400593163915575, "grad_norm": 37.318251201578335, "learning_rate": 5.998305109141465e-06, "loss": 0.360357666015625, "step": 2475 }, { "epoch": 0.021443826685458838, "grad_norm": 22.76767455060262, "learning_rate": 5.998298254828527e-06, "loss": 0.14459686279296874, "step": 2480 }, { "epoch": 0.021487060207002102, "grad_norm": 43.07264156643978, "learning_rate": 5.998291386687702e-06, "loss": 0.4365043640136719, "step": 2485 }, { "epoch": 0.021530293728545365, "grad_norm": 33.565454981076456, "learning_rate": 5.998284504719023e-06, "loss": 0.2197601318359375, "step": 2490 }, { "epoch": 0.02157352725008863, "grad_norm": 7.288629989188285, "learning_rate": 5.9982776089225204e-06, "loss": 0.17109375, "step": 2495 }, { "epoch": 0.021616760771631893, "grad_norm": 83.43744312517919, "learning_rate": 5.998270699298226e-06, "loss": 0.34671630859375, "step": 2500 }, { "epoch": 0.021659994293175156, "grad_norm": 40.371935041657, "learning_rate": 5.998263775846173e-06, "loss": 0.4296875, "step": 2505 }, { "epoch": 0.02170322781471842, "grad_norm": 0.7329746105251513, "learning_rate": 5.998256838566391e-06, "loss": 0.28565521240234376, "step": 2510 }, { "epoch": 0.021746461336261683, "grad_norm": 27.40049988333968, "learning_rate": 5.998249887458915e-06, "loss": 0.14293975830078126, "step": 2515 }, { "epoch": 0.021789694857804947, "grad_norm": 20.452605420241444, "learning_rate": 5.998242922523774e-06, "loss": 0.238323974609375, "step": 2520 }, { "epoch": 0.02183292837934821, "grad_norm": 13.40607460192467, "learning_rate": 5.998235943761002e-06, "loss": 0.254730224609375, "step": 2525 }, { "epoch": 0.021876161900891474, "grad_norm": 7.302626491447924, "learning_rate": 5.9982289511706315e-06, "loss": 0.225860595703125, "step": 2530 }, { "epoch": 0.021919395422434738, "grad_norm": 2.6671832304691043, "learning_rate": 5.998221944752693e-06, "loss": 0.2890380859375, "step": 2535 }, { "epoch": 0.021962628943978, "grad_norm": 66.00842542578367, "learning_rate": 5.99821492450722e-06, "loss": 0.4829082489013672, "step": 2540 }, { "epoch": 0.022005862465521265, "grad_norm": 20.998413508134192, "learning_rate": 5.998207890434246e-06, "loss": 0.328094482421875, "step": 2545 }, { "epoch": 0.022049095987064532, "grad_norm": 11.869252368581579, "learning_rate": 5.9982008425338e-06, "loss": 0.2457489013671875, "step": 2550 }, { "epoch": 0.022092329508607796, "grad_norm": 20.25775902853539, "learning_rate": 5.998193780805918e-06, "loss": 0.254541015625, "step": 2555 }, { "epoch": 0.02213556303015106, "grad_norm": 18.527270730513038, "learning_rate": 5.998186705250631e-06, "loss": 0.166741943359375, "step": 2560 }, { "epoch": 0.022178796551694323, "grad_norm": 3.368536828647671, "learning_rate": 5.998179615867972e-06, "loss": 0.2065643310546875, "step": 2565 }, { "epoch": 0.022222030073237586, "grad_norm": 115.27740784986246, "learning_rate": 5.998172512657974e-06, "loss": 0.28062744140625, "step": 2570 }, { "epoch": 0.02226526359478085, "grad_norm": 32.21134710032568, "learning_rate": 5.998165395620668e-06, "loss": 0.246148681640625, "step": 2575 }, { "epoch": 0.022308497116324114, "grad_norm": 68.86585933784845, "learning_rate": 5.9981582647560885e-06, "loss": 0.47183837890625, "step": 2580 }, { "epoch": 0.022351730637867377, "grad_norm": 0.13340114438943912, "learning_rate": 5.998151120064269e-06, "loss": 0.2786256790161133, "step": 2585 }, { "epoch": 0.02239496415941064, "grad_norm": 20.97640923241511, "learning_rate": 5.99814396154524e-06, "loss": 0.0532928466796875, "step": 2590 }, { "epoch": 0.022438197680953904, "grad_norm": 77.7189391874747, "learning_rate": 5.998136789199037e-06, "loss": 0.3734710693359375, "step": 2595 }, { "epoch": 0.022481431202497168, "grad_norm": 6.963601539833865, "learning_rate": 5.9981296030256915e-06, "loss": 0.24317626953125, "step": 2600 }, { "epoch": 0.02252466472404043, "grad_norm": 41.79506390298864, "learning_rate": 5.998122403025238e-06, "loss": 0.2158905029296875, "step": 2605 }, { "epoch": 0.022567898245583695, "grad_norm": 31.675788557836345, "learning_rate": 5.998115189197708e-06, "loss": 0.11409378051757812, "step": 2610 }, { "epoch": 0.02261113176712696, "grad_norm": 13.734270340623922, "learning_rate": 5.998107961543136e-06, "loss": 0.190777587890625, "step": 2615 }, { "epoch": 0.022654365288670222, "grad_norm": 19.426553897626636, "learning_rate": 5.998100720061555e-06, "loss": 0.184033203125, "step": 2620 }, { "epoch": 0.022697598810213486, "grad_norm": 3.2430384016807627, "learning_rate": 5.998093464752998e-06, "loss": 0.2681640625, "step": 2625 }, { "epoch": 0.02274083233175675, "grad_norm": 11.243494042734048, "learning_rate": 5.998086195617499e-06, "loss": 0.05604248046875, "step": 2630 }, { "epoch": 0.022784065853300013, "grad_norm": 7.322345905908056, "learning_rate": 5.998078912655092e-06, "loss": 0.323992919921875, "step": 2635 }, { "epoch": 0.022827299374843277, "grad_norm": 18.086631363823265, "learning_rate": 5.99807161586581e-06, "loss": 0.5193084716796875, "step": 2640 }, { "epoch": 0.022870532896386544, "grad_norm": 80.02714309033323, "learning_rate": 5.998064305249686e-06, "loss": 0.39014892578125, "step": 2645 }, { "epoch": 0.022913766417929807, "grad_norm": 25.748287508469634, "learning_rate": 5.9980569808067544e-06, "loss": 0.13689155578613282, "step": 2650 }, { "epoch": 0.02295699993947307, "grad_norm": 49.620252069404756, "learning_rate": 5.99804964253705e-06, "loss": 0.311785888671875, "step": 2655 }, { "epoch": 0.023000233461016335, "grad_norm": 40.51664277551738, "learning_rate": 5.9980422904406036e-06, "loss": 0.15326766967773436, "step": 2660 }, { "epoch": 0.023043466982559598, "grad_norm": 28.5948118038837, "learning_rate": 5.998034924517452e-06, "loss": 0.4631507873535156, "step": 2665 }, { "epoch": 0.023086700504102862, "grad_norm": 71.06368900411948, "learning_rate": 5.998027544767629e-06, "loss": 0.375274658203125, "step": 2670 }, { "epoch": 0.023129934025646125, "grad_norm": 12.787261641200962, "learning_rate": 5.998020151191167e-06, "loss": 0.3917755126953125, "step": 2675 }, { "epoch": 0.02317316754718939, "grad_norm": 77.81926845264105, "learning_rate": 5.998012743788101e-06, "loss": 0.18221817016601563, "step": 2680 }, { "epoch": 0.023216401068732653, "grad_norm": 103.85821107680692, "learning_rate": 5.998005322558465e-06, "loss": 0.580792236328125, "step": 2685 }, { "epoch": 0.023259634590275916, "grad_norm": 70.08195714345425, "learning_rate": 5.9979978875022936e-06, "loss": 0.3008697509765625, "step": 2690 }, { "epoch": 0.02330286811181918, "grad_norm": 9.065278455069272, "learning_rate": 5.997990438619621e-06, "loss": 0.3764312744140625, "step": 2695 }, { "epoch": 0.023346101633362443, "grad_norm": 7.591791228990985, "learning_rate": 5.9979829759104806e-06, "loss": 0.22577133178710937, "step": 2700 }, { "epoch": 0.023389335154905707, "grad_norm": 23.7311828332352, "learning_rate": 5.997975499374909e-06, "loss": 0.13056259155273436, "step": 2705 }, { "epoch": 0.02343256867644897, "grad_norm": 93.6657062361169, "learning_rate": 5.997968009012938e-06, "loss": 0.2710693359375, "step": 2710 }, { "epoch": 0.023475802197992234, "grad_norm": 16.822659519851573, "learning_rate": 5.997960504824605e-06, "loss": 0.1887481689453125, "step": 2715 }, { "epoch": 0.023519035719535498, "grad_norm": 48.33223031919988, "learning_rate": 5.997952986809942e-06, "loss": 0.36198577880859373, "step": 2720 }, { "epoch": 0.02356226924107876, "grad_norm": 2.0683093586425563, "learning_rate": 5.997945454968984e-06, "loss": 0.234759521484375, "step": 2725 }, { "epoch": 0.023605502762622025, "grad_norm": 27.708616598112496, "learning_rate": 5.997937909301768e-06, "loss": 0.22835693359375, "step": 2730 }, { "epoch": 0.023648736284165292, "grad_norm": 8.343092977025664, "learning_rate": 5.997930349808327e-06, "loss": 0.30819091796875, "step": 2735 }, { "epoch": 0.023691969805708556, "grad_norm": 6.112031386400662, "learning_rate": 5.997922776488695e-06, "loss": 0.26456146240234374, "step": 2740 }, { "epoch": 0.02373520332725182, "grad_norm": 31.83807882333566, "learning_rate": 5.997915189342909e-06, "loss": 0.10802001953125, "step": 2745 }, { "epoch": 0.023778436848795083, "grad_norm": 48.35058901749898, "learning_rate": 5.997907588371004e-06, "loss": 0.1279327392578125, "step": 2750 }, { "epoch": 0.023821670370338346, "grad_norm": 16.51528321332334, "learning_rate": 5.9978999735730114e-06, "loss": 0.1399627685546875, "step": 2755 }, { "epoch": 0.02386490389188161, "grad_norm": 7.86666466801857, "learning_rate": 5.997892344948971e-06, "loss": 0.2607452392578125, "step": 2760 }, { "epoch": 0.023908137413424874, "grad_norm": 101.21523433024784, "learning_rate": 5.9978847024989155e-06, "loss": 0.51126708984375, "step": 2765 }, { "epoch": 0.023951370934968137, "grad_norm": 1.9608258541554962, "learning_rate": 5.99787704622288e-06, "loss": 0.13143310546875, "step": 2770 }, { "epoch": 0.0239946044565114, "grad_norm": 25.436386772922678, "learning_rate": 5.997869376120902e-06, "loss": 0.2486328125, "step": 2775 }, { "epoch": 0.024037837978054664, "grad_norm": 31.645287262215966, "learning_rate": 5.997861692193014e-06, "loss": 0.22600326538085938, "step": 2780 }, { "epoch": 0.024081071499597928, "grad_norm": 60.38633435295071, "learning_rate": 5.997853994439253e-06, "loss": 0.44593505859375, "step": 2785 }, { "epoch": 0.02412430502114119, "grad_norm": 42.03104834241949, "learning_rate": 5.997846282859655e-06, "loss": 0.3621734619140625, "step": 2790 }, { "epoch": 0.024167538542684455, "grad_norm": 2.9814278773270613, "learning_rate": 5.997838557454255e-06, "loss": 0.43660888671875, "step": 2795 }, { "epoch": 0.02421077206422772, "grad_norm": 1.330602674332869, "learning_rate": 5.9978308182230875e-06, "loss": 0.3610420227050781, "step": 2800 }, { "epoch": 0.024254005585770982, "grad_norm": 7.214576244906337, "learning_rate": 5.997823065166189e-06, "loss": 0.1289398193359375, "step": 2805 }, { "epoch": 0.024297239107314246, "grad_norm": 35.58307775519597, "learning_rate": 5.997815298283597e-06, "loss": 0.2060516357421875, "step": 2810 }, { "epoch": 0.02434047262885751, "grad_norm": 18.875792368550904, "learning_rate": 5.997807517575344e-06, "loss": 0.16671142578125, "step": 2815 }, { "epoch": 0.024383706150400773, "grad_norm": 23.15700776234825, "learning_rate": 5.9977997230414686e-06, "loss": 0.31947021484375, "step": 2820 }, { "epoch": 0.024426939671944037, "grad_norm": 52.11033155770522, "learning_rate": 5.997791914682005e-06, "loss": 0.4217418670654297, "step": 2825 }, { "epoch": 0.024470173193487304, "grad_norm": 35.41116564387331, "learning_rate": 5.9977840924969905e-06, "loss": 0.504559326171875, "step": 2830 }, { "epoch": 0.024513406715030567, "grad_norm": 13.971566293826367, "learning_rate": 5.99777625648646e-06, "loss": 0.08721923828125, "step": 2835 }, { "epoch": 0.02455664023657383, "grad_norm": 21.093255685040994, "learning_rate": 5.997768406650451e-06, "loss": 0.184588623046875, "step": 2840 }, { "epoch": 0.024599873758117095, "grad_norm": 9.508181561661893, "learning_rate": 5.997760542988999e-06, "loss": 0.157720947265625, "step": 2845 }, { "epoch": 0.02464310727966036, "grad_norm": 23.56605046250613, "learning_rate": 5.997752665502141e-06, "loss": 0.10666961669921875, "step": 2850 }, { "epoch": 0.024686340801203622, "grad_norm": 44.47265041082804, "learning_rate": 5.997744774189911e-06, "loss": 0.2896587371826172, "step": 2855 }, { "epoch": 0.024729574322746885, "grad_norm": 14.579798957716823, "learning_rate": 5.9977368690523475e-06, "loss": 0.1756927490234375, "step": 2860 }, { "epoch": 0.02477280784429015, "grad_norm": 7.375173975149876, "learning_rate": 5.997728950089487e-06, "loss": 0.551678466796875, "step": 2865 }, { "epoch": 0.024816041365833413, "grad_norm": 15.16599513244944, "learning_rate": 5.997721017301365e-06, "loss": 0.2669677734375, "step": 2870 }, { "epoch": 0.024859274887376676, "grad_norm": 24.772346301116986, "learning_rate": 5.997713070688018e-06, "loss": 0.1708221435546875, "step": 2875 }, { "epoch": 0.02490250840891994, "grad_norm": 10.562579660700841, "learning_rate": 5.9977051102494845e-06, "loss": 0.1812255859375, "step": 2880 }, { "epoch": 0.024945741930463203, "grad_norm": 1.4809417510653369, "learning_rate": 5.9976971359857995e-06, "loss": 0.13055343627929689, "step": 2885 }, { "epoch": 0.024988975452006467, "grad_norm": 7.0886088357632175, "learning_rate": 5.997689147896999e-06, "loss": 0.1756103515625, "step": 2890 }, { "epoch": 0.02503220897354973, "grad_norm": 21.2770983832096, "learning_rate": 5.997681145983123e-06, "loss": 0.3319305419921875, "step": 2895 }, { "epoch": 0.025075442495092994, "grad_norm": 12.616195587504663, "learning_rate": 5.997673130244205e-06, "loss": 0.2115966796875, "step": 2900 }, { "epoch": 0.025118676016636258, "grad_norm": 1.3410583156073774, "learning_rate": 5.997665100680284e-06, "loss": 0.3109619140625, "step": 2905 }, { "epoch": 0.02516190953817952, "grad_norm": 1.7093169913582922, "learning_rate": 5.997657057291396e-06, "loss": 0.222412109375, "step": 2910 }, { "epoch": 0.025205143059722785, "grad_norm": 2.4118129854304966, "learning_rate": 5.997649000077579e-06, "loss": 0.06026763916015625, "step": 2915 }, { "epoch": 0.025248376581266052, "grad_norm": 8.745206622483181, "learning_rate": 5.997640929038869e-06, "loss": 0.40472412109375, "step": 2920 }, { "epoch": 0.025291610102809316, "grad_norm": 10.08870411408643, "learning_rate": 5.997632844175305e-06, "loss": 0.552099609375, "step": 2925 }, { "epoch": 0.02533484362435258, "grad_norm": 24.456176968186227, "learning_rate": 5.997624745486922e-06, "loss": 0.183111572265625, "step": 2930 }, { "epoch": 0.025378077145895843, "grad_norm": 63.5244369473623, "learning_rate": 5.997616632973759e-06, "loss": 0.6268058776855469, "step": 2935 }, { "epoch": 0.025421310667439107, "grad_norm": 54.54848474726506, "learning_rate": 5.997608506635853e-06, "loss": 0.3764904022216797, "step": 2940 }, { "epoch": 0.02546454418898237, "grad_norm": 21.263576265654308, "learning_rate": 5.997600366473241e-06, "loss": 0.3692626953125, "step": 2945 }, { "epoch": 0.025507777710525634, "grad_norm": 1.6074080571941034, "learning_rate": 5.997592212485962e-06, "loss": 0.1380584716796875, "step": 2950 }, { "epoch": 0.025551011232068897, "grad_norm": 42.928844680240374, "learning_rate": 5.997584044674053e-06, "loss": 0.2227203369140625, "step": 2955 }, { "epoch": 0.02559424475361216, "grad_norm": 63.31496698541111, "learning_rate": 5.99757586303755e-06, "loss": 0.3593620300292969, "step": 2960 }, { "epoch": 0.025637478275155424, "grad_norm": 14.786586660773526, "learning_rate": 5.997567667576493e-06, "loss": 0.2767486572265625, "step": 2965 }, { "epoch": 0.025680711796698688, "grad_norm": 17.927523633035477, "learning_rate": 5.997559458290917e-06, "loss": 0.2374237060546875, "step": 2970 }, { "epoch": 0.02572394531824195, "grad_norm": 18.67336974505587, "learning_rate": 5.997551235180863e-06, "loss": 0.24334716796875, "step": 2975 }, { "epoch": 0.025767178839785215, "grad_norm": 3.2652934081938314, "learning_rate": 5.997542998246367e-06, "loss": 0.2560394287109375, "step": 2980 }, { "epoch": 0.02581041236132848, "grad_norm": 13.682846752872754, "learning_rate": 5.997534747487469e-06, "loss": 0.2900848388671875, "step": 2985 }, { "epoch": 0.025853645882871742, "grad_norm": 20.436379807891015, "learning_rate": 5.997526482904204e-06, "loss": 0.2799102783203125, "step": 2990 }, { "epoch": 0.025896879404415006, "grad_norm": 14.417464438132459, "learning_rate": 5.997518204496612e-06, "loss": 0.15360374450683595, "step": 2995 }, { "epoch": 0.02594011292595827, "grad_norm": 1.8716700068534777, "learning_rate": 5.997509912264732e-06, "loss": 0.14351119995117187, "step": 3000 }, { "epoch": 0.025983346447501533, "grad_norm": 52.886530707313966, "learning_rate": 5.9975016062085995e-06, "loss": 0.68323974609375, "step": 3005 }, { "epoch": 0.026026579969044797, "grad_norm": 38.792462889736086, "learning_rate": 5.997493286328255e-06, "loss": 0.2332061767578125, "step": 3010 }, { "epoch": 0.026069813490588064, "grad_norm": 17.41141840960718, "learning_rate": 5.9974849526237375e-06, "loss": 0.17144775390625, "step": 3015 }, { "epoch": 0.026113047012131328, "grad_norm": 13.814936329141107, "learning_rate": 5.997476605095082e-06, "loss": 0.3119243621826172, "step": 3020 }, { "epoch": 0.02615628053367459, "grad_norm": 7.1008120757706585, "learning_rate": 5.997468243742331e-06, "loss": 0.4421905517578125, "step": 3025 }, { "epoch": 0.026199514055217855, "grad_norm": 2.1577065374598874, "learning_rate": 5.997459868565522e-06, "loss": 0.20199623107910156, "step": 3030 }, { "epoch": 0.02624274757676112, "grad_norm": 35.13734759711331, "learning_rate": 5.997451479564692e-06, "loss": 0.173529052734375, "step": 3035 }, { "epoch": 0.026285981098304382, "grad_norm": 69.61751155136011, "learning_rate": 5.997443076739879e-06, "loss": 0.23892822265625, "step": 3040 }, { "epoch": 0.026329214619847646, "grad_norm": 2.8137946023980307, "learning_rate": 5.997434660091126e-06, "loss": 0.436309814453125, "step": 3045 }, { "epoch": 0.02637244814139091, "grad_norm": 18.361725388586635, "learning_rate": 5.997426229618468e-06, "loss": 0.2005615234375, "step": 3050 }, { "epoch": 0.026415681662934173, "grad_norm": 26.848565354923903, "learning_rate": 5.997417785321945e-06, "loss": 0.17088050842285157, "step": 3055 }, { "epoch": 0.026458915184477436, "grad_norm": 34.74080169835595, "learning_rate": 5.9974093272015956e-06, "loss": 0.48145751953125, "step": 3060 }, { "epoch": 0.0265021487060207, "grad_norm": 54.53856665971964, "learning_rate": 5.99740085525746e-06, "loss": 0.38733673095703125, "step": 3065 }, { "epoch": 0.026545382227563964, "grad_norm": 3.679741926551467, "learning_rate": 5.997392369489577e-06, "loss": 0.09459075927734376, "step": 3070 }, { "epoch": 0.026588615749107227, "grad_norm": 49.38743752246049, "learning_rate": 5.997383869897985e-06, "loss": 0.24397735595703124, "step": 3075 }, { "epoch": 0.02663184927065049, "grad_norm": 1.0446837103229294, "learning_rate": 5.997375356482722e-06, "loss": 0.13250808715820311, "step": 3080 }, { "epoch": 0.026675082792193754, "grad_norm": 9.291373967783189, "learning_rate": 5.99736682924383e-06, "loss": 0.1861663818359375, "step": 3085 }, { "epoch": 0.026718316313737018, "grad_norm": 3.8725614452115096, "learning_rate": 5.997358288181347e-06, "loss": 0.262213134765625, "step": 3090 }, { "epoch": 0.02676154983528028, "grad_norm": 145.9333207164126, "learning_rate": 5.997349733295312e-06, "loss": 0.3151214599609375, "step": 3095 }, { "epoch": 0.026804783356823545, "grad_norm": 47.851908968631, "learning_rate": 5.997341164585766e-06, "loss": 0.207794189453125, "step": 3100 }, { "epoch": 0.026848016878366812, "grad_norm": 4.728578866750632, "learning_rate": 5.9973325820527456e-06, "loss": 0.09859466552734375, "step": 3105 }, { "epoch": 0.026891250399910076, "grad_norm": 17.619698584830438, "learning_rate": 5.997323985696293e-06, "loss": 0.12610015869140626, "step": 3110 }, { "epoch": 0.02693448392145334, "grad_norm": 80.72666923179347, "learning_rate": 5.9973153755164476e-06, "loss": 0.4305755615234375, "step": 3115 }, { "epoch": 0.026977717442996603, "grad_norm": 9.441292945282546, "learning_rate": 5.9973067515132475e-06, "loss": 0.111090087890625, "step": 3120 }, { "epoch": 0.027020950964539867, "grad_norm": 1.055409911348263, "learning_rate": 5.997298113686734e-06, "loss": 0.39486236572265626, "step": 3125 }, { "epoch": 0.02706418448608313, "grad_norm": 29.64659101438804, "learning_rate": 5.997289462036947e-06, "loss": 0.103045654296875, "step": 3130 }, { "epoch": 0.027107418007626394, "grad_norm": 29.797538746468156, "learning_rate": 5.997280796563925e-06, "loss": 0.315155029296875, "step": 3135 }, { "epoch": 0.027150651529169657, "grad_norm": 12.987577558699488, "learning_rate": 5.997272117267709e-06, "loss": 0.21756591796875, "step": 3140 }, { "epoch": 0.02719388505071292, "grad_norm": 12.298782493339774, "learning_rate": 5.997263424148339e-06, "loss": 0.09227447509765625, "step": 3145 }, { "epoch": 0.027237118572256185, "grad_norm": 12.66489272787251, "learning_rate": 5.9972547172058545e-06, "loss": 0.235931396484375, "step": 3150 }, { "epoch": 0.027280352093799448, "grad_norm": 12.189312285091548, "learning_rate": 5.997245996440297e-06, "loss": 0.44429931640625, "step": 3155 }, { "epoch": 0.027323585615342712, "grad_norm": 17.417800001850626, "learning_rate": 5.9972372618517055e-06, "loss": 0.2357086181640625, "step": 3160 }, { "epoch": 0.027366819136885975, "grad_norm": 13.999948142858274, "learning_rate": 5.99722851344012e-06, "loss": 0.349993896484375, "step": 3165 }, { "epoch": 0.02741005265842924, "grad_norm": 7.704162197938255, "learning_rate": 5.997219751205582e-06, "loss": 0.282666015625, "step": 3170 }, { "epoch": 0.027453286179972503, "grad_norm": 46.82042180101868, "learning_rate": 5.9972109751481304e-06, "loss": 0.21712150573730468, "step": 3175 }, { "epoch": 0.027496519701515766, "grad_norm": 2.084323908134423, "learning_rate": 5.997202185267809e-06, "loss": 0.20875244140625, "step": 3180 }, { "epoch": 0.02753975322305903, "grad_norm": 3.0957050866367397, "learning_rate": 5.997193381564654e-06, "loss": 0.403033447265625, "step": 3185 }, { "epoch": 0.027582986744602293, "grad_norm": 0.6862442144992524, "learning_rate": 5.997184564038708e-06, "loss": 0.30354995727539064, "step": 3190 }, { "epoch": 0.027626220266145557, "grad_norm": 5.799839042449476, "learning_rate": 5.997175732690012e-06, "loss": 0.26839599609375, "step": 3195 }, { "epoch": 0.027669453787688824, "grad_norm": 18.749087667409288, "learning_rate": 5.997166887518607e-06, "loss": 0.21404266357421875, "step": 3200 }, { "epoch": 0.027712687309232088, "grad_norm": 1.0524284502852963, "learning_rate": 5.997158028524532e-06, "loss": 0.16273651123046876, "step": 3205 }, { "epoch": 0.02775592083077535, "grad_norm": 21.91139529087551, "learning_rate": 5.997149155707829e-06, "loss": 0.410211181640625, "step": 3210 }, { "epoch": 0.027799154352318615, "grad_norm": 31.25793975778974, "learning_rate": 5.997140269068539e-06, "loss": 0.1681488037109375, "step": 3215 }, { "epoch": 0.02784238787386188, "grad_norm": 29.688396160433925, "learning_rate": 5.997131368606704e-06, "loss": 0.40801849365234377, "step": 3220 }, { "epoch": 0.027885621395405142, "grad_norm": 2.213107605654, "learning_rate": 5.997122454322363e-06, "loss": 0.07279052734375, "step": 3225 }, { "epoch": 0.027928854916948406, "grad_norm": 27.031007505101464, "learning_rate": 5.997113526215558e-06, "loss": 0.17974853515625, "step": 3230 }, { "epoch": 0.02797208843849167, "grad_norm": 14.77375193246778, "learning_rate": 5.9971045842863304e-06, "loss": 0.2685455322265625, "step": 3235 }, { "epoch": 0.028015321960034933, "grad_norm": 74.07538677629535, "learning_rate": 5.997095628534722e-06, "loss": 0.42432708740234376, "step": 3240 }, { "epoch": 0.028058555481578196, "grad_norm": 11.543939536105901, "learning_rate": 5.997086658960773e-06, "loss": 0.18756103515625, "step": 3245 }, { "epoch": 0.02810178900312146, "grad_norm": 28.97870416608045, "learning_rate": 5.997077675564525e-06, "loss": 0.070654296875, "step": 3250 }, { "epoch": 0.028145022524664724, "grad_norm": 48.07595864722879, "learning_rate": 5.9970686783460196e-06, "loss": 0.40447235107421875, "step": 3255 }, { "epoch": 0.028188256046207987, "grad_norm": 17.55588083369903, "learning_rate": 5.997059667305298e-06, "loss": 0.4455841064453125, "step": 3260 }, { "epoch": 0.02823148956775125, "grad_norm": 72.57056989971534, "learning_rate": 5.997050642442402e-06, "loss": 0.35833740234375, "step": 3265 }, { "epoch": 0.028274723089294514, "grad_norm": 6.370243112931571, "learning_rate": 5.997041603757373e-06, "loss": 0.49825439453125, "step": 3270 }, { "epoch": 0.028317956610837778, "grad_norm": 28.91141149716969, "learning_rate": 5.9970325512502546e-06, "loss": 0.27255096435546877, "step": 3275 }, { "epoch": 0.02836119013238104, "grad_norm": 9.256921744516841, "learning_rate": 5.997023484921086e-06, "loss": 0.2096343994140625, "step": 3280 }, { "epoch": 0.028404423653924305, "grad_norm": 8.024245340559172, "learning_rate": 5.997014404769909e-06, "loss": 0.128118896484375, "step": 3285 }, { "epoch": 0.028447657175467572, "grad_norm": 42.033060329360744, "learning_rate": 5.997005310796767e-06, "loss": 0.29150390625, "step": 3290 }, { "epoch": 0.028490890697010836, "grad_norm": 2.259246273208778, "learning_rate": 5.996996203001702e-06, "loss": 0.208941650390625, "step": 3295 }, { "epoch": 0.0285341242185541, "grad_norm": 0.16510899505717194, "learning_rate": 5.996987081384754e-06, "loss": 0.31137847900390625, "step": 3300 }, { "epoch": 0.028577357740097363, "grad_norm": 80.57817193995824, "learning_rate": 5.9969779459459675e-06, "loss": 0.2820220947265625, "step": 3305 }, { "epoch": 0.028620591261640627, "grad_norm": 9.804699474584329, "learning_rate": 5.996968796685383e-06, "loss": 0.1303497314453125, "step": 3310 }, { "epoch": 0.02866382478318389, "grad_norm": 18.14665383658453, "learning_rate": 5.996959633603043e-06, "loss": 0.2966552734375, "step": 3315 }, { "epoch": 0.028707058304727154, "grad_norm": 19.07897166970668, "learning_rate": 5.99695045669899e-06, "loss": 0.10964736938476563, "step": 3320 }, { "epoch": 0.028750291826270417, "grad_norm": 12.904850455645622, "learning_rate": 5.996941265973267e-06, "loss": 0.153790283203125, "step": 3325 }, { "epoch": 0.02879352534781368, "grad_norm": 32.52600638125149, "learning_rate": 5.996932061425916e-06, "loss": 0.1558837890625, "step": 3330 }, { "epoch": 0.028836758869356945, "grad_norm": 3.7503977760453564, "learning_rate": 5.996922843056979e-06, "loss": 0.2754638671875, "step": 3335 }, { "epoch": 0.028879992390900208, "grad_norm": 8.645883202142748, "learning_rate": 5.996913610866498e-06, "loss": 0.19954071044921876, "step": 3340 }, { "epoch": 0.028923225912443472, "grad_norm": 14.005410740765539, "learning_rate": 5.996904364854516e-06, "loss": 0.149029541015625, "step": 3345 }, { "epoch": 0.028966459433986735, "grad_norm": 24.92721347019756, "learning_rate": 5.996895105021076e-06, "loss": 0.2620941162109375, "step": 3350 }, { "epoch": 0.02900969295553, "grad_norm": 3.04910791088459, "learning_rate": 5.9968858313662216e-06, "loss": 0.4684539794921875, "step": 3355 }, { "epoch": 0.029052926477073263, "grad_norm": 14.062360181944278, "learning_rate": 5.996876543889994e-06, "loss": 0.105029296875, "step": 3360 }, { "epoch": 0.029096159998616526, "grad_norm": 4.077204256452031, "learning_rate": 5.996867242592436e-06, "loss": 0.164752197265625, "step": 3365 }, { "epoch": 0.02913939352015979, "grad_norm": 11.33704687715369, "learning_rate": 5.996857927473592e-06, "loss": 0.0915679931640625, "step": 3370 }, { "epoch": 0.029182627041703053, "grad_norm": 3.901707283810482, "learning_rate": 5.996848598533503e-06, "loss": 0.19094619750976563, "step": 3375 }, { "epoch": 0.029225860563246317, "grad_norm": 1.4870554236468227, "learning_rate": 5.996839255772214e-06, "loss": 0.163604736328125, "step": 3380 }, { "epoch": 0.029269094084789584, "grad_norm": 24.140011349262434, "learning_rate": 5.996829899189768e-06, "loss": 0.17459869384765625, "step": 3385 }, { "epoch": 0.029312327606332848, "grad_norm": 13.158265429576902, "learning_rate": 5.996820528786205e-06, "loss": 0.360260009765625, "step": 3390 }, { "epoch": 0.02935556112787611, "grad_norm": 54.368160743125216, "learning_rate": 5.9968111445615715e-06, "loss": 0.6174301147460938, "step": 3395 }, { "epoch": 0.029398794649419375, "grad_norm": 0.867643045191869, "learning_rate": 5.99680174651591e-06, "loss": 0.0692169189453125, "step": 3400 }, { "epoch": 0.02944202817096264, "grad_norm": 11.512034184212174, "learning_rate": 5.996792334649264e-06, "loss": 0.42943115234375, "step": 3405 }, { "epoch": 0.029485261692505902, "grad_norm": 17.25017567215352, "learning_rate": 5.996782908961676e-06, "loss": 0.3407958984375, "step": 3410 }, { "epoch": 0.029528495214049166, "grad_norm": 11.353753552272824, "learning_rate": 5.99677346945319e-06, "loss": 0.28388137817382814, "step": 3415 }, { "epoch": 0.02957172873559243, "grad_norm": 25.202261136893235, "learning_rate": 5.99676401612385e-06, "loss": 0.2816905975341797, "step": 3420 }, { "epoch": 0.029614962257135693, "grad_norm": 18.372482778650564, "learning_rate": 5.996754548973699e-06, "loss": 0.265313720703125, "step": 3425 }, { "epoch": 0.029658195778678956, "grad_norm": 20.53342201697129, "learning_rate": 5.99674506800278e-06, "loss": 0.2863311767578125, "step": 3430 }, { "epoch": 0.02970142930022222, "grad_norm": 15.4114921302518, "learning_rate": 5.996735573211138e-06, "loss": 0.170391845703125, "step": 3435 }, { "epoch": 0.029744662821765484, "grad_norm": 3.143870309681989, "learning_rate": 5.996726064598817e-06, "loss": 0.16829681396484375, "step": 3440 }, { "epoch": 0.029787896343308747, "grad_norm": 25.413276741038164, "learning_rate": 5.99671654216586e-06, "loss": 0.3943115234375, "step": 3445 }, { "epoch": 0.02983112986485201, "grad_norm": 55.58218087073351, "learning_rate": 5.99670700591231e-06, "loss": 0.34083251953125, "step": 3450 }, { "epoch": 0.029874363386395274, "grad_norm": 1.2156580268696426, "learning_rate": 5.996697455838213e-06, "loss": 0.149127197265625, "step": 3455 }, { "epoch": 0.029917596907938538, "grad_norm": 29.947438587076025, "learning_rate": 5.996687891943613e-06, "loss": 0.4609619140625, "step": 3460 }, { "epoch": 0.0299608304294818, "grad_norm": 34.100335091408915, "learning_rate": 5.996678314228552e-06, "loss": 0.427960205078125, "step": 3465 }, { "epoch": 0.030004063951025065, "grad_norm": 14.960554849307984, "learning_rate": 5.996668722693076e-06, "loss": 0.148828125, "step": 3470 }, { "epoch": 0.030047297472568332, "grad_norm": 63.99925596310221, "learning_rate": 5.996659117337228e-06, "loss": 0.3944000244140625, "step": 3475 }, { "epoch": 0.030090530994111596, "grad_norm": 5.526860071348966, "learning_rate": 5.996649498161053e-06, "loss": 0.1851409912109375, "step": 3480 }, { "epoch": 0.03013376451565486, "grad_norm": 13.466948340822842, "learning_rate": 5.996639865164595e-06, "loss": 0.1194549560546875, "step": 3485 }, { "epoch": 0.030176998037198123, "grad_norm": 71.97531973743867, "learning_rate": 5.996630218347901e-06, "loss": 0.5230728149414062, "step": 3490 }, { "epoch": 0.030220231558741387, "grad_norm": 3.786303546996447, "learning_rate": 5.996620557711011e-06, "loss": 0.291363525390625, "step": 3495 }, { "epoch": 0.03026346508028465, "grad_norm": 10.169356997900382, "learning_rate": 5.996610883253972e-06, "loss": 0.24525299072265624, "step": 3500 }, { "epoch": 0.030306698601827914, "grad_norm": 7.361379643105908, "learning_rate": 5.996601194976829e-06, "loss": 0.1677398681640625, "step": 3505 }, { "epoch": 0.030349932123371177, "grad_norm": 34.19114106995583, "learning_rate": 5.9965914928796255e-06, "loss": 0.315673828125, "step": 3510 }, { "epoch": 0.03039316564491444, "grad_norm": 7.474679845869253, "learning_rate": 5.996581776962407e-06, "loss": 0.178466796875, "step": 3515 }, { "epoch": 0.030436399166457705, "grad_norm": 55.85878957245971, "learning_rate": 5.996572047225219e-06, "loss": 0.41751861572265625, "step": 3520 }, { "epoch": 0.030479632688000968, "grad_norm": 30.728262778387496, "learning_rate": 5.996562303668104e-06, "loss": 0.422613525390625, "step": 3525 }, { "epoch": 0.030522866209544232, "grad_norm": 2.1002541259604492, "learning_rate": 5.996552546291109e-06, "loss": 0.6267059326171875, "step": 3530 }, { "epoch": 0.030566099731087495, "grad_norm": 7.382406438801549, "learning_rate": 5.996542775094278e-06, "loss": 0.2985992431640625, "step": 3535 }, { "epoch": 0.03060933325263076, "grad_norm": 13.167003450670862, "learning_rate": 5.996532990077657e-06, "loss": 0.337408447265625, "step": 3540 }, { "epoch": 0.030652566774174023, "grad_norm": 3.5641622739762786, "learning_rate": 5.996523191241291e-06, "loss": 0.548974609375, "step": 3545 }, { "epoch": 0.030695800295717286, "grad_norm": 38.150681950212764, "learning_rate": 5.9965133785852245e-06, "loss": 0.1925537109375, "step": 3550 }, { "epoch": 0.03073903381726055, "grad_norm": 15.785050746289002, "learning_rate": 5.996503552109504e-06, "loss": 0.27313232421875, "step": 3555 }, { "epoch": 0.030782267338803813, "grad_norm": 25.19579044174956, "learning_rate": 5.996493711814172e-06, "loss": 0.25560302734375, "step": 3560 }, { "epoch": 0.030825500860347077, "grad_norm": 45.97132494070094, "learning_rate": 5.996483857699277e-06, "loss": 0.178375244140625, "step": 3565 }, { "epoch": 0.030868734381890344, "grad_norm": 22.157523663121832, "learning_rate": 5.996473989764864e-06, "loss": 0.329949951171875, "step": 3570 }, { "epoch": 0.030911967903433608, "grad_norm": 67.84857791799067, "learning_rate": 5.996464108010977e-06, "loss": 0.297796630859375, "step": 3575 }, { "epoch": 0.03095520142497687, "grad_norm": 69.01287824078044, "learning_rate": 5.996454212437661e-06, "loss": 0.138311767578125, "step": 3580 }, { "epoch": 0.030998434946520135, "grad_norm": 44.201075468115164, "learning_rate": 5.996444303044964e-06, "loss": 0.2831817626953125, "step": 3585 }, { "epoch": 0.0310416684680634, "grad_norm": 0.7455600085884804, "learning_rate": 5.99643437983293e-06, "loss": 0.24610595703125, "step": 3590 }, { "epoch": 0.031084901989606662, "grad_norm": 13.459603478561197, "learning_rate": 5.996424442801607e-06, "loss": 0.084808349609375, "step": 3595 }, { "epoch": 0.031128135511149926, "grad_norm": 78.60227205271379, "learning_rate": 5.9964144919510385e-06, "loss": 0.352679443359375, "step": 3600 }, { "epoch": 0.03117136903269319, "grad_norm": 47.039743348789706, "learning_rate": 5.996404527281271e-06, "loss": 0.315203857421875, "step": 3605 }, { "epoch": 0.031214602554236453, "grad_norm": 27.30091640248483, "learning_rate": 5.99639454879235e-06, "loss": 0.143878173828125, "step": 3610 }, { "epoch": 0.03125783607577972, "grad_norm": 43.80606283334595, "learning_rate": 5.996384556484323e-06, "loss": 0.36147613525390626, "step": 3615 }, { "epoch": 0.031301069597322984, "grad_norm": 32.550823367861696, "learning_rate": 5.996374550357234e-06, "loss": 0.1284576416015625, "step": 3620 }, { "epoch": 0.03134430311886625, "grad_norm": 8.00774248648098, "learning_rate": 5.996364530411132e-06, "loss": 0.24020538330078126, "step": 3625 }, { "epoch": 0.03138753664040951, "grad_norm": 8.438831616781275, "learning_rate": 5.99635449664606e-06, "loss": 0.116778564453125, "step": 3630 }, { "epoch": 0.031430770161952774, "grad_norm": 0.9325939931565346, "learning_rate": 5.996344449062067e-06, "loss": 0.1499725341796875, "step": 3635 }, { "epoch": 0.03147400368349604, "grad_norm": 74.856179640601, "learning_rate": 5.9963343876591975e-06, "loss": 0.40295257568359377, "step": 3640 }, { "epoch": 0.0315172372050393, "grad_norm": 16.097897890788865, "learning_rate": 5.996324312437498e-06, "loss": 0.151629638671875, "step": 3645 }, { "epoch": 0.031560470726582565, "grad_norm": 53.937208697367474, "learning_rate": 5.996314223397016e-06, "loss": 0.22799072265625, "step": 3650 }, { "epoch": 0.03160370424812583, "grad_norm": 9.153258306654859, "learning_rate": 5.996304120537799e-06, "loss": 0.0952484130859375, "step": 3655 }, { "epoch": 0.03164693776966909, "grad_norm": 97.6688197154626, "learning_rate": 5.99629400385989e-06, "loss": 0.3277740478515625, "step": 3660 }, { "epoch": 0.031690171291212356, "grad_norm": 100.69605520297381, "learning_rate": 5.996283873363339e-06, "loss": 0.24210586547851562, "step": 3665 }, { "epoch": 0.03173340481275562, "grad_norm": 24.90850485173193, "learning_rate": 5.9962737290481915e-06, "loss": 0.1045928955078125, "step": 3670 }, { "epoch": 0.03177663833429888, "grad_norm": 35.15491301520147, "learning_rate": 5.996263570914494e-06, "loss": 0.12827911376953124, "step": 3675 }, { "epoch": 0.03181987185584215, "grad_norm": 5.307707647452214, "learning_rate": 5.996253398962294e-06, "loss": 0.19818878173828125, "step": 3680 }, { "epoch": 0.03186310537738541, "grad_norm": 41.353693496392, "learning_rate": 5.996243213191637e-06, "loss": 0.495654296875, "step": 3685 }, { "epoch": 0.031906338898928674, "grad_norm": 28.269210359500864, "learning_rate": 5.996233013602572e-06, "loss": 0.41763916015625, "step": 3690 }, { "epoch": 0.03194957242047194, "grad_norm": 9.503840079581016, "learning_rate": 5.996222800195146e-06, "loss": 0.3297393798828125, "step": 3695 }, { "epoch": 0.0319928059420152, "grad_norm": 10.772436170620892, "learning_rate": 5.9962125729694035e-06, "loss": 0.2137908935546875, "step": 3700 }, { "epoch": 0.032036039463558465, "grad_norm": 7.980186803469304, "learning_rate": 5.996202331925395e-06, "loss": 0.207470703125, "step": 3705 }, { "epoch": 0.03207927298510173, "grad_norm": 38.17736152427998, "learning_rate": 5.996192077063164e-06, "loss": 0.22467041015625, "step": 3710 }, { "epoch": 0.03212250650664499, "grad_norm": 48.32196975277328, "learning_rate": 5.996181808382761e-06, "loss": 0.354345703125, "step": 3715 }, { "epoch": 0.032165740028188256, "grad_norm": 19.239616885785598, "learning_rate": 5.996171525884232e-06, "loss": 0.34300537109375, "step": 3720 }, { "epoch": 0.03220897354973152, "grad_norm": 10.037765145300153, "learning_rate": 5.996161229567626e-06, "loss": 0.146514892578125, "step": 3725 }, { "epoch": 0.03225220707127478, "grad_norm": 9.501342311713584, "learning_rate": 5.996150919432988e-06, "loss": 0.3154541015625, "step": 3730 }, { "epoch": 0.032295440592818046, "grad_norm": 25.74244587516485, "learning_rate": 5.996140595480367e-06, "loss": 0.17959976196289062, "step": 3735 }, { "epoch": 0.03233867411436131, "grad_norm": 18.86213633517142, "learning_rate": 5.996130257709809e-06, "loss": 0.199072265625, "step": 3740 }, { "epoch": 0.032381907635904573, "grad_norm": 9.283095530007518, "learning_rate": 5.9961199061213655e-06, "loss": 0.5088531494140625, "step": 3745 }, { "epoch": 0.03242514115744784, "grad_norm": 57.48387271604306, "learning_rate": 5.996109540715081e-06, "loss": 0.193621826171875, "step": 3750 }, { "epoch": 0.0324683746789911, "grad_norm": 6.03157316959364, "learning_rate": 5.996099161491002e-06, "loss": 0.16477813720703124, "step": 3755 }, { "epoch": 0.032511608200534364, "grad_norm": 49.204356002526595, "learning_rate": 5.99608876844918e-06, "loss": 0.267401123046875, "step": 3760 }, { "epoch": 0.03255484172207763, "grad_norm": 30.611268751835322, "learning_rate": 5.996078361589662e-06, "loss": 0.2640838623046875, "step": 3765 }, { "epoch": 0.03259807524362089, "grad_norm": 1.4795347873382605, "learning_rate": 5.996067940912494e-06, "loss": 0.108087158203125, "step": 3770 }, { "epoch": 0.032641308765164155, "grad_norm": 40.76959993838463, "learning_rate": 5.996057506417726e-06, "loss": 0.3080352783203125, "step": 3775 }, { "epoch": 0.03268454228670742, "grad_norm": 9.073583894556894, "learning_rate": 5.996047058105405e-06, "loss": 0.13972549438476561, "step": 3780 }, { "epoch": 0.03272777580825068, "grad_norm": 47.570670249257006, "learning_rate": 5.99603659597558e-06, "loss": 0.373297119140625, "step": 3785 }, { "epoch": 0.032771009329793946, "grad_norm": 16.93064147004713, "learning_rate": 5.9960261200283e-06, "loss": 0.08757171630859376, "step": 3790 }, { "epoch": 0.03281424285133721, "grad_norm": 103.26965487037845, "learning_rate": 5.996015630263611e-06, "loss": 0.3307310104370117, "step": 3795 }, { "epoch": 0.03285747637288048, "grad_norm": 5.827623468364239, "learning_rate": 5.9960051266815625e-06, "loss": 0.135699462890625, "step": 3800 }, { "epoch": 0.032900709894423744, "grad_norm": 16.148994872190684, "learning_rate": 5.9959946092822035e-06, "loss": 0.108258056640625, "step": 3805 }, { "epoch": 0.03294394341596701, "grad_norm": 1.274599920401651, "learning_rate": 5.995984078065582e-06, "loss": 0.32109375, "step": 3810 }, { "epoch": 0.03298717693751027, "grad_norm": 9.451363028835418, "learning_rate": 5.995973533031748e-06, "loss": 0.20341339111328124, "step": 3815 }, { "epoch": 0.033030410459053534, "grad_norm": 4.807262432117248, "learning_rate": 5.995962974180747e-06, "loss": 0.234765625, "step": 3820 }, { "epoch": 0.0330736439805968, "grad_norm": 8.760117000556264, "learning_rate": 5.99595240151263e-06, "loss": 0.12141799926757812, "step": 3825 }, { "epoch": 0.03311687750214006, "grad_norm": 0.3450540742628264, "learning_rate": 5.995941815027445e-06, "loss": 0.12026615142822265, "step": 3830 }, { "epoch": 0.033160111023683325, "grad_norm": 5.50415989638137, "learning_rate": 5.995931214725242e-06, "loss": 0.14060897827148439, "step": 3835 }, { "epoch": 0.03320334454522659, "grad_norm": 11.162395753154616, "learning_rate": 5.995920600606068e-06, "loss": 0.14638214111328124, "step": 3840 }, { "epoch": 0.03324657806676985, "grad_norm": 16.321007137032026, "learning_rate": 5.995909972669974e-06, "loss": 0.182421875, "step": 3845 }, { "epoch": 0.033289811588313116, "grad_norm": 12.921387105521992, "learning_rate": 5.995899330917007e-06, "loss": 0.065997314453125, "step": 3850 }, { "epoch": 0.03333304510985638, "grad_norm": 176.99180011197137, "learning_rate": 5.995888675347217e-06, "loss": 0.2631500244140625, "step": 3855 }, { "epoch": 0.03337627863139964, "grad_norm": 50.49160125820616, "learning_rate": 5.9958780059606525e-06, "loss": 0.3395988464355469, "step": 3860 }, { "epoch": 0.03341951215294291, "grad_norm": 1.436897077520766, "learning_rate": 5.995867322757365e-06, "loss": 0.19686279296875, "step": 3865 }, { "epoch": 0.03346274567448617, "grad_norm": 24.657556441885653, "learning_rate": 5.995856625737401e-06, "loss": 0.248504638671875, "step": 3870 }, { "epoch": 0.033505979196029434, "grad_norm": 41.69669182728267, "learning_rate": 5.995845914900812e-06, "loss": 0.28360595703125, "step": 3875 }, { "epoch": 0.0335492127175727, "grad_norm": 4.5038803586811405, "learning_rate": 5.9958351902476455e-06, "loss": 0.2310791015625, "step": 3880 }, { "epoch": 0.03359244623911596, "grad_norm": 3.223273664559293, "learning_rate": 5.995824451777952e-06, "loss": 0.23779296875, "step": 3885 }, { "epoch": 0.033635679760659225, "grad_norm": 13.386458066318594, "learning_rate": 5.995813699491781e-06, "loss": 0.193414306640625, "step": 3890 }, { "epoch": 0.03367891328220249, "grad_norm": 30.37142307463635, "learning_rate": 5.995802933389182e-06, "loss": 0.77127685546875, "step": 3895 }, { "epoch": 0.03372214680374575, "grad_norm": 2.0081063310577623, "learning_rate": 5.995792153470204e-06, "loss": 0.113494873046875, "step": 3900 }, { "epoch": 0.033765380325289016, "grad_norm": 12.619077373146107, "learning_rate": 5.995781359734897e-06, "loss": 0.284185791015625, "step": 3905 }, { "epoch": 0.03380861384683228, "grad_norm": 5.799180078191242, "learning_rate": 5.9957705521833125e-06, "loss": 0.44736328125, "step": 3910 }, { "epoch": 0.03385184736837554, "grad_norm": 34.3165018941741, "learning_rate": 5.9957597308154976e-06, "loss": 0.29599609375, "step": 3915 }, { "epoch": 0.033895080889918806, "grad_norm": 17.3437632367942, "learning_rate": 5.995748895631505e-06, "loss": 0.13062744140625, "step": 3920 }, { "epoch": 0.03393831441146207, "grad_norm": 15.094927502520747, "learning_rate": 5.995738046631381e-06, "loss": 0.337188720703125, "step": 3925 }, { "epoch": 0.033981547933005334, "grad_norm": 81.7067195094423, "learning_rate": 5.9957271838151795e-06, "loss": 0.39742279052734375, "step": 3930 }, { "epoch": 0.0340247814545486, "grad_norm": 2.9712297235833054, "learning_rate": 5.995716307182949e-06, "loss": 0.0953216552734375, "step": 3935 }, { "epoch": 0.03406801497609186, "grad_norm": 4.792187178128971, "learning_rate": 5.995705416734739e-06, "loss": 0.2487457275390625, "step": 3940 }, { "epoch": 0.034111248497635124, "grad_norm": 38.26672749707947, "learning_rate": 5.9956945124706e-06, "loss": 0.44716796875, "step": 3945 }, { "epoch": 0.03415448201917839, "grad_norm": 11.022559795392336, "learning_rate": 5.9956835943905834e-06, "loss": 0.198291015625, "step": 3950 }, { "epoch": 0.03419771554072165, "grad_norm": 3.5128613058189146, "learning_rate": 5.995672662494739e-06, "loss": 0.1323638916015625, "step": 3955 }, { "epoch": 0.034240949062264915, "grad_norm": 7.484802520944113, "learning_rate": 5.995661716783116e-06, "loss": 0.19259033203125, "step": 3960 }, { "epoch": 0.03428418258380818, "grad_norm": 5.496117367184064, "learning_rate": 5.995650757255767e-06, "loss": 0.0825347900390625, "step": 3965 }, { "epoch": 0.03432741610535144, "grad_norm": 21.818821288426044, "learning_rate": 5.99563978391274e-06, "loss": 0.4431243896484375, "step": 3970 }, { "epoch": 0.034370649626894706, "grad_norm": 1.222331596804038, "learning_rate": 5.995628796754087e-06, "loss": 0.25610504150390623, "step": 3975 }, { "epoch": 0.03441388314843797, "grad_norm": 5.86840785301387, "learning_rate": 5.99561779577986e-06, "loss": 0.3103271484375, "step": 3980 }, { "epoch": 0.03445711666998124, "grad_norm": 11.169512942208145, "learning_rate": 5.995606780990108e-06, "loss": 0.090484619140625, "step": 3985 }, { "epoch": 0.034500350191524504, "grad_norm": 50.295168095790125, "learning_rate": 5.995595752384882e-06, "loss": 0.24984130859375, "step": 3990 }, { "epoch": 0.03454358371306777, "grad_norm": 36.16126167469503, "learning_rate": 5.995584709964233e-06, "loss": 0.41045989990234377, "step": 3995 }, { "epoch": 0.03458681723461103, "grad_norm": 2.3070076378983555, "learning_rate": 5.995573653728213e-06, "loss": 0.15980129241943358, "step": 4000 }, { "epoch": 0.034630050756154294, "grad_norm": 28.271745949756458, "learning_rate": 5.99556258367687e-06, "loss": 0.40101318359375, "step": 4005 }, { "epoch": 0.03467328427769756, "grad_norm": 3.0142183306826853, "learning_rate": 5.995551499810259e-06, "loss": 0.413232421875, "step": 4010 }, { "epoch": 0.03471651779924082, "grad_norm": 14.252945421441465, "learning_rate": 5.995540402128428e-06, "loss": 0.526104736328125, "step": 4015 }, { "epoch": 0.034759751320784085, "grad_norm": 53.42470209266853, "learning_rate": 5.99552929063143e-06, "loss": 0.35851364135742186, "step": 4020 }, { "epoch": 0.03480298484232735, "grad_norm": 7.5429919275569794, "learning_rate": 5.9955181653193155e-06, "loss": 0.265460205078125, "step": 4025 }, { "epoch": 0.03484621836387061, "grad_norm": 16.04889563171861, "learning_rate": 5.995507026192135e-06, "loss": 0.25053253173828127, "step": 4030 }, { "epoch": 0.034889451885413876, "grad_norm": 12.860694180435196, "learning_rate": 5.995495873249943e-06, "loss": 0.257977294921875, "step": 4035 }, { "epoch": 0.03493268540695714, "grad_norm": 39.406316342461665, "learning_rate": 5.995484706492786e-06, "loss": 0.2563514709472656, "step": 4040 }, { "epoch": 0.0349759189285004, "grad_norm": 11.048324226147916, "learning_rate": 5.99547352592072e-06, "loss": 0.1993408203125, "step": 4045 }, { "epoch": 0.03501915245004367, "grad_norm": 8.789157547054998, "learning_rate": 5.995462331533794e-06, "loss": 0.2191558837890625, "step": 4050 }, { "epoch": 0.03506238597158693, "grad_norm": 29.362499319149915, "learning_rate": 5.99545112333206e-06, "loss": 0.37391357421875, "step": 4055 }, { "epoch": 0.035105619493130194, "grad_norm": 13.7315709643895, "learning_rate": 5.995439901315571e-06, "loss": 0.10457763671875, "step": 4060 }, { "epoch": 0.03514885301467346, "grad_norm": 12.164208051069446, "learning_rate": 5.995428665484378e-06, "loss": 0.14051055908203125, "step": 4065 }, { "epoch": 0.03519208653621672, "grad_norm": 19.747270865673354, "learning_rate": 5.995417415838532e-06, "loss": 0.255987548828125, "step": 4070 }, { "epoch": 0.035235320057759985, "grad_norm": 30.522377722276417, "learning_rate": 5.995406152378086e-06, "loss": 0.193780517578125, "step": 4075 }, { "epoch": 0.03527855357930325, "grad_norm": 30.494222278578977, "learning_rate": 5.995394875103091e-06, "loss": 0.24976806640625, "step": 4080 }, { "epoch": 0.03532178710084651, "grad_norm": 3.5323480014337036, "learning_rate": 5.995383584013601e-06, "loss": 0.09151153564453125, "step": 4085 }, { "epoch": 0.035365020622389776, "grad_norm": 1.9367111939628776, "learning_rate": 5.995372279109665e-06, "loss": 0.5802001953125, "step": 4090 }, { "epoch": 0.03540825414393304, "grad_norm": 39.09606686780997, "learning_rate": 5.995360960391338e-06, "loss": 0.456964111328125, "step": 4095 }, { "epoch": 0.0354514876654763, "grad_norm": 45.060525612700765, "learning_rate": 5.995349627858669e-06, "loss": 0.18196487426757812, "step": 4100 }, { "epoch": 0.035494721187019566, "grad_norm": 80.23403376316354, "learning_rate": 5.995338281511714e-06, "loss": 0.4239288330078125, "step": 4105 }, { "epoch": 0.03553795470856283, "grad_norm": 8.492781267472752, "learning_rate": 5.995326921350522e-06, "loss": 0.2684661865234375, "step": 4110 }, { "epoch": 0.035581188230106094, "grad_norm": 23.53936250614929, "learning_rate": 5.995315547375148e-06, "loss": 0.11331787109375, "step": 4115 }, { "epoch": 0.03562442175164936, "grad_norm": 5.851940740749776, "learning_rate": 5.995304159585643e-06, "loss": 0.1777313232421875, "step": 4120 }, { "epoch": 0.03566765527319262, "grad_norm": 2.7698787036640744, "learning_rate": 5.99529275798206e-06, "loss": 0.09185028076171875, "step": 4125 }, { "epoch": 0.035710888794735884, "grad_norm": 0.98107653799218, "learning_rate": 5.995281342564451e-06, "loss": 0.297442626953125, "step": 4130 }, { "epoch": 0.03575412231627915, "grad_norm": 6.9027086967514455, "learning_rate": 5.99526991333287e-06, "loss": 0.3942352294921875, "step": 4135 }, { "epoch": 0.03579735583782241, "grad_norm": 16.00259758718176, "learning_rate": 5.995258470287368e-06, "loss": 0.1462158203125, "step": 4140 }, { "epoch": 0.035840589359365675, "grad_norm": 41.808376123701755, "learning_rate": 5.995247013427999e-06, "loss": 0.27618408203125, "step": 4145 }, { "epoch": 0.03588382288090894, "grad_norm": 21.13425120816542, "learning_rate": 5.995235542754816e-06, "loss": 0.23951873779296876, "step": 4150 }, { "epoch": 0.0359270564024522, "grad_norm": 46.18742668805337, "learning_rate": 5.99522405826787e-06, "loss": 0.243585205078125, "step": 4155 }, { "epoch": 0.035970289923995466, "grad_norm": 6.676243751529632, "learning_rate": 5.995212559967217e-06, "loss": 0.095623779296875, "step": 4160 }, { "epoch": 0.03601352344553873, "grad_norm": 7.267193411769935, "learning_rate": 5.995201047852907e-06, "loss": 0.327703857421875, "step": 4165 }, { "epoch": 0.036056756967082, "grad_norm": 58.14550496092327, "learning_rate": 5.995189521924995e-06, "loss": 0.2163848876953125, "step": 4170 }, { "epoch": 0.036099990488625264, "grad_norm": 37.41087719594199, "learning_rate": 5.995177982183533e-06, "loss": 0.312957763671875, "step": 4175 }, { "epoch": 0.03614322401016853, "grad_norm": 38.57407160246001, "learning_rate": 5.995166428628576e-06, "loss": 0.195635986328125, "step": 4180 }, { "epoch": 0.03618645753171179, "grad_norm": 31.751172505063426, "learning_rate": 5.995154861260175e-06, "loss": 0.24093399047851563, "step": 4185 }, { "epoch": 0.036229691053255055, "grad_norm": 13.673747604664271, "learning_rate": 5.995143280078385e-06, "loss": 0.25694580078125, "step": 4190 }, { "epoch": 0.03627292457479832, "grad_norm": 36.6824945867649, "learning_rate": 5.995131685083258e-06, "loss": 0.404730224609375, "step": 4195 }, { "epoch": 0.03631615809634158, "grad_norm": 49.64135052464479, "learning_rate": 5.9951200762748495e-06, "loss": 0.5258544921875, "step": 4200 }, { "epoch": 0.036359391617884845, "grad_norm": 4.289784565684946, "learning_rate": 5.995108453653211e-06, "loss": 0.5320785522460938, "step": 4205 }, { "epoch": 0.03640262513942811, "grad_norm": 41.9413398080394, "learning_rate": 5.9950968172183975e-06, "loss": 0.22081298828125, "step": 4210 }, { "epoch": 0.03644585866097137, "grad_norm": 34.04265657457219, "learning_rate": 5.995085166970462e-06, "loss": 0.35426483154296873, "step": 4215 }, { "epoch": 0.036489092182514636, "grad_norm": 6.4243604887796915, "learning_rate": 5.995073502909459e-06, "loss": 0.1500091552734375, "step": 4220 }, { "epoch": 0.0365323257040579, "grad_norm": 92.00911682249468, "learning_rate": 5.99506182503544e-06, "loss": 0.57880859375, "step": 4225 }, { "epoch": 0.03657555922560116, "grad_norm": 2.2225205262882257, "learning_rate": 5.9950501333484616e-06, "loss": 0.2815521240234375, "step": 4230 }, { "epoch": 0.03661879274714443, "grad_norm": 6.2165563946036135, "learning_rate": 5.995038427848576e-06, "loss": 0.557763671875, "step": 4235 }, { "epoch": 0.03666202626868769, "grad_norm": 7.958809880896046, "learning_rate": 5.995026708535838e-06, "loss": 0.264080810546875, "step": 4240 }, { "epoch": 0.036705259790230954, "grad_norm": 21.64842843730988, "learning_rate": 5.995014975410302e-06, "loss": 0.169378662109375, "step": 4245 }, { "epoch": 0.03674849331177422, "grad_norm": 0.2466603990702153, "learning_rate": 5.9950032284720214e-06, "loss": 0.239898681640625, "step": 4250 }, { "epoch": 0.03679172683331748, "grad_norm": 99.69536780338865, "learning_rate": 5.994991467721051e-06, "loss": 0.3973114013671875, "step": 4255 }, { "epoch": 0.036834960354860745, "grad_norm": 8.62386156410222, "learning_rate": 5.994979693157444e-06, "loss": 0.245263671875, "step": 4260 }, { "epoch": 0.03687819387640401, "grad_norm": 14.871310332399908, "learning_rate": 5.994967904781255e-06, "loss": 0.083660888671875, "step": 4265 }, { "epoch": 0.03692142739794727, "grad_norm": 33.056092184193005, "learning_rate": 5.994956102592538e-06, "loss": 0.416534423828125, "step": 4270 }, { "epoch": 0.036964660919490536, "grad_norm": 23.520053617074144, "learning_rate": 5.99494428659135e-06, "loss": 0.3259033203125, "step": 4275 }, { "epoch": 0.0370078944410338, "grad_norm": 0.5604644152356079, "learning_rate": 5.994932456777743e-06, "loss": 0.36328125, "step": 4280 }, { "epoch": 0.03705112796257706, "grad_norm": 41.22115665608996, "learning_rate": 5.994920613151771e-06, "loss": 0.5301513671875, "step": 4285 }, { "epoch": 0.037094361484120326, "grad_norm": 0.3731622777099602, "learning_rate": 5.994908755713491e-06, "loss": 0.1852264404296875, "step": 4290 }, { "epoch": 0.03713759500566359, "grad_norm": 0.24186935464193107, "learning_rate": 5.994896884462955e-06, "loss": 0.1124237060546875, "step": 4295 }, { "epoch": 0.037180828527206854, "grad_norm": 12.151318208501849, "learning_rate": 5.994884999400221e-06, "loss": 0.2850738525390625, "step": 4300 }, { "epoch": 0.03722406204875012, "grad_norm": 38.15720646828692, "learning_rate": 5.994873100525342e-06, "loss": 0.6646209716796875, "step": 4305 }, { "epoch": 0.03726729557029338, "grad_norm": 9.72712767759651, "learning_rate": 5.994861187838371e-06, "loss": 0.313739013671875, "step": 4310 }, { "epoch": 0.037310529091836644, "grad_norm": 10.267355781359553, "learning_rate": 5.994849261339367e-06, "loss": 0.0992462158203125, "step": 4315 }, { "epoch": 0.03735376261337991, "grad_norm": 8.913842230377622, "learning_rate": 5.9948373210283815e-06, "loss": 0.1419525146484375, "step": 4320 }, { "epoch": 0.03739699613492317, "grad_norm": 17.733929104904124, "learning_rate": 5.994825366905472e-06, "loss": 0.396630859375, "step": 4325 }, { "epoch": 0.037440229656466435, "grad_norm": 118.69184443550357, "learning_rate": 5.994813398970691e-06, "loss": 0.241351318359375, "step": 4330 }, { "epoch": 0.0374834631780097, "grad_norm": 23.679745112963356, "learning_rate": 5.994801417224096e-06, "loss": 0.135693359375, "step": 4335 }, { "epoch": 0.03752669669955296, "grad_norm": 15.456738432336248, "learning_rate": 5.994789421665742e-06, "loss": 0.29403076171875, "step": 4340 }, { "epoch": 0.037569930221096226, "grad_norm": 28.783280409642636, "learning_rate": 5.994777412295683e-06, "loss": 0.386431884765625, "step": 4345 }, { "epoch": 0.03761316374263949, "grad_norm": 7.962058493666641, "learning_rate": 5.994765389113975e-06, "loss": 0.1506072998046875, "step": 4350 }, { "epoch": 0.03765639726418276, "grad_norm": 8.12992392748416, "learning_rate": 5.994753352120674e-06, "loss": 0.3081024169921875, "step": 4355 }, { "epoch": 0.037699630785726024, "grad_norm": 11.75772577118677, "learning_rate": 5.994741301315835e-06, "loss": 0.3542236328125, "step": 4360 }, { "epoch": 0.03774286430726929, "grad_norm": 2.5765245550395512, "learning_rate": 5.994729236699515e-06, "loss": 0.1540435791015625, "step": 4365 }, { "epoch": 0.03778609782881255, "grad_norm": 9.56003358635681, "learning_rate": 5.9947171582717664e-06, "loss": 0.1373687744140625, "step": 4370 }, { "epoch": 0.037829331350355815, "grad_norm": 31.17107836488291, "learning_rate": 5.994705066032648e-06, "loss": 0.20551223754882814, "step": 4375 }, { "epoch": 0.03787256487189908, "grad_norm": 8.405521433018864, "learning_rate": 5.994692959982214e-06, "loss": 0.3739898681640625, "step": 4380 }, { "epoch": 0.03791579839344234, "grad_norm": 17.26575667851417, "learning_rate": 5.994680840120522e-06, "loss": 0.185845947265625, "step": 4385 }, { "epoch": 0.037959031914985605, "grad_norm": 21.209471795432652, "learning_rate": 5.994668706447626e-06, "loss": 0.2765380859375, "step": 4390 }, { "epoch": 0.03800226543652887, "grad_norm": 5.54809458613131, "learning_rate": 5.994656558963581e-06, "loss": 0.6253463745117187, "step": 4395 }, { "epoch": 0.03804549895807213, "grad_norm": 4.093479060783493, "learning_rate": 5.994644397668446e-06, "loss": 0.36170654296875, "step": 4400 }, { "epoch": 0.038088732479615396, "grad_norm": 2.1500802854463017, "learning_rate": 5.994632222562275e-06, "loss": 0.13331451416015624, "step": 4405 }, { "epoch": 0.03813196600115866, "grad_norm": 32.12866754901266, "learning_rate": 5.994620033645125e-06, "loss": 0.457867431640625, "step": 4410 }, { "epoch": 0.03817519952270192, "grad_norm": 24.033518666579777, "learning_rate": 5.994607830917053e-06, "loss": 0.406884765625, "step": 4415 }, { "epoch": 0.03821843304424519, "grad_norm": 38.80102932631436, "learning_rate": 5.9945956143781135e-06, "loss": 0.29801025390625, "step": 4420 }, { "epoch": 0.03826166656578845, "grad_norm": 7.099867123587539, "learning_rate": 5.994583384028364e-06, "loss": 0.34847412109375, "step": 4425 }, { "epoch": 0.038304900087331714, "grad_norm": 3.2089655559796864, "learning_rate": 5.9945711398678596e-06, "loss": 0.37069091796875, "step": 4430 }, { "epoch": 0.03834813360887498, "grad_norm": 10.278605050850869, "learning_rate": 5.9945588818966585e-06, "loss": 0.14915771484375, "step": 4435 }, { "epoch": 0.03839136713041824, "grad_norm": 2.516055889627498, "learning_rate": 5.9945466101148156e-06, "loss": 0.12239532470703125, "step": 4440 }, { "epoch": 0.038434600651961505, "grad_norm": 15.339954506062654, "learning_rate": 5.994534324522389e-06, "loss": 0.6231170654296875, "step": 4445 }, { "epoch": 0.03847783417350477, "grad_norm": 57.65145943900323, "learning_rate": 5.994522025119435e-06, "loss": 0.24375, "step": 4450 }, { "epoch": 0.03852106769504803, "grad_norm": 52.961410379638515, "learning_rate": 5.9945097119060094e-06, "loss": 0.2918914794921875, "step": 4455 }, { "epoch": 0.038564301216591296, "grad_norm": 2.800343628244671, "learning_rate": 5.99449738488217e-06, "loss": 0.1043212890625, "step": 4460 }, { "epoch": 0.03860753473813456, "grad_norm": 4.964418544416011, "learning_rate": 5.994485044047973e-06, "loss": 0.30429840087890625, "step": 4465 }, { "epoch": 0.03865076825967782, "grad_norm": 2.4963975948850625, "learning_rate": 5.994472689403476e-06, "loss": 0.3175567626953125, "step": 4470 }, { "epoch": 0.038694001781221087, "grad_norm": 47.33326351367324, "learning_rate": 5.9944603209487345e-06, "loss": 0.5134368896484375, "step": 4475 }, { "epoch": 0.03873723530276435, "grad_norm": 2.2233239761163266, "learning_rate": 5.994447938683808e-06, "loss": 0.12955322265625, "step": 4480 }, { "epoch": 0.038780468824307614, "grad_norm": 17.322206455003094, "learning_rate": 5.994435542608752e-06, "loss": 0.21707305908203126, "step": 4485 }, { "epoch": 0.03882370234585088, "grad_norm": 0.8787864778498153, "learning_rate": 5.994423132723623e-06, "loss": 0.16429595947265624, "step": 4490 }, { "epoch": 0.03886693586739414, "grad_norm": 19.60285141063411, "learning_rate": 5.994410709028479e-06, "loss": 0.33509521484375, "step": 4495 }, { "epoch": 0.038910169388937405, "grad_norm": 19.405759612825864, "learning_rate": 5.994398271523377e-06, "loss": 0.382696533203125, "step": 4500 }, { "epoch": 0.03895340291048067, "grad_norm": 4.770075320734937, "learning_rate": 5.994385820208376e-06, "loss": 0.24686279296875, "step": 4505 }, { "epoch": 0.03899663643202393, "grad_norm": 6.334488243902135, "learning_rate": 5.9943733550835324e-06, "loss": 0.1374603271484375, "step": 4510 }, { "epoch": 0.039039869953567195, "grad_norm": 11.823753266854526, "learning_rate": 5.994360876148903e-06, "loss": 0.1583688735961914, "step": 4515 }, { "epoch": 0.03908310347511046, "grad_norm": 12.26931685220789, "learning_rate": 5.994348383404546e-06, "loss": 0.0991363525390625, "step": 4520 }, { "epoch": 0.03912633699665372, "grad_norm": 0.5005419955985344, "learning_rate": 5.994335876850518e-06, "loss": 0.149786376953125, "step": 4525 }, { "epoch": 0.039169570518196986, "grad_norm": 1.56661729807816, "learning_rate": 5.994323356486878e-06, "loss": 0.2452554702758789, "step": 4530 }, { "epoch": 0.03921280403974025, "grad_norm": 3.417951853383692, "learning_rate": 5.994310822313684e-06, "loss": 0.43164520263671874, "step": 4535 }, { "epoch": 0.03925603756128352, "grad_norm": 3.7882580886872095, "learning_rate": 5.994298274330992e-06, "loss": 0.06259613037109375, "step": 4540 }, { "epoch": 0.039299271082826784, "grad_norm": 5.872422726034751, "learning_rate": 5.994285712538862e-06, "loss": 0.17889938354492188, "step": 4545 }, { "epoch": 0.03934250460437005, "grad_norm": 41.950955677848775, "learning_rate": 5.99427313693735e-06, "loss": 0.268035888671875, "step": 4550 }, { "epoch": 0.03938573812591331, "grad_norm": 12.910878441493791, "learning_rate": 5.9942605475265145e-06, "loss": 0.5483016967773438, "step": 4555 }, { "epoch": 0.039428971647456575, "grad_norm": 4.103292050543791, "learning_rate": 5.9942479443064154e-06, "loss": 0.11714630126953125, "step": 4560 }, { "epoch": 0.03947220516899984, "grad_norm": 13.990105820468722, "learning_rate": 5.994235327277109e-06, "loss": 0.23053359985351562, "step": 4565 }, { "epoch": 0.0395154386905431, "grad_norm": 9.269442538456738, "learning_rate": 5.9942226964386516e-06, "loss": 0.21064453125, "step": 4570 }, { "epoch": 0.039558672212086365, "grad_norm": 66.6926780459758, "learning_rate": 5.994210051791106e-06, "loss": 0.2771453857421875, "step": 4575 }, { "epoch": 0.03960190573362963, "grad_norm": 69.77307315415669, "learning_rate": 5.994197393334528e-06, "loss": 0.4384033203125, "step": 4580 }, { "epoch": 0.03964513925517289, "grad_norm": 14.946370813304894, "learning_rate": 5.994184721068976e-06, "loss": 0.37850341796875, "step": 4585 }, { "epoch": 0.039688372776716156, "grad_norm": 34.04646390950821, "learning_rate": 5.994172034994508e-06, "loss": 0.298089599609375, "step": 4590 }, { "epoch": 0.03973160629825942, "grad_norm": 29.849541535697945, "learning_rate": 5.994159335111184e-06, "loss": 0.2793212890625, "step": 4595 }, { "epoch": 0.03977483981980268, "grad_norm": 9.673641717197478, "learning_rate": 5.9941466214190615e-06, "loss": 0.2631103515625, "step": 4600 }, { "epoch": 0.03981807334134595, "grad_norm": 12.3440076150117, "learning_rate": 5.994133893918199e-06, "loss": 0.126116943359375, "step": 4605 }, { "epoch": 0.03986130686288921, "grad_norm": 3.452513682857165, "learning_rate": 5.994121152608655e-06, "loss": 0.09528274536132812, "step": 4610 }, { "epoch": 0.039904540384432474, "grad_norm": 30.475951315030873, "learning_rate": 5.9941083974904895e-06, "loss": 0.19931640625, "step": 4615 }, { "epoch": 0.03994777390597574, "grad_norm": 33.38640310863349, "learning_rate": 5.994095628563761e-06, "loss": 0.303857421875, "step": 4620 }, { "epoch": 0.039991007427519, "grad_norm": 20.5092909551066, "learning_rate": 5.994082845828527e-06, "loss": 0.2135589599609375, "step": 4625 }, { "epoch": 0.040034240949062265, "grad_norm": 14.53761555813935, "learning_rate": 5.9940700492848496e-06, "loss": 0.08797225952148438, "step": 4630 }, { "epoch": 0.04007747447060553, "grad_norm": 81.92077623452396, "learning_rate": 5.994057238932784e-06, "loss": 0.485015869140625, "step": 4635 }, { "epoch": 0.04012070799214879, "grad_norm": 34.62334335512866, "learning_rate": 5.994044414772392e-06, "loss": 0.46762237548828123, "step": 4640 }, { "epoch": 0.040163941513692056, "grad_norm": 16.12219719539507, "learning_rate": 5.99403157680373e-06, "loss": 0.40089874267578124, "step": 4645 }, { "epoch": 0.04020717503523532, "grad_norm": 62.00520787669035, "learning_rate": 5.99401872502686e-06, "loss": 0.2740570068359375, "step": 4650 }, { "epoch": 0.04025040855677858, "grad_norm": 1.6230647684122816, "learning_rate": 5.99400585944184e-06, "loss": 0.30257568359375, "step": 4655 }, { "epoch": 0.04029364207832185, "grad_norm": 10.673272176274581, "learning_rate": 5.99399298004873e-06, "loss": 0.28931884765625, "step": 4660 }, { "epoch": 0.04033687559986511, "grad_norm": 12.211329056977942, "learning_rate": 5.993980086847588e-06, "loss": 0.14881591796875, "step": 4665 }, { "epoch": 0.040380109121408374, "grad_norm": 1.515559360454444, "learning_rate": 5.993967179838475e-06, "loss": 0.041943359375, "step": 4670 }, { "epoch": 0.04042334264295164, "grad_norm": 4.540144836387348, "learning_rate": 5.99395425902145e-06, "loss": 0.357861328125, "step": 4675 }, { "epoch": 0.0404665761644949, "grad_norm": 13.532198102702182, "learning_rate": 5.993941324396572e-06, "loss": 0.2975288391113281, "step": 4680 }, { "epoch": 0.040509809686038165, "grad_norm": 6.435724719075154, "learning_rate": 5.993928375963901e-06, "loss": 0.47152118682861327, "step": 4685 }, { "epoch": 0.04055304320758143, "grad_norm": 80.95301586915262, "learning_rate": 5.9939154137234985e-06, "loss": 0.3964672088623047, "step": 4690 }, { "epoch": 0.04059627672912469, "grad_norm": 6.372447645935549, "learning_rate": 5.993902437675421e-06, "loss": 0.39435577392578125, "step": 4695 }, { "epoch": 0.040639510250667955, "grad_norm": 6.112679560884395, "learning_rate": 5.99388944781973e-06, "loss": 0.2014129638671875, "step": 4700 }, { "epoch": 0.04068274377221122, "grad_norm": 22.124235492070866, "learning_rate": 5.993876444156487e-06, "loss": 0.175213623046875, "step": 4705 }, { "epoch": 0.04072597729375448, "grad_norm": 13.03228552397711, "learning_rate": 5.993863426685749e-06, "loss": 0.07935676574707032, "step": 4710 }, { "epoch": 0.040769210815297746, "grad_norm": 18.89336066708093, "learning_rate": 5.993850395407578e-06, "loss": 0.14735260009765624, "step": 4715 }, { "epoch": 0.04081244433684101, "grad_norm": 12.099214708331795, "learning_rate": 5.993837350322034e-06, "loss": 0.09800338745117188, "step": 4720 }, { "epoch": 0.04085567785838428, "grad_norm": 5.087538473428322, "learning_rate": 5.993824291429176e-06, "loss": 0.1077911376953125, "step": 4725 }, { "epoch": 0.040898911379927544, "grad_norm": 2.110973819964856, "learning_rate": 5.993811218729064e-06, "loss": 0.0525787353515625, "step": 4730 }, { "epoch": 0.04094214490147081, "grad_norm": 8.585040397964981, "learning_rate": 5.99379813222176e-06, "loss": 0.2261474609375, "step": 4735 }, { "epoch": 0.04098537842301407, "grad_norm": 8.554218240051915, "learning_rate": 5.993785031907324e-06, "loss": 0.34869384765625, "step": 4740 }, { "epoch": 0.041028611944557335, "grad_norm": 10.44023276138021, "learning_rate": 5.993771917785815e-06, "loss": 0.1299102783203125, "step": 4745 }, { "epoch": 0.0410718454661006, "grad_norm": 16.798868807586185, "learning_rate": 5.993758789857296e-06, "loss": 0.195208740234375, "step": 4750 }, { "epoch": 0.04111507898764386, "grad_norm": 4.1571192011891265, "learning_rate": 5.993745648121824e-06, "loss": 0.317626953125, "step": 4755 }, { "epoch": 0.041158312509187125, "grad_norm": 13.100074757271473, "learning_rate": 5.993732492579463e-06, "loss": 0.32596282958984374, "step": 4760 }, { "epoch": 0.04120154603073039, "grad_norm": 44.56117069130143, "learning_rate": 5.9937193232302725e-06, "loss": 0.19561767578125, "step": 4765 }, { "epoch": 0.04124477955227365, "grad_norm": 11.966540624367545, "learning_rate": 5.993706140074312e-06, "loss": 0.10261459350585937, "step": 4770 }, { "epoch": 0.041288013073816916, "grad_norm": 15.225701555423582, "learning_rate": 5.993692943111644e-06, "loss": 0.34290084838867185, "step": 4775 }, { "epoch": 0.04133124659536018, "grad_norm": 3.4573519032108933, "learning_rate": 5.993679732342328e-06, "loss": 0.21327247619628906, "step": 4780 }, { "epoch": 0.04137448011690344, "grad_norm": 21.67076588539965, "learning_rate": 5.9936665077664255e-06, "loss": 0.271734619140625, "step": 4785 }, { "epoch": 0.04141771363844671, "grad_norm": 34.647659531185184, "learning_rate": 5.993653269383999e-06, "loss": 0.1666330337524414, "step": 4790 }, { "epoch": 0.04146094715998997, "grad_norm": 0.8792579173900145, "learning_rate": 5.993640017195107e-06, "loss": 0.4184600830078125, "step": 4795 }, { "epoch": 0.041504180681533234, "grad_norm": 7.899510826354947, "learning_rate": 5.993626751199812e-06, "loss": 0.1684112548828125, "step": 4800 }, { "epoch": 0.0415474142030765, "grad_norm": 41.6769869404634, "learning_rate": 5.993613471398175e-06, "loss": 0.3797607421875, "step": 4805 }, { "epoch": 0.04159064772461976, "grad_norm": 3.5814281106056662, "learning_rate": 5.993600177790257e-06, "loss": 0.30099029541015626, "step": 4810 }, { "epoch": 0.041633881246163025, "grad_norm": 2.8470029470918896, "learning_rate": 5.99358687037612e-06, "loss": 0.04986763000488281, "step": 4815 }, { "epoch": 0.04167711476770629, "grad_norm": 17.648942883686036, "learning_rate": 5.993573549155825e-06, "loss": 0.22022705078125, "step": 4820 }, { "epoch": 0.04172034828924955, "grad_norm": 11.933823256018723, "learning_rate": 5.993560214129432e-06, "loss": 0.2338531494140625, "step": 4825 }, { "epoch": 0.041763581810792816, "grad_norm": 46.55108732723695, "learning_rate": 5.993546865297005e-06, "loss": 0.6072021484375, "step": 4830 }, { "epoch": 0.04180681533233608, "grad_norm": 34.556450904363196, "learning_rate": 5.993533502658604e-06, "loss": 0.4841796875, "step": 4835 }, { "epoch": 0.04185004885387934, "grad_norm": 9.213519977539116, "learning_rate": 5.993520126214291e-06, "loss": 0.1169464111328125, "step": 4840 }, { "epoch": 0.04189328237542261, "grad_norm": 85.89604003491709, "learning_rate": 5.9935067359641275e-06, "loss": 0.6806396484375, "step": 4845 }, { "epoch": 0.04193651589696587, "grad_norm": 22.53939894827532, "learning_rate": 5.993493331908175e-06, "loss": 0.2947509765625, "step": 4850 }, { "epoch": 0.041979749418509134, "grad_norm": 3.6029480712515296, "learning_rate": 5.993479914046497e-06, "loss": 0.215484619140625, "step": 4855 }, { "epoch": 0.0420229829400524, "grad_norm": 54.98690162687589, "learning_rate": 5.993466482379154e-06, "loss": 0.253173828125, "step": 4860 }, { "epoch": 0.04206621646159566, "grad_norm": 8.334424708716112, "learning_rate": 5.993453036906207e-06, "loss": 0.10166854858398437, "step": 4865 }, { "epoch": 0.042109449983138925, "grad_norm": 4.571959244709068, "learning_rate": 5.99343957762772e-06, "loss": 0.133306884765625, "step": 4870 }, { "epoch": 0.04215268350468219, "grad_norm": 24.153141010992925, "learning_rate": 5.9934261045437536e-06, "loss": 0.24415283203125, "step": 4875 }, { "epoch": 0.04219591702622545, "grad_norm": 5.865595676329635, "learning_rate": 5.993412617654371e-06, "loss": 0.08895416259765625, "step": 4880 }, { "epoch": 0.042239150547768715, "grad_norm": 9.956355016757769, "learning_rate": 5.993399116959633e-06, "loss": 0.105438232421875, "step": 4885 }, { "epoch": 0.04228238406931198, "grad_norm": 6.33196928007966, "learning_rate": 5.993385602459604e-06, "loss": 0.13446502685546874, "step": 4890 }, { "epoch": 0.04232561759085524, "grad_norm": 16.379555368304572, "learning_rate": 5.993372074154345e-06, "loss": 0.120294189453125, "step": 4895 }, { "epoch": 0.042368851112398506, "grad_norm": 67.30072882028126, "learning_rate": 5.993358532043917e-06, "loss": 0.2014251708984375, "step": 4900 }, { "epoch": 0.04241208463394177, "grad_norm": 52.9792176312431, "learning_rate": 5.993344976128386e-06, "loss": 0.5816139221191406, "step": 4905 }, { "epoch": 0.04245531815548504, "grad_norm": 17.790308728412896, "learning_rate": 5.9933314064078104e-06, "loss": 0.16073532104492189, "step": 4910 }, { "epoch": 0.042498551677028304, "grad_norm": 27.843100861142734, "learning_rate": 5.993317822882257e-06, "loss": 0.3128021240234375, "step": 4915 }, { "epoch": 0.04254178519857157, "grad_norm": 18.044559033173872, "learning_rate": 5.993304225551785e-06, "loss": 0.24166107177734375, "step": 4920 }, { "epoch": 0.04258501872011483, "grad_norm": 2.614241586458319, "learning_rate": 5.993290614416459e-06, "loss": 0.186505126953125, "step": 4925 }, { "epoch": 0.042628252241658095, "grad_norm": 6.114365715925545, "learning_rate": 5.99327698947634e-06, "loss": 0.08980712890625, "step": 4930 }, { "epoch": 0.04267148576320136, "grad_norm": 13.631923933634331, "learning_rate": 5.9932633507314935e-06, "loss": 0.4137397766113281, "step": 4935 }, { "epoch": 0.04271471928474462, "grad_norm": 7.137954640389229, "learning_rate": 5.99324969818198e-06, "loss": 0.1073272705078125, "step": 4940 }, { "epoch": 0.042757952806287886, "grad_norm": 15.250233784840681, "learning_rate": 5.993236031827863e-06, "loss": 0.3600830078125, "step": 4945 }, { "epoch": 0.04280118632783115, "grad_norm": 20.84577089297286, "learning_rate": 5.993222351669207e-06, "loss": 0.254620361328125, "step": 4950 }, { "epoch": 0.04284441984937441, "grad_norm": 36.59003073800181, "learning_rate": 5.993208657706074e-06, "loss": 0.229400634765625, "step": 4955 }, { "epoch": 0.042887653370917676, "grad_norm": 34.29422728902959, "learning_rate": 5.993194949938527e-06, "loss": 0.348944091796875, "step": 4960 }, { "epoch": 0.04293088689246094, "grad_norm": 10.201280203481643, "learning_rate": 5.993181228366629e-06, "loss": 0.390411376953125, "step": 4965 }, { "epoch": 0.042974120414004204, "grad_norm": 11.570971254615149, "learning_rate": 5.993167492990443e-06, "loss": 0.287164306640625, "step": 4970 }, { "epoch": 0.04301735393554747, "grad_norm": 7.616921487416304, "learning_rate": 5.993153743810034e-06, "loss": 0.393536376953125, "step": 4975 }, { "epoch": 0.04306058745709073, "grad_norm": 24.93447363246874, "learning_rate": 5.993139980825464e-06, "loss": 0.4179534912109375, "step": 4980 }, { "epoch": 0.043103820978633994, "grad_norm": 9.219800352229392, "learning_rate": 5.993126204036797e-06, "loss": 0.1065185546875, "step": 4985 }, { "epoch": 0.04314705450017726, "grad_norm": 4.348020048219271, "learning_rate": 5.993112413444097e-06, "loss": 0.29765472412109373, "step": 4990 }, { "epoch": 0.04319028802172052, "grad_norm": 26.59185937208567, "learning_rate": 5.993098609047426e-06, "loss": 0.9328582763671875, "step": 4995 }, { "epoch": 0.043233521543263785, "grad_norm": 63.27637381367987, "learning_rate": 5.993084790846849e-06, "loss": 0.6402053833007812, "step": 5000 }, { "epoch": 0.04327675506480705, "grad_norm": 59.74239318197652, "learning_rate": 5.9930709588424295e-06, "loss": 0.297821044921875, "step": 5005 }, { "epoch": 0.04331998858635031, "grad_norm": 9.76460749327522, "learning_rate": 5.993057113034231e-06, "loss": 0.1773193359375, "step": 5010 }, { "epoch": 0.043363222107893576, "grad_norm": 4.308596522228607, "learning_rate": 5.993043253422318e-06, "loss": 0.1598541259765625, "step": 5015 }, { "epoch": 0.04340645562943684, "grad_norm": 11.975165289281026, "learning_rate": 5.993029380006754e-06, "loss": 0.394476318359375, "step": 5020 }, { "epoch": 0.0434496891509801, "grad_norm": 11.415575888797678, "learning_rate": 5.993015492787603e-06, "loss": 0.17757225036621094, "step": 5025 }, { "epoch": 0.04349292267252337, "grad_norm": 25.258033885329365, "learning_rate": 5.993001591764929e-06, "loss": 0.126446533203125, "step": 5030 }, { "epoch": 0.04353615619406663, "grad_norm": 7.33836202039701, "learning_rate": 5.992987676938796e-06, "loss": 0.12371635437011719, "step": 5035 }, { "epoch": 0.043579389715609894, "grad_norm": 1.4519252361671209, "learning_rate": 5.992973748309268e-06, "loss": 0.233355712890625, "step": 5040 }, { "epoch": 0.04362262323715316, "grad_norm": 3.350938682621421, "learning_rate": 5.99295980587641e-06, "loss": 0.2321441650390625, "step": 5045 }, { "epoch": 0.04366585675869642, "grad_norm": 28.5946099909493, "learning_rate": 5.992945849640285e-06, "loss": 0.3859230041503906, "step": 5050 }, { "epoch": 0.043709090280239685, "grad_norm": 9.498073405561168, "learning_rate": 5.99293187960096e-06, "loss": 0.15914306640625, "step": 5055 }, { "epoch": 0.04375232380178295, "grad_norm": 9.480914938331487, "learning_rate": 5.992917895758495e-06, "loss": 0.131756591796875, "step": 5060 }, { "epoch": 0.04379555732332621, "grad_norm": 4.744039985214977, "learning_rate": 5.99290389811296e-06, "loss": 0.0863311767578125, "step": 5065 }, { "epoch": 0.043838790844869475, "grad_norm": 8.173949205638676, "learning_rate": 5.992889886664414e-06, "loss": 0.6499267578125, "step": 5070 }, { "epoch": 0.04388202436641274, "grad_norm": 20.408329467763554, "learning_rate": 5.9928758614129254e-06, "loss": 0.156732177734375, "step": 5075 }, { "epoch": 0.043925257887956, "grad_norm": 85.98625797804516, "learning_rate": 5.992861822358559e-06, "loss": 0.74190673828125, "step": 5080 }, { "epoch": 0.043968491409499266, "grad_norm": 11.299762690256214, "learning_rate": 5.992847769501377e-06, "loss": 0.43668212890625, "step": 5085 }, { "epoch": 0.04401172493104253, "grad_norm": 18.343114029377908, "learning_rate": 5.992833702841445e-06, "loss": 0.3595062255859375, "step": 5090 }, { "epoch": 0.04405495845258579, "grad_norm": 33.83477488137649, "learning_rate": 5.992819622378829e-06, "loss": 0.496490478515625, "step": 5095 }, { "epoch": 0.044098191974129064, "grad_norm": 4.869752356586641, "learning_rate": 5.992805528113595e-06, "loss": 0.085418701171875, "step": 5100 }, { "epoch": 0.04414142549567233, "grad_norm": 3.7521527882763035, "learning_rate": 5.992791420045804e-06, "loss": 0.1541412353515625, "step": 5105 }, { "epoch": 0.04418465901721559, "grad_norm": 15.00884459302742, "learning_rate": 5.992777298175525e-06, "loss": 0.0536865234375, "step": 5110 }, { "epoch": 0.044227892538758855, "grad_norm": 32.23176343251736, "learning_rate": 5.99276316250282e-06, "loss": 0.2980194091796875, "step": 5115 }, { "epoch": 0.04427112606030212, "grad_norm": 3.1414306445468374, "learning_rate": 5.9927490130277565e-06, "loss": 0.263720703125, "step": 5120 }, { "epoch": 0.04431435958184538, "grad_norm": 10.837130625722995, "learning_rate": 5.9927348497504e-06, "loss": 0.21568527221679687, "step": 5125 }, { "epoch": 0.044357593103388646, "grad_norm": 2.1394911999758963, "learning_rate": 5.9927206726708135e-06, "loss": 0.028388214111328126, "step": 5130 }, { "epoch": 0.04440082662493191, "grad_norm": 30.61065198384019, "learning_rate": 5.992706481789064e-06, "loss": 0.15987701416015626, "step": 5135 }, { "epoch": 0.04444406014647517, "grad_norm": 8.20353169882918, "learning_rate": 5.992692277105217e-06, "loss": 0.22063217163085938, "step": 5140 }, { "epoch": 0.044487293668018436, "grad_norm": 53.69913006353032, "learning_rate": 5.992678058619337e-06, "loss": 0.501153564453125, "step": 5145 }, { "epoch": 0.0445305271895617, "grad_norm": 1.7003684220222839, "learning_rate": 5.992663826331491e-06, "loss": 0.14174652099609375, "step": 5150 }, { "epoch": 0.044573760711104964, "grad_norm": 82.52837094838806, "learning_rate": 5.992649580241744e-06, "loss": 0.276068115234375, "step": 5155 }, { "epoch": 0.04461699423264823, "grad_norm": 12.296279319928324, "learning_rate": 5.992635320350161e-06, "loss": 0.2113677978515625, "step": 5160 }, { "epoch": 0.04466022775419149, "grad_norm": 19.06107608971957, "learning_rate": 5.992621046656808e-06, "loss": 0.1692474365234375, "step": 5165 }, { "epoch": 0.044703461275734754, "grad_norm": 16.626106329790627, "learning_rate": 5.992606759161752e-06, "loss": 0.16316986083984375, "step": 5170 }, { "epoch": 0.04474669479727802, "grad_norm": 29.41299020219326, "learning_rate": 5.992592457865058e-06, "loss": 0.327783203125, "step": 5175 }, { "epoch": 0.04478992831882128, "grad_norm": 5.929458933924725, "learning_rate": 5.992578142766791e-06, "loss": 0.1285491943359375, "step": 5180 }, { "epoch": 0.044833161840364545, "grad_norm": 30.996325668786863, "learning_rate": 5.992563813867019e-06, "loss": 0.5677490234375, "step": 5185 }, { "epoch": 0.04487639536190781, "grad_norm": 19.7386448892094, "learning_rate": 5.992549471165807e-06, "loss": 0.09358291625976563, "step": 5190 }, { "epoch": 0.04491962888345107, "grad_norm": 11.663400389689656, "learning_rate": 5.992535114663221e-06, "loss": 0.247900390625, "step": 5195 }, { "epoch": 0.044962862404994336, "grad_norm": 28.268835098182137, "learning_rate": 5.992520744359327e-06, "loss": 0.2512518882751465, "step": 5200 }, { "epoch": 0.0450060959265376, "grad_norm": 12.050801335885017, "learning_rate": 5.992506360254193e-06, "loss": 0.209967041015625, "step": 5205 }, { "epoch": 0.04504932944808086, "grad_norm": 13.547537880938307, "learning_rate": 5.992491962347883e-06, "loss": 0.213275146484375, "step": 5210 }, { "epoch": 0.04509256296962413, "grad_norm": 21.22962028969486, "learning_rate": 5.992477550640465e-06, "loss": 0.21924591064453125, "step": 5215 }, { "epoch": 0.04513579649116739, "grad_norm": 2.3880495633003287, "learning_rate": 5.992463125132006e-06, "loss": 0.2731231689453125, "step": 5220 }, { "epoch": 0.045179030012710654, "grad_norm": 23.72703570079012, "learning_rate": 5.992448685822569e-06, "loss": 0.16650848388671874, "step": 5225 }, { "epoch": 0.04522226353425392, "grad_norm": 1.4923288533742756, "learning_rate": 5.992434232712224e-06, "loss": 0.09830818176269532, "step": 5230 }, { "epoch": 0.04526549705579718, "grad_norm": 2.3291455077376484, "learning_rate": 5.992419765801037e-06, "loss": 0.12141876220703125, "step": 5235 }, { "epoch": 0.045308730577340445, "grad_norm": 43.78865383683149, "learning_rate": 5.992405285089076e-06, "loss": 0.3643310546875, "step": 5240 }, { "epoch": 0.04535196409888371, "grad_norm": 41.196027891312845, "learning_rate": 5.992390790576404e-06, "loss": 0.31319580078125, "step": 5245 }, { "epoch": 0.04539519762042697, "grad_norm": 7.286004280706111, "learning_rate": 5.9923762822630916e-06, "loss": 0.11712646484375, "step": 5250 }, { "epoch": 0.045438431141970236, "grad_norm": 24.03646061296651, "learning_rate": 5.992361760149203e-06, "loss": 0.1716461181640625, "step": 5255 }, { "epoch": 0.0454816646635135, "grad_norm": 76.44425899781386, "learning_rate": 5.992347224234807e-06, "loss": 0.391839599609375, "step": 5260 }, { "epoch": 0.04552489818505676, "grad_norm": 5.172400988660927, "learning_rate": 5.99233267451997e-06, "loss": 0.1346435546875, "step": 5265 }, { "epoch": 0.045568131706600026, "grad_norm": 10.656034751786777, "learning_rate": 5.992318111004759e-06, "loss": 0.43778076171875, "step": 5270 }, { "epoch": 0.04561136522814329, "grad_norm": 3.614363966702664, "learning_rate": 5.992303533689242e-06, "loss": 0.1264739990234375, "step": 5275 }, { "epoch": 0.045654598749686554, "grad_norm": 0.6489531760048832, "learning_rate": 5.992288942573484e-06, "loss": 0.31519775390625, "step": 5280 }, { "epoch": 0.045697832271229824, "grad_norm": 6.991048476570912, "learning_rate": 5.992274337657555e-06, "loss": 0.313214111328125, "step": 5285 }, { "epoch": 0.04574106579277309, "grad_norm": 10.927998049848838, "learning_rate": 5.99225971894152e-06, "loss": 0.10377197265625, "step": 5290 }, { "epoch": 0.04578429931431635, "grad_norm": 56.22420313643989, "learning_rate": 5.992245086425449e-06, "loss": 0.30891571044921873, "step": 5295 }, { "epoch": 0.045827532835859615, "grad_norm": 5.8063611018682675, "learning_rate": 5.992230440109407e-06, "loss": 0.7464569091796875, "step": 5300 }, { "epoch": 0.04587076635740288, "grad_norm": 53.681341818267214, "learning_rate": 5.992215779993463e-06, "loss": 0.4928955078125, "step": 5305 }, { "epoch": 0.04591399987894614, "grad_norm": 15.774968163672975, "learning_rate": 5.9922011060776835e-06, "loss": 0.21908111572265626, "step": 5310 }, { "epoch": 0.045957233400489406, "grad_norm": 9.005584913827612, "learning_rate": 5.992186418362138e-06, "loss": 0.455487060546875, "step": 5315 }, { "epoch": 0.04600046692203267, "grad_norm": 3.8503215901245764, "learning_rate": 5.9921717168468935e-06, "loss": 0.16568603515625, "step": 5320 }, { "epoch": 0.04604370044357593, "grad_norm": 32.431733553595535, "learning_rate": 5.9921570015320165e-06, "loss": 0.485693359375, "step": 5325 }, { "epoch": 0.046086933965119196, "grad_norm": 19.331090291993405, "learning_rate": 5.992142272417576e-06, "loss": 0.144403076171875, "step": 5330 }, { "epoch": 0.04613016748666246, "grad_norm": 15.266804679387057, "learning_rate": 5.99212752950364e-06, "loss": 0.170318603515625, "step": 5335 }, { "epoch": 0.046173401008205724, "grad_norm": 15.969803658710331, "learning_rate": 5.992112772790275e-06, "loss": 0.1173583984375, "step": 5340 }, { "epoch": 0.04621663452974899, "grad_norm": 19.146502441249865, "learning_rate": 5.9920980022775515e-06, "loss": 0.1763641357421875, "step": 5345 }, { "epoch": 0.04625986805129225, "grad_norm": 18.39422827460778, "learning_rate": 5.9920832179655364e-06, "loss": 0.34879150390625, "step": 5350 }, { "epoch": 0.046303101572835514, "grad_norm": 10.636580661274575, "learning_rate": 5.992068419854298e-06, "loss": 0.3195556640625, "step": 5355 }, { "epoch": 0.04634633509437878, "grad_norm": 7.977010458927949, "learning_rate": 5.992053607943904e-06, "loss": 0.1600128173828125, "step": 5360 }, { "epoch": 0.04638956861592204, "grad_norm": 11.684389311798721, "learning_rate": 5.992038782234422e-06, "loss": 0.21513519287109376, "step": 5365 }, { "epoch": 0.046432802137465305, "grad_norm": 51.04389114914081, "learning_rate": 5.992023942725923e-06, "loss": 0.21299057006835936, "step": 5370 }, { "epoch": 0.04647603565900857, "grad_norm": 31.738481744115898, "learning_rate": 5.992009089418474e-06, "loss": 0.215020751953125, "step": 5375 }, { "epoch": 0.04651926918055183, "grad_norm": 10.300296918870458, "learning_rate": 5.9919942223121435e-06, "loss": 0.211083984375, "step": 5380 }, { "epoch": 0.046562502702095096, "grad_norm": 13.558396828959024, "learning_rate": 5.991979341406999e-06, "loss": 0.3111904144287109, "step": 5385 }, { "epoch": 0.04660573622363836, "grad_norm": 18.805694031070534, "learning_rate": 5.991964446703111e-06, "loss": 0.23170166015625, "step": 5390 }, { "epoch": 0.04664896974518162, "grad_norm": 3.6887954004992376, "learning_rate": 5.991949538200547e-06, "loss": 0.08118820190429688, "step": 5395 }, { "epoch": 0.04669220326672489, "grad_norm": 30.93653300793523, "learning_rate": 5.991934615899376e-06, "loss": 0.2014617919921875, "step": 5400 }, { "epoch": 0.04673543678826815, "grad_norm": 59.46396915921273, "learning_rate": 5.991919679799668e-06, "loss": 0.424200439453125, "step": 5405 }, { "epoch": 0.046778670309811414, "grad_norm": 5.831104581975946, "learning_rate": 5.991904729901489e-06, "loss": 0.11322174072265626, "step": 5410 }, { "epoch": 0.04682190383135468, "grad_norm": 72.15678155591795, "learning_rate": 5.991889766204911e-06, "loss": 0.524749755859375, "step": 5415 }, { "epoch": 0.04686513735289794, "grad_norm": 42.573933232783006, "learning_rate": 5.991874788710002e-06, "loss": 0.22782554626464843, "step": 5420 }, { "epoch": 0.046908370874441205, "grad_norm": 20.71718982816622, "learning_rate": 5.99185979741683e-06, "loss": 0.12984561920166016, "step": 5425 }, { "epoch": 0.04695160439598447, "grad_norm": 19.3827918291705, "learning_rate": 5.9918447923254654e-06, "loss": 0.1361083984375, "step": 5430 }, { "epoch": 0.04699483791752773, "grad_norm": 4.2430425767208115, "learning_rate": 5.991829773435977e-06, "loss": 0.2940834045410156, "step": 5435 }, { "epoch": 0.047038071439070996, "grad_norm": 11.538460311320542, "learning_rate": 5.991814740748434e-06, "loss": 0.1454071044921875, "step": 5440 }, { "epoch": 0.04708130496061426, "grad_norm": 25.7406048316848, "learning_rate": 5.991799694262905e-06, "loss": 0.18662109375, "step": 5445 }, { "epoch": 0.04712453848215752, "grad_norm": 45.58596486166943, "learning_rate": 5.991784633979461e-06, "loss": 0.291790771484375, "step": 5450 }, { "epoch": 0.047167772003700786, "grad_norm": 1.1139362103304185, "learning_rate": 5.99176955989817e-06, "loss": 0.20712509155273437, "step": 5455 }, { "epoch": 0.04721100552524405, "grad_norm": 24.78748400350814, "learning_rate": 5.991754472019104e-06, "loss": 0.2532470703125, "step": 5460 }, { "epoch": 0.047254239046787314, "grad_norm": 10.404738888424596, "learning_rate": 5.991739370342328e-06, "loss": 0.33775177001953127, "step": 5465 }, { "epoch": 0.047297472568330584, "grad_norm": 36.130658772410946, "learning_rate": 5.991724254867916e-06, "loss": 0.3421043395996094, "step": 5470 }, { "epoch": 0.04734070608987385, "grad_norm": 3.699947192880029, "learning_rate": 5.9917091255959365e-06, "loss": 0.09137840270996093, "step": 5475 }, { "epoch": 0.04738393961141711, "grad_norm": 1.3651021648765227, "learning_rate": 5.991693982526458e-06, "loss": 0.389752197265625, "step": 5480 }, { "epoch": 0.047427173132960375, "grad_norm": 24.599496638222973, "learning_rate": 5.991678825659551e-06, "loss": 0.32618408203125, "step": 5485 }, { "epoch": 0.04747040665450364, "grad_norm": 53.10010880389464, "learning_rate": 5.991663654995287e-06, "loss": 0.1435791015625, "step": 5490 }, { "epoch": 0.0475136401760469, "grad_norm": 6.321816640063645, "learning_rate": 5.9916484705337335e-06, "loss": 0.2631500244140625, "step": 5495 }, { "epoch": 0.047556873697590166, "grad_norm": 0.825682533672026, "learning_rate": 5.991633272274961e-06, "loss": 0.13858642578125, "step": 5500 }, { "epoch": 0.04760010721913343, "grad_norm": 43.546303101991896, "learning_rate": 5.991618060219041e-06, "loss": 0.1105743408203125, "step": 5505 }, { "epoch": 0.04764334074067669, "grad_norm": 8.089616225687083, "learning_rate": 5.991602834366043e-06, "loss": 0.21411895751953125, "step": 5510 }, { "epoch": 0.047686574262219956, "grad_norm": 40.78453281447651, "learning_rate": 5.991587594716037e-06, "loss": 0.2440673828125, "step": 5515 }, { "epoch": 0.04772980778376322, "grad_norm": 5.49142587040015, "learning_rate": 5.991572341269093e-06, "loss": 0.278558349609375, "step": 5520 }, { "epoch": 0.047773041305306484, "grad_norm": 3.1797675117649695, "learning_rate": 5.991557074025282e-06, "loss": 0.288446044921875, "step": 5525 }, { "epoch": 0.04781627482684975, "grad_norm": 5.91685954040495, "learning_rate": 5.991541792984673e-06, "loss": 0.18703155517578124, "step": 5530 }, { "epoch": 0.04785950834839301, "grad_norm": 14.17854109033618, "learning_rate": 5.991526498147339e-06, "loss": 0.09680252075195313, "step": 5535 }, { "epoch": 0.047902741869936274, "grad_norm": 7.18749211287506, "learning_rate": 5.9915111895133485e-06, "loss": 0.1025054931640625, "step": 5540 }, { "epoch": 0.04794597539147954, "grad_norm": 41.930664967060046, "learning_rate": 5.991495867082773e-06, "loss": 0.23470458984375, "step": 5545 }, { "epoch": 0.0479892089130228, "grad_norm": 6.3102695991435604, "learning_rate": 5.991480530855683e-06, "loss": 0.41558685302734377, "step": 5550 }, { "epoch": 0.048032442434566065, "grad_norm": 3.1679101135878645, "learning_rate": 5.9914651808321485e-06, "loss": 0.19187774658203124, "step": 5555 }, { "epoch": 0.04807567595610933, "grad_norm": 24.28019886036319, "learning_rate": 5.991449817012242e-06, "loss": 0.44095306396484374, "step": 5560 }, { "epoch": 0.04811890947765259, "grad_norm": 30.655298580104557, "learning_rate": 5.991434439396032e-06, "loss": 0.091375732421875, "step": 5565 }, { "epoch": 0.048162142999195856, "grad_norm": 3.796257391402833, "learning_rate": 5.991419047983592e-06, "loss": 0.159130859375, "step": 5570 }, { "epoch": 0.04820537652073912, "grad_norm": 1.6794770747020618, "learning_rate": 5.9914036427749905e-06, "loss": 0.0536285400390625, "step": 5575 }, { "epoch": 0.04824861004228238, "grad_norm": 85.18997877062215, "learning_rate": 5.9913882237703e-06, "loss": 0.35983123779296877, "step": 5580 }, { "epoch": 0.04829184356382565, "grad_norm": 5.5781935839328805, "learning_rate": 5.991372790969592e-06, "loss": 0.24957122802734374, "step": 5585 }, { "epoch": 0.04833507708536891, "grad_norm": 20.93719654481842, "learning_rate": 5.991357344372936e-06, "loss": 0.40439929962158205, "step": 5590 }, { "epoch": 0.048378310606912174, "grad_norm": 7.895897859932411, "learning_rate": 5.991341883980405e-06, "loss": 0.3046173095703125, "step": 5595 }, { "epoch": 0.04842154412845544, "grad_norm": 0.19736791597281267, "learning_rate": 5.99132640979207e-06, "loss": 0.32474365234375, "step": 5600 }, { "epoch": 0.0484647776499987, "grad_norm": 5.86574809855355, "learning_rate": 5.9913109218080015e-06, "loss": 0.286871337890625, "step": 5605 }, { "epoch": 0.048508011171541965, "grad_norm": 56.773430051067194, "learning_rate": 5.99129542002827e-06, "loss": 0.438348388671875, "step": 5610 }, { "epoch": 0.04855124469308523, "grad_norm": 28.06264718518467, "learning_rate": 5.99127990445295e-06, "loss": 0.460107421875, "step": 5615 }, { "epoch": 0.04859447821462849, "grad_norm": 25.442460470566175, "learning_rate": 5.991264375082111e-06, "loss": 0.29886703491210936, "step": 5620 }, { "epoch": 0.048637711736171756, "grad_norm": 36.72605380814695, "learning_rate": 5.991248831915824e-06, "loss": 0.162896728515625, "step": 5625 }, { "epoch": 0.04868094525771502, "grad_norm": 8.2400746206033, "learning_rate": 5.991233274954163e-06, "loss": 0.1352203369140625, "step": 5630 }, { "epoch": 0.04872417877925828, "grad_norm": 4.048490252786663, "learning_rate": 5.991217704197198e-06, "loss": 0.32560272216796876, "step": 5635 }, { "epoch": 0.048767412300801546, "grad_norm": 24.272113959505415, "learning_rate": 5.991202119645001e-06, "loss": 0.2224578857421875, "step": 5640 }, { "epoch": 0.04881064582234481, "grad_norm": 5.244722339456694, "learning_rate": 5.991186521297645e-06, "loss": 0.14744338989257813, "step": 5645 }, { "epoch": 0.048853879343888074, "grad_norm": 3.431806786365262, "learning_rate": 5.9911709091552e-06, "loss": 0.08762283325195312, "step": 5650 }, { "epoch": 0.048897112865431344, "grad_norm": 37.52484072809897, "learning_rate": 5.99115528321774e-06, "loss": 0.6543914794921875, "step": 5655 }, { "epoch": 0.04894034638697461, "grad_norm": 49.73684596146965, "learning_rate": 5.991139643485335e-06, "loss": 0.172369384765625, "step": 5660 }, { "epoch": 0.04898357990851787, "grad_norm": 32.99394498138074, "learning_rate": 5.991123989958059e-06, "loss": 0.23685989379882813, "step": 5665 }, { "epoch": 0.049026813430061135, "grad_norm": 3.7188061968181128, "learning_rate": 5.991108322635983e-06, "loss": 0.259844970703125, "step": 5670 }, { "epoch": 0.0490700469516044, "grad_norm": 14.523530039604768, "learning_rate": 5.991092641519181e-06, "loss": 0.23304443359375, "step": 5675 }, { "epoch": 0.04911328047314766, "grad_norm": 9.10207422187583, "learning_rate": 5.9910769466077226e-06, "loss": 0.19621810913085938, "step": 5680 }, { "epoch": 0.049156513994690926, "grad_norm": 48.702379244331674, "learning_rate": 5.991061237901683e-06, "loss": 0.35057373046875, "step": 5685 }, { "epoch": 0.04919974751623419, "grad_norm": 7.504215385751538, "learning_rate": 5.991045515401132e-06, "loss": 0.2747520446777344, "step": 5690 }, { "epoch": 0.04924298103777745, "grad_norm": 11.63539444006882, "learning_rate": 5.991029779106144e-06, "loss": 0.264178466796875, "step": 5695 }, { "epoch": 0.04928621455932072, "grad_norm": 29.949125999766892, "learning_rate": 5.991014029016791e-06, "loss": 0.36942138671875, "step": 5700 }, { "epoch": 0.04932944808086398, "grad_norm": 16.236158778376506, "learning_rate": 5.990998265133144e-06, "loss": 0.207623291015625, "step": 5705 }, { "epoch": 0.049372681602407244, "grad_norm": 22.452961314403222, "learning_rate": 5.99098248745528e-06, "loss": 0.180450439453125, "step": 5710 }, { "epoch": 0.04941591512395051, "grad_norm": 6.908966416463533, "learning_rate": 5.990966695983267e-06, "loss": 0.104534912109375, "step": 5715 }, { "epoch": 0.04945914864549377, "grad_norm": 11.675084681068842, "learning_rate": 5.9909508907171805e-06, "loss": 0.1356597900390625, "step": 5720 }, { "epoch": 0.049502382167037035, "grad_norm": 7.054240029042663, "learning_rate": 5.990935071657093e-06, "loss": 0.3472450256347656, "step": 5725 }, { "epoch": 0.0495456156885803, "grad_norm": 6.455013016335319, "learning_rate": 5.990919238803077e-06, "loss": 0.1850341796875, "step": 5730 }, { "epoch": 0.04958884921012356, "grad_norm": 16.41229722438304, "learning_rate": 5.990903392155206e-06, "loss": 0.22532958984375, "step": 5735 }, { "epoch": 0.049632082731666825, "grad_norm": 46.73642047023574, "learning_rate": 5.990887531713553e-06, "loss": 0.37891845703125, "step": 5740 }, { "epoch": 0.04967531625321009, "grad_norm": 17.265556344232913, "learning_rate": 5.9908716574781904e-06, "loss": 0.192144775390625, "step": 5745 }, { "epoch": 0.04971854977475335, "grad_norm": 24.101344443667635, "learning_rate": 5.990855769449192e-06, "loss": 0.39649658203125, "step": 5750 }, { "epoch": 0.049761783296296616, "grad_norm": 37.935740558840855, "learning_rate": 5.990839867626631e-06, "loss": 0.13017425537109376, "step": 5755 }, { "epoch": 0.04980501681783988, "grad_norm": 0.3331078932993158, "learning_rate": 5.990823952010581e-06, "loss": 0.34348602294921876, "step": 5760 }, { "epoch": 0.04984825033938314, "grad_norm": 6.999081598163134, "learning_rate": 5.9908080226011155e-06, "loss": 0.11233062744140625, "step": 5765 }, { "epoch": 0.04989148386092641, "grad_norm": 2.876159723220014, "learning_rate": 5.990792079398308e-06, "loss": 0.07938613891601562, "step": 5770 }, { "epoch": 0.04993471738246967, "grad_norm": 25.75930588637063, "learning_rate": 5.99077612240223e-06, "loss": 0.38751983642578125, "step": 5775 }, { "epoch": 0.049977950904012934, "grad_norm": 32.87061502406354, "learning_rate": 5.990760151612959e-06, "loss": 0.20768394470214843, "step": 5780 }, { "epoch": 0.0500211844255562, "grad_norm": 6.539087250072669, "learning_rate": 5.990744167030565e-06, "loss": 0.4195068359375, "step": 5785 }, { "epoch": 0.05006441794709946, "grad_norm": 9.033683939340511, "learning_rate": 5.990728168655124e-06, "loss": 0.3314544677734375, "step": 5790 }, { "epoch": 0.050107651468642725, "grad_norm": 2.695604992094483, "learning_rate": 5.990712156486708e-06, "loss": 0.099456787109375, "step": 5795 }, { "epoch": 0.05015088499018599, "grad_norm": 17.42130366673752, "learning_rate": 5.990696130525393e-06, "loss": 0.20919189453125, "step": 5800 }, { "epoch": 0.05019411851172925, "grad_norm": 1.0673074513614202, "learning_rate": 5.990680090771251e-06, "loss": 0.2626213073730469, "step": 5805 }, { "epoch": 0.050237352033272516, "grad_norm": 5.848231562693053, "learning_rate": 5.990664037224356e-06, "loss": 0.229827880859375, "step": 5810 }, { "epoch": 0.05028058555481578, "grad_norm": 15.091987364336436, "learning_rate": 5.990647969884784e-06, "loss": 0.31578369140625, "step": 5815 }, { "epoch": 0.05032381907635904, "grad_norm": 24.29578666413523, "learning_rate": 5.990631888752608e-06, "loss": 0.1866485595703125, "step": 5820 }, { "epoch": 0.050367052597902306, "grad_norm": 20.521440300290443, "learning_rate": 5.990615793827901e-06, "loss": 0.3220428466796875, "step": 5825 }, { "epoch": 0.05041028611944557, "grad_norm": 9.115855009224546, "learning_rate": 5.990599685110739e-06, "loss": 0.3164794921875, "step": 5830 }, { "epoch": 0.050453519640988834, "grad_norm": 3.456421160098619, "learning_rate": 5.990583562601194e-06, "loss": 0.0747650146484375, "step": 5835 }, { "epoch": 0.050496753162532104, "grad_norm": 92.04441914495627, "learning_rate": 5.990567426299343e-06, "loss": 0.15792236328125, "step": 5840 }, { "epoch": 0.05053998668407537, "grad_norm": 26.690643268695805, "learning_rate": 5.990551276205259e-06, "loss": 0.281494140625, "step": 5845 }, { "epoch": 0.05058322020561863, "grad_norm": 20.133715926919965, "learning_rate": 5.990535112319017e-06, "loss": 0.120294189453125, "step": 5850 }, { "epoch": 0.050626453727161895, "grad_norm": 5.611150561255582, "learning_rate": 5.990518934640691e-06, "loss": 0.334735107421875, "step": 5855 }, { "epoch": 0.05066968724870516, "grad_norm": 6.666848445575557, "learning_rate": 5.990502743170356e-06, "loss": 0.421923828125, "step": 5860 }, { "epoch": 0.05071292077024842, "grad_norm": 17.343028627451904, "learning_rate": 5.990486537908087e-06, "loss": 0.24091110229492188, "step": 5865 }, { "epoch": 0.050756154291791686, "grad_norm": 1.7524377789082881, "learning_rate": 5.990470318853958e-06, "loss": 0.15697021484375, "step": 5870 }, { "epoch": 0.05079938781333495, "grad_norm": 21.251502434567726, "learning_rate": 5.9904540860080444e-06, "loss": 0.233685302734375, "step": 5875 }, { "epoch": 0.05084262133487821, "grad_norm": 23.841522966462893, "learning_rate": 5.99043783937042e-06, "loss": 0.3464862823486328, "step": 5880 }, { "epoch": 0.05088585485642148, "grad_norm": 5.172520448601026, "learning_rate": 5.990421578941162e-06, "loss": 0.2395050048828125, "step": 5885 }, { "epoch": 0.05092908837796474, "grad_norm": 21.457342101190452, "learning_rate": 5.990405304720343e-06, "loss": 0.2264129638671875, "step": 5890 }, { "epoch": 0.050972321899508004, "grad_norm": 14.336523251792206, "learning_rate": 5.9903890167080385e-06, "loss": 0.434356689453125, "step": 5895 }, { "epoch": 0.05101555542105127, "grad_norm": 2.160130519080903, "learning_rate": 5.990372714904325e-06, "loss": 0.16024627685546874, "step": 5900 }, { "epoch": 0.05105878894259453, "grad_norm": 6.42332711034847, "learning_rate": 5.990356399309276e-06, "loss": 0.12638397216796876, "step": 5905 }, { "epoch": 0.051102022464137795, "grad_norm": 14.21354876996501, "learning_rate": 5.990340069922967e-06, "loss": 0.12627716064453126, "step": 5910 }, { "epoch": 0.05114525598568106, "grad_norm": 44.56353829143995, "learning_rate": 5.990323726745475e-06, "loss": 0.2958587646484375, "step": 5915 }, { "epoch": 0.05118848950722432, "grad_norm": 10.442643249457534, "learning_rate": 5.990307369776875e-06, "loss": 0.277569580078125, "step": 5920 }, { "epoch": 0.051231723028767585, "grad_norm": 10.766879159824855, "learning_rate": 5.99029099901724e-06, "loss": 0.214593505859375, "step": 5925 }, { "epoch": 0.05127495655031085, "grad_norm": 2.093119637848589, "learning_rate": 5.990274614466648e-06, "loss": 0.278143310546875, "step": 5930 }, { "epoch": 0.05131819007185411, "grad_norm": 34.12719065238651, "learning_rate": 5.990258216125172e-06, "loss": 0.3439849853515625, "step": 5935 }, { "epoch": 0.051361423593397376, "grad_norm": 4.6050341016256215, "learning_rate": 5.990241803992891e-06, "loss": 0.33254852294921877, "step": 5940 }, { "epoch": 0.05140465711494064, "grad_norm": 12.571190936327726, "learning_rate": 5.990225378069879e-06, "loss": 0.3219451904296875, "step": 5945 }, { "epoch": 0.0514478906364839, "grad_norm": 19.123240521513377, "learning_rate": 5.990208938356212e-06, "loss": 0.6510658264160156, "step": 5950 }, { "epoch": 0.05149112415802717, "grad_norm": 23.504920781271643, "learning_rate": 5.9901924848519645e-06, "loss": 0.606884765625, "step": 5955 }, { "epoch": 0.05153435767957043, "grad_norm": 11.687394742668694, "learning_rate": 5.990176017557214e-06, "loss": 0.34431304931640627, "step": 5960 }, { "epoch": 0.051577591201113694, "grad_norm": 5.332216391739885, "learning_rate": 5.990159536472037e-06, "loss": 0.298284912109375, "step": 5965 }, { "epoch": 0.05162082472265696, "grad_norm": 10.163592623113177, "learning_rate": 5.990143041596507e-06, "loss": 0.2527008056640625, "step": 5970 }, { "epoch": 0.05166405824420022, "grad_norm": 12.464352836350306, "learning_rate": 5.990126532930702e-06, "loss": 0.54053955078125, "step": 5975 }, { "epoch": 0.051707291765743485, "grad_norm": 34.527004257059836, "learning_rate": 5.990110010474698e-06, "loss": 0.413232421875, "step": 5980 }, { "epoch": 0.05175052528728675, "grad_norm": 15.635398806512002, "learning_rate": 5.99009347422857e-06, "loss": 0.23136215209960936, "step": 5985 }, { "epoch": 0.05179375880883001, "grad_norm": 3.0785669935266453, "learning_rate": 5.990076924192395e-06, "loss": 0.13506927490234374, "step": 5990 }, { "epoch": 0.051836992330373276, "grad_norm": 7.891222892419634, "learning_rate": 5.99006036036625e-06, "loss": 0.1238311767578125, "step": 5995 }, { "epoch": 0.05188022585191654, "grad_norm": 57.738130951074915, "learning_rate": 5.99004378275021e-06, "loss": 0.1376983642578125, "step": 6000 }, { "epoch": 0.0519234593734598, "grad_norm": 0.7079129787351767, "learning_rate": 5.990027191344354e-06, "loss": 0.2752525329589844, "step": 6005 }, { "epoch": 0.05196669289500307, "grad_norm": 18.606738491616742, "learning_rate": 5.990010586148756e-06, "loss": 0.45446929931640623, "step": 6010 }, { "epoch": 0.05200992641654633, "grad_norm": 3.25139458974645, "learning_rate": 5.989993967163493e-06, "loss": 0.11529083251953125, "step": 6015 }, { "epoch": 0.052053159938089594, "grad_norm": 11.836717584086088, "learning_rate": 5.989977334388642e-06, "loss": 0.347796630859375, "step": 6020 }, { "epoch": 0.052096393459632864, "grad_norm": 3.6793043761522886, "learning_rate": 5.9899606878242796e-06, "loss": 0.4477264404296875, "step": 6025 }, { "epoch": 0.05213962698117613, "grad_norm": 6.146268959899052, "learning_rate": 5.989944027470483e-06, "loss": 0.2206727981567383, "step": 6030 }, { "epoch": 0.05218286050271939, "grad_norm": 36.229264262959816, "learning_rate": 5.9899273533273296e-06, "loss": 0.13801002502441406, "step": 6035 }, { "epoch": 0.052226094024262655, "grad_norm": 11.225451002221515, "learning_rate": 5.9899106653948945e-06, "loss": 0.50455322265625, "step": 6040 }, { "epoch": 0.05226932754580592, "grad_norm": 81.69961420366981, "learning_rate": 5.989893963673255e-06, "loss": 0.33173828125, "step": 6045 }, { "epoch": 0.05231256106734918, "grad_norm": 14.664648758279121, "learning_rate": 5.98987724816249e-06, "loss": 0.098052978515625, "step": 6050 }, { "epoch": 0.052355794588892446, "grad_norm": 21.353530413421552, "learning_rate": 5.989860518862675e-06, "loss": 0.087689208984375, "step": 6055 }, { "epoch": 0.05239902811043571, "grad_norm": 36.838708737217566, "learning_rate": 5.989843775773888e-06, "loss": 0.38897552490234377, "step": 6060 }, { "epoch": 0.05244226163197897, "grad_norm": 29.927215055861414, "learning_rate": 5.989827018896204e-06, "loss": 0.1000457763671875, "step": 6065 }, { "epoch": 0.05248549515352224, "grad_norm": 14.643229734092289, "learning_rate": 5.989810248229703e-06, "loss": 0.16366729736328126, "step": 6070 }, { "epoch": 0.0525287286750655, "grad_norm": 8.28677780273019, "learning_rate": 5.989793463774462e-06, "loss": 0.157647705078125, "step": 6075 }, { "epoch": 0.052571962196608764, "grad_norm": 3.453840867798241, "learning_rate": 5.989776665530556e-06, "loss": 0.43291549682617186, "step": 6080 }, { "epoch": 0.05261519571815203, "grad_norm": 9.682277153386591, "learning_rate": 5.989759853498066e-06, "loss": 0.091192626953125, "step": 6085 }, { "epoch": 0.05265842923969529, "grad_norm": 2.302759411758226, "learning_rate": 5.989743027677067e-06, "loss": 0.155096435546875, "step": 6090 }, { "epoch": 0.052701662761238555, "grad_norm": 14.560025962372503, "learning_rate": 5.989726188067637e-06, "loss": 0.3204132080078125, "step": 6095 }, { "epoch": 0.05274489628278182, "grad_norm": 18.88669447072718, "learning_rate": 5.9897093346698555e-06, "loss": 0.15635948181152343, "step": 6100 }, { "epoch": 0.05278812980432508, "grad_norm": 78.30057303841335, "learning_rate": 5.989692467483797e-06, "loss": 0.19756507873535156, "step": 6105 }, { "epoch": 0.052831363325868345, "grad_norm": 8.75111484381708, "learning_rate": 5.989675586509541e-06, "loss": 0.5751708984375, "step": 6110 }, { "epoch": 0.05287459684741161, "grad_norm": 5.508384506538185, "learning_rate": 5.989658691747167e-06, "loss": 0.17559852600097656, "step": 6115 }, { "epoch": 0.05291783036895487, "grad_norm": 6.28167687102444, "learning_rate": 5.9896417831967495e-06, "loss": 0.093548583984375, "step": 6120 }, { "epoch": 0.052961063890498136, "grad_norm": 20.27877942137751, "learning_rate": 5.989624860858369e-06, "loss": 0.1420166015625, "step": 6125 }, { "epoch": 0.0530042974120414, "grad_norm": 28.769402458993326, "learning_rate": 5.989607924732102e-06, "loss": 0.1720062255859375, "step": 6130 }, { "epoch": 0.05304753093358466, "grad_norm": 24.46929913276026, "learning_rate": 5.989590974818029e-06, "loss": 0.3107269287109375, "step": 6135 }, { "epoch": 0.05309076445512793, "grad_norm": 20.885928133833186, "learning_rate": 5.989574011116226e-06, "loss": 0.22763824462890625, "step": 6140 }, { "epoch": 0.05313399797667119, "grad_norm": 32.93768721310352, "learning_rate": 5.989557033626771e-06, "loss": 0.42392578125, "step": 6145 }, { "epoch": 0.053177231498214454, "grad_norm": 17.66786074767603, "learning_rate": 5.9895400423497435e-06, "loss": 0.432562255859375, "step": 6150 }, { "epoch": 0.05322046501975772, "grad_norm": 6.967714139710072, "learning_rate": 5.989523037285222e-06, "loss": 0.10483474731445312, "step": 6155 }, { "epoch": 0.05326369854130098, "grad_norm": 11.363356603441874, "learning_rate": 5.989506018433284e-06, "loss": 0.28016357421875, "step": 6160 }, { "epoch": 0.053306932062844245, "grad_norm": 13.949979682513952, "learning_rate": 5.989488985794008e-06, "loss": 0.266229248046875, "step": 6165 }, { "epoch": 0.05335016558438751, "grad_norm": 9.954585072214199, "learning_rate": 5.989471939367473e-06, "loss": 0.29259033203125, "step": 6170 }, { "epoch": 0.05339339910593077, "grad_norm": 4.952254486905613, "learning_rate": 5.989454879153759e-06, "loss": 0.195562744140625, "step": 6175 }, { "epoch": 0.053436632627474036, "grad_norm": 27.02204380859462, "learning_rate": 5.989437805152942e-06, "loss": 0.1536041259765625, "step": 6180 }, { "epoch": 0.0534798661490173, "grad_norm": 1.287835419748995, "learning_rate": 5.989420717365102e-06, "loss": 0.2527374267578125, "step": 6185 }, { "epoch": 0.05352309967056056, "grad_norm": 14.200024295715652, "learning_rate": 5.989403615790319e-06, "loss": 0.139532470703125, "step": 6190 }, { "epoch": 0.05356633319210383, "grad_norm": 14.97194182202907, "learning_rate": 5.989386500428669e-06, "loss": 0.20598602294921875, "step": 6195 }, { "epoch": 0.05360956671364709, "grad_norm": 16.89910745208828, "learning_rate": 5.989369371280233e-06, "loss": 0.30275726318359375, "step": 6200 }, { "epoch": 0.053652800235190354, "grad_norm": 34.30553038781233, "learning_rate": 5.989352228345091e-06, "loss": 0.5645538330078125, "step": 6205 }, { "epoch": 0.053696033756733624, "grad_norm": 4.427732570180271, "learning_rate": 5.989335071623319e-06, "loss": 0.4647216796875, "step": 6210 }, { "epoch": 0.05373926727827689, "grad_norm": 12.497242718459741, "learning_rate": 5.989317901114998e-06, "loss": 0.248040771484375, "step": 6215 }, { "epoch": 0.05378250079982015, "grad_norm": 29.69848614067171, "learning_rate": 5.9893007168202075e-06, "loss": 0.42506103515625, "step": 6220 }, { "epoch": 0.053825734321363415, "grad_norm": 2.5711161866320364, "learning_rate": 5.989283518739026e-06, "loss": 0.105108642578125, "step": 6225 }, { "epoch": 0.05386896784290668, "grad_norm": 9.550399303974382, "learning_rate": 5.989266306871533e-06, "loss": 0.267852783203125, "step": 6230 }, { "epoch": 0.05391220136444994, "grad_norm": 4.753423026975241, "learning_rate": 5.989249081217808e-06, "loss": 0.1549346923828125, "step": 6235 }, { "epoch": 0.053955434885993206, "grad_norm": 20.322147233441363, "learning_rate": 5.989231841777931e-06, "loss": 0.177325439453125, "step": 6240 }, { "epoch": 0.05399866840753647, "grad_norm": 9.536558367370969, "learning_rate": 5.98921458855198e-06, "loss": 0.1953125, "step": 6245 }, { "epoch": 0.05404190192907973, "grad_norm": 55.0044181598354, "learning_rate": 5.989197321540036e-06, "loss": 0.548486328125, "step": 6250 }, { "epoch": 0.054085135450623, "grad_norm": 53.2801623543539, "learning_rate": 5.989180040742178e-06, "loss": 0.2865020751953125, "step": 6255 }, { "epoch": 0.05412836897216626, "grad_norm": 3.523705196308838, "learning_rate": 5.989162746158485e-06, "loss": 0.38496551513671873, "step": 6260 }, { "epoch": 0.054171602493709524, "grad_norm": 10.408438482692244, "learning_rate": 5.989145437789039e-06, "loss": 0.266949462890625, "step": 6265 }, { "epoch": 0.05421483601525279, "grad_norm": 25.218065984650213, "learning_rate": 5.989128115633917e-06, "loss": 0.302099609375, "step": 6270 }, { "epoch": 0.05425806953679605, "grad_norm": 59.19835419529653, "learning_rate": 5.989110779693202e-06, "loss": 0.2971649169921875, "step": 6275 }, { "epoch": 0.054301303058339315, "grad_norm": 14.488576859298448, "learning_rate": 5.98909342996697e-06, "loss": 0.147906494140625, "step": 6280 }, { "epoch": 0.05434453657988258, "grad_norm": 25.87072657069233, "learning_rate": 5.989076066455305e-06, "loss": 0.17337646484375, "step": 6285 }, { "epoch": 0.05438777010142584, "grad_norm": 3.2541622669511834, "learning_rate": 5.989058689158284e-06, "loss": 0.1962677001953125, "step": 6290 }, { "epoch": 0.054431003622969105, "grad_norm": 9.85044206372148, "learning_rate": 5.989041298075989e-06, "loss": 0.3478302001953125, "step": 6295 }, { "epoch": 0.05447423714451237, "grad_norm": 8.232563762227075, "learning_rate": 5.9890238932085e-06, "loss": 0.17574462890625, "step": 6300 }, { "epoch": 0.05451747066605563, "grad_norm": 12.046053565613068, "learning_rate": 5.989006474555896e-06, "loss": 0.1551116943359375, "step": 6305 }, { "epoch": 0.054560704187598896, "grad_norm": 92.72066981419472, "learning_rate": 5.988989042118259e-06, "loss": 0.7409423828125, "step": 6310 }, { "epoch": 0.05460393770914216, "grad_norm": 33.92850917082805, "learning_rate": 5.988971595895669e-06, "loss": 0.224169921875, "step": 6315 }, { "epoch": 0.054647171230685423, "grad_norm": 11.977649469987155, "learning_rate": 5.988954135888205e-06, "loss": 0.092999267578125, "step": 6320 }, { "epoch": 0.05469040475222869, "grad_norm": 14.489646128134678, "learning_rate": 5.988936662095949e-06, "loss": 0.110205078125, "step": 6325 }, { "epoch": 0.05473363827377195, "grad_norm": 27.780938644007694, "learning_rate": 5.988919174518981e-06, "loss": 0.1559326171875, "step": 6330 }, { "epoch": 0.054776871795315214, "grad_norm": 12.816357924783196, "learning_rate": 5.988901673157383e-06, "loss": 0.2402679443359375, "step": 6335 }, { "epoch": 0.05482010531685848, "grad_norm": 21.53351294538774, "learning_rate": 5.988884158011233e-06, "loss": 0.201678466796875, "step": 6340 }, { "epoch": 0.05486333883840174, "grad_norm": 1.488983398202143, "learning_rate": 5.988866629080615e-06, "loss": 0.0678802490234375, "step": 6345 }, { "epoch": 0.054906572359945005, "grad_norm": 13.883651430799693, "learning_rate": 5.988849086365607e-06, "loss": 0.12684783935546876, "step": 6350 }, { "epoch": 0.05494980588148827, "grad_norm": 7.654257661921595, "learning_rate": 5.988831529866292e-06, "loss": 0.3094602584838867, "step": 6355 }, { "epoch": 0.05499303940303153, "grad_norm": 17.827806761619378, "learning_rate": 5.98881395958275e-06, "loss": 0.219140625, "step": 6360 }, { "epoch": 0.055036272924574796, "grad_norm": 6.873752965897173, "learning_rate": 5.9887963755150615e-06, "loss": 0.16688232421875, "step": 6365 }, { "epoch": 0.05507950644611806, "grad_norm": 3.7722471811502927, "learning_rate": 5.988778777663308e-06, "loss": 0.219549560546875, "step": 6370 }, { "epoch": 0.05512273996766132, "grad_norm": 15.43533027352225, "learning_rate": 5.988761166027572e-06, "loss": 0.147735595703125, "step": 6375 }, { "epoch": 0.05516597348920459, "grad_norm": 2.40434252892306, "learning_rate": 5.988743540607932e-06, "loss": 0.11253852844238281, "step": 6380 }, { "epoch": 0.05520920701074785, "grad_norm": 25.816662764352493, "learning_rate": 5.988725901404472e-06, "loss": 0.2330596923828125, "step": 6385 }, { "epoch": 0.055252440532291114, "grad_norm": 50.09758618927913, "learning_rate": 5.988708248417272e-06, "loss": 0.239404296875, "step": 6390 }, { "epoch": 0.055295674053834384, "grad_norm": 21.878753003739572, "learning_rate": 5.9886905816464144e-06, "loss": 0.2404149055480957, "step": 6395 }, { "epoch": 0.05533890757537765, "grad_norm": 4.9358826256992465, "learning_rate": 5.988672901091979e-06, "loss": 0.3874755859375, "step": 6400 }, { "epoch": 0.05538214109692091, "grad_norm": 10.726212627780551, "learning_rate": 5.9886552067540485e-06, "loss": 0.7289901733398437, "step": 6405 }, { "epoch": 0.055425374618464175, "grad_norm": 13.932212425860774, "learning_rate": 5.988637498632704e-06, "loss": 0.285955810546875, "step": 6410 }, { "epoch": 0.05546860814000744, "grad_norm": 33.51283504373474, "learning_rate": 5.988619776728028e-06, "loss": 0.1954357147216797, "step": 6415 }, { "epoch": 0.0555118416615507, "grad_norm": 34.686318075555384, "learning_rate": 5.988602041040101e-06, "loss": 0.2401580810546875, "step": 6420 }, { "epoch": 0.055555075183093966, "grad_norm": 34.94094454698152, "learning_rate": 5.988584291569006e-06, "loss": 0.2020904541015625, "step": 6425 }, { "epoch": 0.05559830870463723, "grad_norm": 3.6580438212795245, "learning_rate": 5.988566528314825e-06, "loss": 0.238165283203125, "step": 6430 }, { "epoch": 0.05564154222618049, "grad_norm": 37.826115684209775, "learning_rate": 5.988548751277638e-06, "loss": 0.474310302734375, "step": 6435 }, { "epoch": 0.05568477574772376, "grad_norm": 4.947330605450396, "learning_rate": 5.988530960457527e-06, "loss": 0.10198974609375, "step": 6440 }, { "epoch": 0.05572800926926702, "grad_norm": 28.388981135690262, "learning_rate": 5.988513155854578e-06, "loss": 0.24854736328125, "step": 6445 }, { "epoch": 0.055771242790810284, "grad_norm": 1.7248619685658226, "learning_rate": 5.988495337468869e-06, "loss": 0.28440132141113283, "step": 6450 }, { "epoch": 0.05581447631235355, "grad_norm": 14.167829876542866, "learning_rate": 5.9884775053004845e-06, "loss": 0.06775741577148438, "step": 6455 }, { "epoch": 0.05585770983389681, "grad_norm": 5.566863724977187, "learning_rate": 5.988459659349505e-06, "loss": 0.19974365234375, "step": 6460 }, { "epoch": 0.055900943355440075, "grad_norm": 8.130881264653238, "learning_rate": 5.988441799616014e-06, "loss": 0.12739906311035157, "step": 6465 }, { "epoch": 0.05594417687698334, "grad_norm": 3.2425408778778255, "learning_rate": 5.988423926100093e-06, "loss": 0.1770050048828125, "step": 6470 }, { "epoch": 0.0559874103985266, "grad_norm": 6.658613393337649, "learning_rate": 5.988406038801825e-06, "loss": 0.112188720703125, "step": 6475 }, { "epoch": 0.056030643920069866, "grad_norm": 6.865032319833133, "learning_rate": 5.988388137721293e-06, "loss": 0.11175880432128907, "step": 6480 }, { "epoch": 0.05607387744161313, "grad_norm": 2.341715209463163, "learning_rate": 5.988370222858578e-06, "loss": 0.602081298828125, "step": 6485 }, { "epoch": 0.05611711096315639, "grad_norm": 4.968692743882861, "learning_rate": 5.988352294213764e-06, "loss": 0.3009361267089844, "step": 6490 }, { "epoch": 0.056160344484699656, "grad_norm": 46.55127239761504, "learning_rate": 5.988334351786933e-06, "loss": 0.255133056640625, "step": 6495 }, { "epoch": 0.05620357800624292, "grad_norm": 11.785904801307504, "learning_rate": 5.98831639557817e-06, "loss": 0.299163818359375, "step": 6500 }, { "epoch": 0.056246811527786184, "grad_norm": 15.29892430545126, "learning_rate": 5.988298425587553e-06, "loss": 0.531341552734375, "step": 6505 }, { "epoch": 0.05629004504932945, "grad_norm": 1.558076322544612, "learning_rate": 5.988280441815169e-06, "loss": 0.152618408203125, "step": 6510 }, { "epoch": 0.05633327857087271, "grad_norm": 0.6605780861750016, "learning_rate": 5.9882624442611e-06, "loss": 0.5343704223632812, "step": 6515 }, { "epoch": 0.056376512092415974, "grad_norm": 1.1336219340660192, "learning_rate": 5.988244432925428e-06, "loss": 0.023590087890625, "step": 6520 }, { "epoch": 0.05641974561395924, "grad_norm": 7.666118900635384, "learning_rate": 5.9882264078082374e-06, "loss": 0.4462249755859375, "step": 6525 }, { "epoch": 0.0564629791355025, "grad_norm": 26.188034270372185, "learning_rate": 5.9882083689096104e-06, "loss": 0.3285179138183594, "step": 6530 }, { "epoch": 0.056506212657045765, "grad_norm": 7.892665571560003, "learning_rate": 5.9881903162296295e-06, "loss": 0.234942626953125, "step": 6535 }, { "epoch": 0.05654944617858903, "grad_norm": 49.56403589455168, "learning_rate": 5.98817224976838e-06, "loss": 0.3453369140625, "step": 6540 }, { "epoch": 0.05659267970013229, "grad_norm": 2.688103460661735, "learning_rate": 5.9881541695259446e-06, "loss": 0.22970733642578126, "step": 6545 }, { "epoch": 0.056635913221675556, "grad_norm": 1.3460850039957284, "learning_rate": 5.988136075502405e-06, "loss": 0.119464111328125, "step": 6550 }, { "epoch": 0.05667914674321882, "grad_norm": 3.4002973631576126, "learning_rate": 5.988117967697847e-06, "loss": 0.0648193359375, "step": 6555 }, { "epoch": 0.05672238026476208, "grad_norm": 45.31446473736835, "learning_rate": 5.988099846112351e-06, "loss": 0.541552734375, "step": 6560 }, { "epoch": 0.05676561378630535, "grad_norm": 39.6113286291308, "learning_rate": 5.988081710746004e-06, "loss": 0.329705810546875, "step": 6565 }, { "epoch": 0.05680884730784861, "grad_norm": 9.995253600749033, "learning_rate": 5.9880635615988885e-06, "loss": 0.2397979736328125, "step": 6570 }, { "epoch": 0.056852080829391874, "grad_norm": 24.53965571501581, "learning_rate": 5.988045398671086e-06, "loss": 0.14229583740234375, "step": 6575 }, { "epoch": 0.056895314350935144, "grad_norm": 5.675180309403652, "learning_rate": 5.988027221962684e-06, "loss": 0.211798095703125, "step": 6580 }, { "epoch": 0.05693854787247841, "grad_norm": 30.25179223003392, "learning_rate": 5.988009031473765e-06, "loss": 0.16822509765625, "step": 6585 }, { "epoch": 0.05698178139402167, "grad_norm": 6.359835139849915, "learning_rate": 5.987990827204411e-06, "loss": 0.207879638671875, "step": 6590 }, { "epoch": 0.057025014915564935, "grad_norm": 7.359896503008147, "learning_rate": 5.987972609154707e-06, "loss": 0.149853515625, "step": 6595 }, { "epoch": 0.0570682484371082, "grad_norm": 33.118384097913214, "learning_rate": 5.987954377324738e-06, "loss": 0.5269979476928711, "step": 6600 }, { "epoch": 0.05711148195865146, "grad_norm": 9.572802900849098, "learning_rate": 5.987936131714588e-06, "loss": 0.11392364501953126, "step": 6605 }, { "epoch": 0.057154715480194726, "grad_norm": 7.410497833580914, "learning_rate": 5.98791787232434e-06, "loss": 0.4846343994140625, "step": 6610 }, { "epoch": 0.05719794900173799, "grad_norm": 25.396422971206746, "learning_rate": 5.987899599154079e-06, "loss": 0.5823226928710937, "step": 6615 }, { "epoch": 0.05724118252328125, "grad_norm": 7.183569472488958, "learning_rate": 5.987881312203889e-06, "loss": 0.49287261962890627, "step": 6620 }, { "epoch": 0.05728441604482452, "grad_norm": 20.05153223486897, "learning_rate": 5.9878630114738544e-06, "loss": 0.1078155517578125, "step": 6625 }, { "epoch": 0.05732764956636778, "grad_norm": 4.373601906320577, "learning_rate": 5.987844696964059e-06, "loss": 0.09884796142578126, "step": 6630 }, { "epoch": 0.057370883087911044, "grad_norm": 20.69080387017889, "learning_rate": 5.9878263686745885e-06, "loss": 0.2669036865234375, "step": 6635 }, { "epoch": 0.05741411660945431, "grad_norm": 0.5990554918564157, "learning_rate": 5.987808026605527e-06, "loss": 0.122076416015625, "step": 6640 }, { "epoch": 0.05745735013099757, "grad_norm": 7.5496614486680995, "learning_rate": 5.987789670756958e-06, "loss": 0.34996337890625, "step": 6645 }, { "epoch": 0.057500583652540835, "grad_norm": 27.21577802964631, "learning_rate": 5.987771301128969e-06, "loss": 0.23111572265625, "step": 6650 }, { "epoch": 0.0575438171740841, "grad_norm": 10.389946843376281, "learning_rate": 5.98775291772164e-06, "loss": 0.1244140625, "step": 6655 }, { "epoch": 0.05758705069562736, "grad_norm": 23.32313712845842, "learning_rate": 5.987734520535061e-06, "loss": 0.19346923828125, "step": 6660 }, { "epoch": 0.057630284217170626, "grad_norm": 2.0081145734738715, "learning_rate": 5.987716109569313e-06, "loss": 0.1716217041015625, "step": 6665 }, { "epoch": 0.05767351773871389, "grad_norm": 6.74157986532923, "learning_rate": 5.987697684824482e-06, "loss": 0.1510498046875, "step": 6670 }, { "epoch": 0.05771675126025715, "grad_norm": 5.714290066465693, "learning_rate": 5.987679246300654e-06, "loss": 0.143511962890625, "step": 6675 }, { "epoch": 0.057759984781800416, "grad_norm": 6.723565972266086, "learning_rate": 5.987660793997913e-06, "loss": 0.21044464111328126, "step": 6680 }, { "epoch": 0.05780321830334368, "grad_norm": 5.920591091062968, "learning_rate": 5.987642327916345e-06, "loss": 0.0677642822265625, "step": 6685 }, { "epoch": 0.057846451824886944, "grad_norm": 6.244442320879515, "learning_rate": 5.987623848056034e-06, "loss": 0.37908935546875, "step": 6690 }, { "epoch": 0.05788968534643021, "grad_norm": 12.660404752844551, "learning_rate": 5.987605354417066e-06, "loss": 0.34984130859375, "step": 6695 }, { "epoch": 0.05793291886797347, "grad_norm": 1.0027230201895923, "learning_rate": 5.987586846999526e-06, "loss": 0.1604644775390625, "step": 6700 }, { "epoch": 0.057976152389516734, "grad_norm": 1.582773853866542, "learning_rate": 5.9875683258035e-06, "loss": 0.30054244995117185, "step": 6705 }, { "epoch": 0.05801938591106, "grad_norm": 51.22404883002378, "learning_rate": 5.987549790829072e-06, "loss": 0.5071060180664062, "step": 6710 }, { "epoch": 0.05806261943260326, "grad_norm": 15.883645357779635, "learning_rate": 5.987531242076329e-06, "loss": 0.4895347595214844, "step": 6715 }, { "epoch": 0.058105852954146525, "grad_norm": 1.4274392355752579, "learning_rate": 5.987512679545356e-06, "loss": 0.0433135986328125, "step": 6720 }, { "epoch": 0.05814908647568979, "grad_norm": 0.993599524351508, "learning_rate": 5.987494103236238e-06, "loss": 0.38721923828125, "step": 6725 }, { "epoch": 0.05819231999723305, "grad_norm": 0.6511565287274389, "learning_rate": 5.98747551314906e-06, "loss": 0.662127685546875, "step": 6730 }, { "epoch": 0.058235553518776316, "grad_norm": 5.178141959265238, "learning_rate": 5.987456909283911e-06, "loss": 0.17806396484375, "step": 6735 }, { "epoch": 0.05827878704031958, "grad_norm": 26.16033272482549, "learning_rate": 5.987438291640874e-06, "loss": 0.2427764892578125, "step": 6740 }, { "epoch": 0.05832202056186284, "grad_norm": 6.203256493219313, "learning_rate": 5.987419660220036e-06, "loss": 0.064752197265625, "step": 6745 }, { "epoch": 0.05836525408340611, "grad_norm": 5.943486265142193, "learning_rate": 5.987401015021482e-06, "loss": 0.3089111328125, "step": 6750 }, { "epoch": 0.05840848760494937, "grad_norm": 28.315719675106966, "learning_rate": 5.987382356045298e-06, "loss": 0.12679443359375, "step": 6755 }, { "epoch": 0.058451721126492634, "grad_norm": 3.5145006028764265, "learning_rate": 5.987363683291571e-06, "loss": 0.315435791015625, "step": 6760 }, { "epoch": 0.058494954648035904, "grad_norm": 18.248146841860038, "learning_rate": 5.987344996760387e-06, "loss": 0.0624755859375, "step": 6765 }, { "epoch": 0.05853818816957917, "grad_norm": 21.285428468480514, "learning_rate": 5.987326296451832e-06, "loss": 0.179833984375, "step": 6770 }, { "epoch": 0.05858142169112243, "grad_norm": 12.82691495046103, "learning_rate": 5.98730758236599e-06, "loss": 0.2126220703125, "step": 6775 }, { "epoch": 0.058624655212665695, "grad_norm": 145.97420612374668, "learning_rate": 5.987288854502952e-06, "loss": 0.243115234375, "step": 6780 }, { "epoch": 0.05866788873420896, "grad_norm": 15.591003166882542, "learning_rate": 5.987270112862801e-06, "loss": 0.226019287109375, "step": 6785 }, { "epoch": 0.05871112225575222, "grad_norm": 30.74098427640111, "learning_rate": 5.987251357445624e-06, "loss": 0.3072364807128906, "step": 6790 }, { "epoch": 0.058754355777295486, "grad_norm": 5.712587277748757, "learning_rate": 5.987232588251508e-06, "loss": 0.1674713134765625, "step": 6795 }, { "epoch": 0.05879758929883875, "grad_norm": 14.952493813571305, "learning_rate": 5.987213805280538e-06, "loss": 0.5604522705078125, "step": 6800 }, { "epoch": 0.05884082282038201, "grad_norm": 25.14682960903301, "learning_rate": 5.987195008532803e-06, "loss": 0.3945472717285156, "step": 6805 }, { "epoch": 0.05888405634192528, "grad_norm": 7.88279068010645, "learning_rate": 5.9871761980083885e-06, "loss": 0.2494274139404297, "step": 6810 }, { "epoch": 0.05892728986346854, "grad_norm": 5.493631558737745, "learning_rate": 5.987157373707381e-06, "loss": 0.180548095703125, "step": 6815 }, { "epoch": 0.058970523385011804, "grad_norm": 18.14276630940938, "learning_rate": 5.987138535629868e-06, "loss": 0.222314453125, "step": 6820 }, { "epoch": 0.05901375690655507, "grad_norm": 66.52483653283005, "learning_rate": 5.987119683775936e-06, "loss": 0.2741706848144531, "step": 6825 }, { "epoch": 0.05905699042809833, "grad_norm": 44.7318085267996, "learning_rate": 5.987100818145672e-06, "loss": 0.6509689331054688, "step": 6830 }, { "epoch": 0.059100223949641595, "grad_norm": 5.69876063316472, "learning_rate": 5.987081938739163e-06, "loss": 0.18264923095703126, "step": 6835 }, { "epoch": 0.05914345747118486, "grad_norm": 29.796075709296517, "learning_rate": 5.987063045556496e-06, "loss": 0.1426422119140625, "step": 6840 }, { "epoch": 0.05918669099272812, "grad_norm": 36.41809876198827, "learning_rate": 5.987044138597757e-06, "loss": 0.2724334716796875, "step": 6845 }, { "epoch": 0.059229924514271386, "grad_norm": 21.916121016433063, "learning_rate": 5.987025217863036e-06, "loss": 0.226092529296875, "step": 6850 }, { "epoch": 0.05927315803581465, "grad_norm": 6.518853428186534, "learning_rate": 5.987006283352419e-06, "loss": 0.44632568359375, "step": 6855 }, { "epoch": 0.05931639155735791, "grad_norm": 29.580300541759886, "learning_rate": 5.9869873350659906e-06, "loss": 0.278997802734375, "step": 6860 }, { "epoch": 0.059359625078901176, "grad_norm": 37.286773575844315, "learning_rate": 5.986968373003842e-06, "loss": 0.164996337890625, "step": 6865 }, { "epoch": 0.05940285860044444, "grad_norm": 2.625162623430265, "learning_rate": 5.98694939716606e-06, "loss": 0.1907470703125, "step": 6870 }, { "epoch": 0.059446092121987704, "grad_norm": 15.547590460689753, "learning_rate": 5.98693040755273e-06, "loss": 0.4313232421875, "step": 6875 }, { "epoch": 0.05948932564353097, "grad_norm": 1.0281902766336222, "learning_rate": 5.986911404163941e-06, "loss": 0.67850341796875, "step": 6880 }, { "epoch": 0.05953255916507423, "grad_norm": 75.7648504675348, "learning_rate": 5.986892386999781e-06, "loss": 0.32578125, "step": 6885 }, { "epoch": 0.059575792686617494, "grad_norm": 34.03617190586413, "learning_rate": 5.986873356060337e-06, "loss": 0.19498367309570314, "step": 6890 }, { "epoch": 0.05961902620816076, "grad_norm": 24.739711735986763, "learning_rate": 5.986854311345697e-06, "loss": 0.410009765625, "step": 6895 }, { "epoch": 0.05966225972970402, "grad_norm": 53.96737715209798, "learning_rate": 5.986835252855949e-06, "loss": 0.27275772094726564, "step": 6900 }, { "epoch": 0.059705493251247285, "grad_norm": 2.329567918885942, "learning_rate": 5.986816180591181e-06, "loss": 0.3850341796875, "step": 6905 }, { "epoch": 0.05974872677279055, "grad_norm": 18.96279654697661, "learning_rate": 5.98679709455148e-06, "loss": 0.27756195068359374, "step": 6910 }, { "epoch": 0.05979196029433381, "grad_norm": 23.29515341785764, "learning_rate": 5.986777994736935e-06, "loss": 0.216351318359375, "step": 6915 }, { "epoch": 0.059835193815877076, "grad_norm": 21.150072346925423, "learning_rate": 5.986758881147635e-06, "loss": 0.66226806640625, "step": 6920 }, { "epoch": 0.05987842733742034, "grad_norm": 5.61651434909479, "learning_rate": 5.986739753783666e-06, "loss": 0.2956756591796875, "step": 6925 }, { "epoch": 0.0599216608589636, "grad_norm": 11.789469191039933, "learning_rate": 5.986720612645116e-06, "loss": 0.1623291015625, "step": 6930 }, { "epoch": 0.05996489438050687, "grad_norm": 24.06345226237578, "learning_rate": 5.986701457732076e-06, "loss": 0.1104766845703125, "step": 6935 }, { "epoch": 0.06000812790205013, "grad_norm": 16.36031593574522, "learning_rate": 5.986682289044632e-06, "loss": 0.549591064453125, "step": 6940 }, { "epoch": 0.060051361423593394, "grad_norm": 9.408094751217053, "learning_rate": 5.9866631065828734e-06, "loss": 0.38217697143554685, "step": 6945 }, { "epoch": 0.060094594945136665, "grad_norm": 3.597454936042768, "learning_rate": 5.986643910346889e-06, "loss": 0.27681121826171873, "step": 6950 }, { "epoch": 0.06013782846667993, "grad_norm": 4.625601450259682, "learning_rate": 5.986624700336766e-06, "loss": 0.801318359375, "step": 6955 }, { "epoch": 0.06018106198822319, "grad_norm": 4.10609376882263, "learning_rate": 5.9866054765525945e-06, "loss": 0.070257568359375, "step": 6960 }, { "epoch": 0.060224295509766455, "grad_norm": 61.41414931586364, "learning_rate": 5.986586238994461e-06, "loss": 0.3594825744628906, "step": 6965 }, { "epoch": 0.06026752903130972, "grad_norm": 4.943386454808211, "learning_rate": 5.986566987662457e-06, "loss": 0.20334014892578126, "step": 6970 }, { "epoch": 0.06031076255285298, "grad_norm": 14.68780913656551, "learning_rate": 5.98654772255667e-06, "loss": 0.21892776489257812, "step": 6975 }, { "epoch": 0.060353996074396246, "grad_norm": 50.813384932269, "learning_rate": 5.986528443677189e-06, "loss": 0.364208984375, "step": 6980 }, { "epoch": 0.06039722959593951, "grad_norm": 5.3109431334703405, "learning_rate": 5.9865091510241015e-06, "loss": 0.06706695556640625, "step": 6985 }, { "epoch": 0.06044046311748277, "grad_norm": 55.70863203241484, "learning_rate": 5.986489844597498e-06, "loss": 0.3051422119140625, "step": 6990 }, { "epoch": 0.06048369663902604, "grad_norm": 25.196473546276412, "learning_rate": 5.986470524397467e-06, "loss": 0.18998565673828124, "step": 6995 }, { "epoch": 0.0605269301605693, "grad_norm": 31.688826341215126, "learning_rate": 5.9864511904241e-06, "loss": 0.232672119140625, "step": 7000 }, { "epoch": 0.060570163682112564, "grad_norm": 6.808285719926633, "learning_rate": 5.986431842677482e-06, "loss": 0.100921630859375, "step": 7005 }, { "epoch": 0.06061339720365583, "grad_norm": 16.482299816223353, "learning_rate": 5.986412481157705e-06, "loss": 0.1092529296875, "step": 7010 }, { "epoch": 0.06065663072519909, "grad_norm": 23.18110408568205, "learning_rate": 5.986393105864856e-06, "loss": 0.13861541748046874, "step": 7015 }, { "epoch": 0.060699864246742355, "grad_norm": 25.157963999325688, "learning_rate": 5.986373716799027e-06, "loss": 0.2788177490234375, "step": 7020 }, { "epoch": 0.06074309776828562, "grad_norm": 33.999262810071805, "learning_rate": 5.986354313960307e-06, "loss": 0.242279052734375, "step": 7025 }, { "epoch": 0.06078633128982888, "grad_norm": 65.09712883137937, "learning_rate": 5.986334897348784e-06, "loss": 0.388092041015625, "step": 7030 }, { "epoch": 0.060829564811372146, "grad_norm": 4.370919860570539, "learning_rate": 5.986315466964549e-06, "loss": 0.026455497741699217, "step": 7035 }, { "epoch": 0.06087279833291541, "grad_norm": 33.40613728840607, "learning_rate": 5.98629602280769e-06, "loss": 0.101605224609375, "step": 7040 }, { "epoch": 0.06091603185445867, "grad_norm": 30.85163370106887, "learning_rate": 5.986276564878298e-06, "loss": 0.1306488037109375, "step": 7045 }, { "epoch": 0.060959265376001937, "grad_norm": 14.359311766121019, "learning_rate": 5.986257093176462e-06, "loss": 0.1797882080078125, "step": 7050 }, { "epoch": 0.0610024988975452, "grad_norm": 6.478021710359545, "learning_rate": 5.986237607702273e-06, "loss": 0.06270790100097656, "step": 7055 }, { "epoch": 0.061045732419088464, "grad_norm": 8.691867697198159, "learning_rate": 5.986218108455819e-06, "loss": 0.117437744140625, "step": 7060 }, { "epoch": 0.06108896594063173, "grad_norm": 12.585726046073168, "learning_rate": 5.9861985954371915e-06, "loss": 0.563623046875, "step": 7065 }, { "epoch": 0.06113219946217499, "grad_norm": 1.013361214406276, "learning_rate": 5.9861790686464795e-06, "loss": 0.2742828369140625, "step": 7070 }, { "epoch": 0.061175432983718254, "grad_norm": 7.470681662388342, "learning_rate": 5.986159528083774e-06, "loss": 0.0541259765625, "step": 7075 }, { "epoch": 0.06121866650526152, "grad_norm": 2.954116502314928, "learning_rate": 5.986139973749165e-06, "loss": 0.133001708984375, "step": 7080 }, { "epoch": 0.06126190002680478, "grad_norm": 9.26983426256546, "learning_rate": 5.986120405642742e-06, "loss": 0.1304534912109375, "step": 7085 }, { "epoch": 0.061305133548348045, "grad_norm": 1.3874216999060267, "learning_rate": 5.986100823764595e-06, "loss": 0.27458343505859373, "step": 7090 }, { "epoch": 0.06134836706989131, "grad_norm": 1.820763691447704, "learning_rate": 5.9860812281148156e-06, "loss": 0.3466217041015625, "step": 7095 }, { "epoch": 0.06139160059143457, "grad_norm": 4.314446662826769, "learning_rate": 5.986061618693493e-06, "loss": 0.1471893310546875, "step": 7100 }, { "epoch": 0.061434834112977836, "grad_norm": 33.767013850313, "learning_rate": 5.986041995500718e-06, "loss": 0.29635162353515626, "step": 7105 }, { "epoch": 0.0614780676345211, "grad_norm": 13.386508595683607, "learning_rate": 5.986022358536581e-06, "loss": 0.1512786865234375, "step": 7110 }, { "epoch": 0.06152130115606436, "grad_norm": 11.567196995665277, "learning_rate": 5.986002707801173e-06, "loss": 0.29996337890625, "step": 7115 }, { "epoch": 0.06156453467760763, "grad_norm": 5.706447739307616, "learning_rate": 5.985983043294585e-06, "loss": 0.2516754150390625, "step": 7120 }, { "epoch": 0.06160776819915089, "grad_norm": 6.085139043805599, "learning_rate": 5.985963365016906e-06, "loss": 0.17459716796875, "step": 7125 }, { "epoch": 0.061651001720694154, "grad_norm": 48.97016430493129, "learning_rate": 5.985943672968228e-06, "loss": 0.5583770751953125, "step": 7130 }, { "epoch": 0.061694235242237425, "grad_norm": 9.838540816449404, "learning_rate": 5.985923967148642e-06, "loss": 0.108306884765625, "step": 7135 }, { "epoch": 0.06173746876378069, "grad_norm": 2.838947599119657, "learning_rate": 5.985904247558238e-06, "loss": 0.07516517639160156, "step": 7140 }, { "epoch": 0.06178070228532395, "grad_norm": 25.478810691458673, "learning_rate": 5.985884514197108e-06, "loss": 0.1749725341796875, "step": 7145 }, { "epoch": 0.061823935806867215, "grad_norm": 11.331124858032952, "learning_rate": 5.985864767065342e-06, "loss": 0.056810760498046876, "step": 7150 }, { "epoch": 0.06186716932841048, "grad_norm": 5.329592813871143, "learning_rate": 5.985845006163033e-06, "loss": 0.0609161376953125, "step": 7155 }, { "epoch": 0.06191040284995374, "grad_norm": 30.877565197463802, "learning_rate": 5.985825231490269e-06, "loss": 0.31368255615234375, "step": 7160 }, { "epoch": 0.061953636371497006, "grad_norm": 4.459888853257102, "learning_rate": 5.985805443047142e-06, "loss": 0.355816650390625, "step": 7165 }, { "epoch": 0.06199686989304027, "grad_norm": 10.842117633767222, "learning_rate": 5.985785640833747e-06, "loss": 0.152923583984375, "step": 7170 }, { "epoch": 0.06204010341458353, "grad_norm": 47.72719843477032, "learning_rate": 5.98576582485017e-06, "loss": 0.654266357421875, "step": 7175 }, { "epoch": 0.0620833369361268, "grad_norm": 1.1933777334677997, "learning_rate": 5.985745995096506e-06, "loss": 0.0921356201171875, "step": 7180 }, { "epoch": 0.06212657045767006, "grad_norm": 7.839790839809891, "learning_rate": 5.985726151572846e-06, "loss": 0.211456298828125, "step": 7185 }, { "epoch": 0.062169803979213324, "grad_norm": 18.447030548138258, "learning_rate": 5.985706294279279e-06, "loss": 0.24698486328125, "step": 7190 }, { "epoch": 0.06221303750075659, "grad_norm": 49.971472090471934, "learning_rate": 5.985686423215899e-06, "loss": 0.2658660888671875, "step": 7195 }, { "epoch": 0.06225627102229985, "grad_norm": 23.335935340353107, "learning_rate": 5.9856665383827976e-06, "loss": 0.244464111328125, "step": 7200 }, { "epoch": 0.062299504543843115, "grad_norm": 10.284161244655428, "learning_rate": 5.985646639780066e-06, "loss": 0.07874603271484375, "step": 7205 }, { "epoch": 0.06234273806538638, "grad_norm": 6.343772271115469, "learning_rate": 5.9856267274077955e-06, "loss": 0.4363800048828125, "step": 7210 }, { "epoch": 0.06238597158692964, "grad_norm": 10.125653358718457, "learning_rate": 5.985606801266078e-06, "loss": 0.09927215576171874, "step": 7215 }, { "epoch": 0.062429205108472906, "grad_norm": 6.222765246337794, "learning_rate": 5.985586861355006e-06, "loss": 0.57882080078125, "step": 7220 }, { "epoch": 0.06247243863001617, "grad_norm": 19.721620130201266, "learning_rate": 5.985566907674671e-06, "loss": 0.1725433349609375, "step": 7225 }, { "epoch": 0.06251567215155944, "grad_norm": 52.366649424487925, "learning_rate": 5.985546940225167e-06, "loss": 0.55069580078125, "step": 7230 }, { "epoch": 0.0625589056731027, "grad_norm": 22.677428213281107, "learning_rate": 5.985526959006582e-06, "loss": 0.16351318359375, "step": 7235 }, { "epoch": 0.06260213919464597, "grad_norm": 38.84600192612874, "learning_rate": 5.985506964019012e-06, "loss": 0.2023712158203125, "step": 7240 }, { "epoch": 0.06264537271618922, "grad_norm": 2.199203756482297, "learning_rate": 5.985486955262547e-06, "loss": 0.184771728515625, "step": 7245 }, { "epoch": 0.0626886062377325, "grad_norm": 13.192377095138205, "learning_rate": 5.98546693273728e-06, "loss": 0.24935302734375, "step": 7250 }, { "epoch": 0.06273183975927575, "grad_norm": 10.986085952317021, "learning_rate": 5.9854468964433025e-06, "loss": 0.21720123291015625, "step": 7255 }, { "epoch": 0.06277507328081902, "grad_norm": 20.274327449518577, "learning_rate": 5.985426846380709e-06, "loss": 0.1502838134765625, "step": 7260 }, { "epoch": 0.06281830680236228, "grad_norm": 75.67738846069835, "learning_rate": 5.98540678254959e-06, "loss": 0.2759857177734375, "step": 7265 }, { "epoch": 0.06286154032390555, "grad_norm": 65.57706251321898, "learning_rate": 5.985386704950039e-06, "loss": 0.2800537109375, "step": 7270 }, { "epoch": 0.0629047738454488, "grad_norm": 15.254493187810425, "learning_rate": 5.985366613582148e-06, "loss": 0.330401611328125, "step": 7275 }, { "epoch": 0.06294800736699208, "grad_norm": 3.453555460624964, "learning_rate": 5.98534650844601e-06, "loss": 0.125189208984375, "step": 7280 }, { "epoch": 0.06299124088853533, "grad_norm": 5.830712524527114, "learning_rate": 5.985326389541718e-06, "loss": 0.1407470703125, "step": 7285 }, { "epoch": 0.0630344744100786, "grad_norm": 14.701237570932655, "learning_rate": 5.985306256869365e-06, "loss": 0.28673095703125, "step": 7290 }, { "epoch": 0.06307770793162186, "grad_norm": 6.354493859258391, "learning_rate": 5.9852861104290424e-06, "loss": 0.0706268310546875, "step": 7295 }, { "epoch": 0.06312094145316513, "grad_norm": 40.14101461989431, "learning_rate": 5.985265950220844e-06, "loss": 0.1458770751953125, "step": 7300 }, { "epoch": 0.06316417497470839, "grad_norm": 54.210915245183735, "learning_rate": 5.985245776244863e-06, "loss": 0.12491836547851562, "step": 7305 }, { "epoch": 0.06320740849625166, "grad_norm": 1.021568615444756, "learning_rate": 5.985225588501193e-06, "loss": 0.36185150146484374, "step": 7310 }, { "epoch": 0.06325064201779491, "grad_norm": 20.76145407208608, "learning_rate": 5.9852053869899254e-06, "loss": 0.46036376953125, "step": 7315 }, { "epoch": 0.06329387553933818, "grad_norm": 20.86465890014067, "learning_rate": 5.985185171711155e-06, "loss": 0.2639442443847656, "step": 7320 }, { "epoch": 0.06333710906088144, "grad_norm": 70.73905009709362, "learning_rate": 5.985164942664974e-06, "loss": 0.3012973785400391, "step": 7325 }, { "epoch": 0.06338034258242471, "grad_norm": 40.53125872135531, "learning_rate": 5.985144699851476e-06, "loss": 0.50235595703125, "step": 7330 }, { "epoch": 0.06342357610396797, "grad_norm": 6.821868639154191, "learning_rate": 5.985124443270755e-06, "loss": 0.1679595947265625, "step": 7335 }, { "epoch": 0.06346680962551124, "grad_norm": 5.205720844297822, "learning_rate": 5.985104172922903e-06, "loss": 0.0992156982421875, "step": 7340 }, { "epoch": 0.0635100431470545, "grad_norm": 70.2890089740524, "learning_rate": 5.985083888808015e-06, "loss": 0.566650390625, "step": 7345 }, { "epoch": 0.06355327666859777, "grad_norm": 34.13238188730644, "learning_rate": 5.985063590926184e-06, "loss": 0.61693115234375, "step": 7350 }, { "epoch": 0.06359651019014102, "grad_norm": 47.666094376192575, "learning_rate": 5.985043279277503e-06, "loss": 0.4473602294921875, "step": 7355 }, { "epoch": 0.0636397437116843, "grad_norm": 1.0144815090498815, "learning_rate": 5.985022953862066e-06, "loss": 0.13769683837890626, "step": 7360 }, { "epoch": 0.06368297723322755, "grad_norm": 3.892896288140947, "learning_rate": 5.985002614679967e-06, "loss": 0.2476898193359375, "step": 7365 }, { "epoch": 0.06372621075477082, "grad_norm": 2.9463962802694152, "learning_rate": 5.9849822617313e-06, "loss": 0.18663330078125, "step": 7370 }, { "epoch": 0.06376944427631408, "grad_norm": 7.581763315230119, "learning_rate": 5.984961895016158e-06, "loss": 0.35782470703125, "step": 7375 }, { "epoch": 0.06381267779785735, "grad_norm": 40.68923583100281, "learning_rate": 5.984941514534637e-06, "loss": 0.3649383544921875, "step": 7380 }, { "epoch": 0.0638559113194006, "grad_norm": 4.9787174901870745, "learning_rate": 5.984921120286827e-06, "loss": 0.3585723876953125, "step": 7385 }, { "epoch": 0.06389914484094388, "grad_norm": 10.590416396108138, "learning_rate": 5.984900712272827e-06, "loss": 0.1458984375, "step": 7390 }, { "epoch": 0.06394237836248713, "grad_norm": 33.941430788762815, "learning_rate": 5.984880290492727e-06, "loss": 0.2622802734375, "step": 7395 }, { "epoch": 0.0639856118840304, "grad_norm": 34.173552185190815, "learning_rate": 5.984859854946623e-06, "loss": 0.44094696044921877, "step": 7400 }, { "epoch": 0.06402884540557366, "grad_norm": 17.675553404670467, "learning_rate": 5.98483940563461e-06, "loss": 0.30146484375, "step": 7405 }, { "epoch": 0.06407207892711693, "grad_norm": 21.54488995523119, "learning_rate": 5.984818942556781e-06, "loss": 0.18397064208984376, "step": 7410 }, { "epoch": 0.0641153124486602, "grad_norm": 32.77587573333895, "learning_rate": 5.984798465713231e-06, "loss": 0.37373046875, "step": 7415 }, { "epoch": 0.06415854597020346, "grad_norm": 20.611462629401423, "learning_rate": 5.984777975104053e-06, "loss": 0.31929931640625, "step": 7420 }, { "epoch": 0.06420177949174673, "grad_norm": 1.7815938750495384, "learning_rate": 5.9847574707293444e-06, "loss": 0.305859375, "step": 7425 }, { "epoch": 0.06424501301328998, "grad_norm": 22.7156365191401, "learning_rate": 5.984736952589197e-06, "loss": 0.2639007568359375, "step": 7430 }, { "epoch": 0.06428824653483325, "grad_norm": 13.426348731339662, "learning_rate": 5.9847164206837075e-06, "loss": 0.0759429931640625, "step": 7435 }, { "epoch": 0.06433148005637651, "grad_norm": 17.09283765270829, "learning_rate": 5.984695875012969e-06, "loss": 0.2184661865234375, "step": 7440 }, { "epoch": 0.06437471357791978, "grad_norm": 38.318049160855054, "learning_rate": 5.984675315577076e-06, "loss": 0.21437225341796876, "step": 7445 }, { "epoch": 0.06441794709946304, "grad_norm": 2.501647704315048, "learning_rate": 5.984654742376125e-06, "loss": 0.398828125, "step": 7450 }, { "epoch": 0.06446118062100631, "grad_norm": 77.21801780619263, "learning_rate": 5.98463415541021e-06, "loss": 0.2189300537109375, "step": 7455 }, { "epoch": 0.06450441414254957, "grad_norm": 11.295709172320077, "learning_rate": 5.984613554679426e-06, "loss": 0.1172760009765625, "step": 7460 }, { "epoch": 0.06454764766409284, "grad_norm": 0.4053025354310935, "learning_rate": 5.9845929401838685e-06, "loss": 0.13114089965820314, "step": 7465 }, { "epoch": 0.06459088118563609, "grad_norm": 17.484352177749738, "learning_rate": 5.984572311923631e-06, "loss": 0.30492362976074217, "step": 7470 }, { "epoch": 0.06463411470717936, "grad_norm": 19.486732142561596, "learning_rate": 5.9845516698988106e-06, "loss": 0.10284881591796875, "step": 7475 }, { "epoch": 0.06467734822872262, "grad_norm": 5.15904081897677, "learning_rate": 5.984531014109502e-06, "loss": 0.665716552734375, "step": 7480 }, { "epoch": 0.06472058175026589, "grad_norm": 20.824932048208506, "learning_rate": 5.984510344555799e-06, "loss": 0.23197097778320314, "step": 7485 }, { "epoch": 0.06476381527180915, "grad_norm": 33.62214201208377, "learning_rate": 5.984489661237799e-06, "loss": 0.528289794921875, "step": 7490 }, { "epoch": 0.06480704879335242, "grad_norm": 37.65056540303211, "learning_rate": 5.984468964155595e-06, "loss": 0.43573684692382814, "step": 7495 }, { "epoch": 0.06485028231489567, "grad_norm": 29.83366514505084, "learning_rate": 5.984448253309285e-06, "loss": 0.2563507080078125, "step": 7500 }, { "epoch": 0.06489351583643894, "grad_norm": 3.3850276446806022, "learning_rate": 5.984427528698963e-06, "loss": 0.2460601806640625, "step": 7505 }, { "epoch": 0.0649367493579822, "grad_norm": 22.072598858378704, "learning_rate": 5.9844067903247254e-06, "loss": 0.261541748046875, "step": 7510 }, { "epoch": 0.06497998287952547, "grad_norm": 1.0323185807023358, "learning_rate": 5.984386038186667e-06, "loss": 0.2233612060546875, "step": 7515 }, { "epoch": 0.06502321640106873, "grad_norm": 32.68017262817794, "learning_rate": 5.9843652722848835e-06, "loss": 0.370751953125, "step": 7520 }, { "epoch": 0.065066449922612, "grad_norm": 12.054194972896859, "learning_rate": 5.984344492619471e-06, "loss": 0.24647216796875, "step": 7525 }, { "epoch": 0.06510968344415526, "grad_norm": 8.588117549477687, "learning_rate": 5.984323699190526e-06, "loss": 0.2399139404296875, "step": 7530 }, { "epoch": 0.06515291696569853, "grad_norm": 15.341306343122474, "learning_rate": 5.984302891998144e-06, "loss": 0.25660400390625, "step": 7535 }, { "epoch": 0.06519615048724178, "grad_norm": 51.2554690044937, "learning_rate": 5.98428207104242e-06, "loss": 0.2804222106933594, "step": 7540 }, { "epoch": 0.06523938400878505, "grad_norm": 34.91796706990348, "learning_rate": 5.984261236323451e-06, "loss": 0.10927200317382812, "step": 7545 }, { "epoch": 0.06528261753032831, "grad_norm": 37.45341237750162, "learning_rate": 5.9842403878413336e-06, "loss": 0.24090576171875, "step": 7550 }, { "epoch": 0.06532585105187158, "grad_norm": 2.3494372177288185, "learning_rate": 5.984219525596163e-06, "loss": 0.147064208984375, "step": 7555 }, { "epoch": 0.06536908457341484, "grad_norm": 19.565159144602912, "learning_rate": 5.984198649588035e-06, "loss": 0.5309232711791992, "step": 7560 }, { "epoch": 0.06541231809495811, "grad_norm": 7.73638338505827, "learning_rate": 5.984177759817047e-06, "loss": 0.0853668212890625, "step": 7565 }, { "epoch": 0.06545555161650136, "grad_norm": 11.94442206252975, "learning_rate": 5.984156856283295e-06, "loss": 0.1072723388671875, "step": 7570 }, { "epoch": 0.06549878513804464, "grad_norm": 16.4943789358292, "learning_rate": 5.9841359389868744e-06, "loss": 0.39105224609375, "step": 7575 }, { "epoch": 0.06554201865958789, "grad_norm": 1.4442892134888994, "learning_rate": 5.984115007927883e-06, "loss": 0.1086395263671875, "step": 7580 }, { "epoch": 0.06558525218113116, "grad_norm": 0.18329003287352788, "learning_rate": 5.984094063106417e-06, "loss": 0.0332763671875, "step": 7585 }, { "epoch": 0.06562848570267442, "grad_norm": 5.805288645715813, "learning_rate": 5.9840731045225734e-06, "loss": 0.0970733642578125, "step": 7590 }, { "epoch": 0.06567171922421769, "grad_norm": 1.3674483006655709, "learning_rate": 5.9840521321764476e-06, "loss": 0.05819854736328125, "step": 7595 }, { "epoch": 0.06571495274576096, "grad_norm": 46.39266603302469, "learning_rate": 5.984031146068137e-06, "loss": 0.20220565795898438, "step": 7600 }, { "epoch": 0.06575818626730422, "grad_norm": 3.6860386479924987, "learning_rate": 5.984010146197739e-06, "loss": 0.459466552734375, "step": 7605 }, { "epoch": 0.06580141978884749, "grad_norm": 19.01197146955999, "learning_rate": 5.983989132565349e-06, "loss": 0.1209228515625, "step": 7610 }, { "epoch": 0.06584465331039074, "grad_norm": 35.66932693577322, "learning_rate": 5.9839681051710655e-06, "loss": 0.32550888061523436, "step": 7615 }, { "epoch": 0.06588788683193401, "grad_norm": 10.224292479033545, "learning_rate": 5.983947064014984e-06, "loss": 0.1306365966796875, "step": 7620 }, { "epoch": 0.06593112035347727, "grad_norm": 21.78701295915985, "learning_rate": 5.983926009097203e-06, "loss": 0.2920555114746094, "step": 7625 }, { "epoch": 0.06597435387502054, "grad_norm": 2.503849014850054, "learning_rate": 5.983904940417819e-06, "loss": 0.13079376220703126, "step": 7630 }, { "epoch": 0.0660175873965638, "grad_norm": 39.00914993962339, "learning_rate": 5.9838838579769295e-06, "loss": 0.350653076171875, "step": 7635 }, { "epoch": 0.06606082091810707, "grad_norm": 18.98580535328862, "learning_rate": 5.98386276177463e-06, "loss": 0.08719329833984375, "step": 7640 }, { "epoch": 0.06610405443965033, "grad_norm": 10.616453867606388, "learning_rate": 5.9838416518110194e-06, "loss": 0.07613525390625, "step": 7645 }, { "epoch": 0.0661472879611936, "grad_norm": 16.270010262077438, "learning_rate": 5.983820528086196e-06, "loss": 0.26969757080078127, "step": 7650 }, { "epoch": 0.06619052148273685, "grad_norm": 3.4163828068015465, "learning_rate": 5.983799390600255e-06, "loss": 0.1771270751953125, "step": 7655 }, { "epoch": 0.06623375500428012, "grad_norm": 4.924163370371957, "learning_rate": 5.983778239353295e-06, "loss": 0.3638866424560547, "step": 7660 }, { "epoch": 0.06627698852582338, "grad_norm": 31.049183344008593, "learning_rate": 5.983757074345413e-06, "loss": 0.1159423828125, "step": 7665 }, { "epoch": 0.06632022204736665, "grad_norm": 24.719980143781893, "learning_rate": 5.983735895576707e-06, "loss": 0.20744781494140624, "step": 7670 }, { "epoch": 0.06636345556890991, "grad_norm": 29.257243369226057, "learning_rate": 5.983714703047276e-06, "loss": 0.23479766845703126, "step": 7675 }, { "epoch": 0.06640668909045318, "grad_norm": 14.497931695350282, "learning_rate": 5.983693496757215e-06, "loss": 0.1124542236328125, "step": 7680 }, { "epoch": 0.06644992261199643, "grad_norm": 17.156242097993218, "learning_rate": 5.9836722767066235e-06, "loss": 0.11964111328125, "step": 7685 }, { "epoch": 0.0664931561335397, "grad_norm": 1.3790953165946407, "learning_rate": 5.983651042895599e-06, "loss": 0.1022918701171875, "step": 7690 }, { "epoch": 0.06653638965508296, "grad_norm": 7.741860498355035, "learning_rate": 5.98362979532424e-06, "loss": 0.08522491455078125, "step": 7695 }, { "epoch": 0.06657962317662623, "grad_norm": 0.9188409301132905, "learning_rate": 5.983608533992644e-06, "loss": 0.28893356323242186, "step": 7700 }, { "epoch": 0.06662285669816949, "grad_norm": 35.36519334004482, "learning_rate": 5.983587258900909e-06, "loss": 0.4942413330078125, "step": 7705 }, { "epoch": 0.06666609021971276, "grad_norm": 20.619604448132648, "learning_rate": 5.983565970049134e-06, "loss": 0.122235107421875, "step": 7710 }, { "epoch": 0.06670932374125602, "grad_norm": 35.924641640102394, "learning_rate": 5.983544667437415e-06, "loss": 0.358245849609375, "step": 7715 }, { "epoch": 0.06675255726279929, "grad_norm": 43.90976660022059, "learning_rate": 5.983523351065853e-06, "loss": 0.487646484375, "step": 7720 }, { "epoch": 0.06679579078434254, "grad_norm": 1.2392657644968261, "learning_rate": 5.983502020934544e-06, "loss": 0.5260566711425781, "step": 7725 }, { "epoch": 0.06683902430588581, "grad_norm": 35.414989859808074, "learning_rate": 5.983480677043586e-06, "loss": 0.144073486328125, "step": 7730 }, { "epoch": 0.06688225782742907, "grad_norm": 4.383571580944663, "learning_rate": 5.983459319393081e-06, "loss": 0.4257568359375, "step": 7735 }, { "epoch": 0.06692549134897234, "grad_norm": 4.019821862550249, "learning_rate": 5.983437947983125e-06, "loss": 0.8563232421875, "step": 7740 }, { "epoch": 0.0669687248705156, "grad_norm": 8.153475281121246, "learning_rate": 5.983416562813816e-06, "loss": 0.054180908203125, "step": 7745 }, { "epoch": 0.06701195839205887, "grad_norm": 10.12598252320204, "learning_rate": 5.983395163885253e-06, "loss": 0.1010986328125, "step": 7750 }, { "epoch": 0.06705519191360212, "grad_norm": 0.5289907215368841, "learning_rate": 5.983373751197536e-06, "loss": 0.10904998779296875, "step": 7755 }, { "epoch": 0.0670984254351454, "grad_norm": 82.13426182118599, "learning_rate": 5.983352324750763e-06, "loss": 0.56873779296875, "step": 7760 }, { "epoch": 0.06714165895668865, "grad_norm": 37.68522076892617, "learning_rate": 5.983330884545032e-06, "loss": 0.5251953125, "step": 7765 }, { "epoch": 0.06718489247823192, "grad_norm": 1.410539236294467, "learning_rate": 5.9833094305804425e-06, "loss": 0.24443359375, "step": 7770 }, { "epoch": 0.06722812599977518, "grad_norm": 47.74860816941741, "learning_rate": 5.983287962857094e-06, "loss": 0.2614349365234375, "step": 7775 }, { "epoch": 0.06727135952131845, "grad_norm": 19.04883280336779, "learning_rate": 5.983266481375085e-06, "loss": 0.355523681640625, "step": 7780 }, { "epoch": 0.06731459304286172, "grad_norm": 22.07582499790419, "learning_rate": 5.983244986134514e-06, "loss": 0.5911422729492187, "step": 7785 }, { "epoch": 0.06735782656440498, "grad_norm": 12.091379382447379, "learning_rate": 5.983223477135481e-06, "loss": 0.36107177734375, "step": 7790 }, { "epoch": 0.06740106008594825, "grad_norm": 3.139692512058579, "learning_rate": 5.983201954378086e-06, "loss": 0.2193115234375, "step": 7795 }, { "epoch": 0.0674442936074915, "grad_norm": 8.11250616086428, "learning_rate": 5.983180417862426e-06, "loss": 0.115667724609375, "step": 7800 }, { "epoch": 0.06748752712903477, "grad_norm": 2.839733045211475, "learning_rate": 5.983158867588602e-06, "loss": 0.21786575317382811, "step": 7805 }, { "epoch": 0.06753076065057803, "grad_norm": 5.391922890788551, "learning_rate": 5.983137303556713e-06, "loss": 0.1190093994140625, "step": 7810 }, { "epoch": 0.0675739941721213, "grad_norm": 43.89186880011146, "learning_rate": 5.9831157257668584e-06, "loss": 0.22034912109375, "step": 7815 }, { "epoch": 0.06761722769366456, "grad_norm": 6.275233176132006, "learning_rate": 5.983094134219137e-06, "loss": 0.3613433837890625, "step": 7820 }, { "epoch": 0.06766046121520783, "grad_norm": 4.158301767687799, "learning_rate": 5.983072528913649e-06, "loss": 0.16099853515625, "step": 7825 }, { "epoch": 0.06770369473675109, "grad_norm": 18.443528180326233, "learning_rate": 5.983050909850495e-06, "loss": 0.09251708984375, "step": 7830 }, { "epoch": 0.06774692825829436, "grad_norm": 21.31702896940149, "learning_rate": 5.9830292770297736e-06, "loss": 0.10356063842773437, "step": 7835 }, { "epoch": 0.06779016177983761, "grad_norm": 12.294628727380944, "learning_rate": 5.983007630451585e-06, "loss": 0.2249755859375, "step": 7840 }, { "epoch": 0.06783339530138088, "grad_norm": 57.17860253157511, "learning_rate": 5.982985970116027e-06, "loss": 0.4202880859375, "step": 7845 }, { "epoch": 0.06787662882292414, "grad_norm": 31.46114860567691, "learning_rate": 5.982964296023203e-06, "loss": 0.567578125, "step": 7850 }, { "epoch": 0.06791986234446741, "grad_norm": 0.18004906306977858, "learning_rate": 5.982942608173211e-06, "loss": 0.22883453369140624, "step": 7855 }, { "epoch": 0.06796309586601067, "grad_norm": 4.358086204837963, "learning_rate": 5.982920906566151e-06, "loss": 0.6825614929199219, "step": 7860 }, { "epoch": 0.06800632938755394, "grad_norm": 35.35226845385556, "learning_rate": 5.982899191202123e-06, "loss": 0.256707763671875, "step": 7865 }, { "epoch": 0.0680495629090972, "grad_norm": 27.441967008075725, "learning_rate": 5.9828774620812285e-06, "loss": 0.3933349609375, "step": 7870 }, { "epoch": 0.06809279643064046, "grad_norm": 14.48610786667105, "learning_rate": 5.982855719203566e-06, "loss": 0.5262252807617187, "step": 7875 }, { "epoch": 0.06813602995218372, "grad_norm": 7.8981391610342975, "learning_rate": 5.9828339625692365e-06, "loss": 0.065887451171875, "step": 7880 }, { "epoch": 0.06817926347372699, "grad_norm": 3.731364187816992, "learning_rate": 5.98281219217834e-06, "loss": 0.16113967895507814, "step": 7885 }, { "epoch": 0.06822249699527025, "grad_norm": 32.27005099893371, "learning_rate": 5.982790408030977e-06, "loss": 0.269757080078125, "step": 7890 }, { "epoch": 0.06826573051681352, "grad_norm": 16.68472617029849, "learning_rate": 5.982768610127251e-06, "loss": 0.1395050048828125, "step": 7895 }, { "epoch": 0.06830896403835678, "grad_norm": 14.45075649804219, "learning_rate": 5.9827467984672565e-06, "loss": 0.13636474609375, "step": 7900 }, { "epoch": 0.06835219755990005, "grad_norm": 7.835406943191326, "learning_rate": 5.982724973051098e-06, "loss": 0.4070281982421875, "step": 7905 }, { "epoch": 0.0683954310814433, "grad_norm": 43.04365184978359, "learning_rate": 5.982703133878877e-06, "loss": 0.4547607421875, "step": 7910 }, { "epoch": 0.06843866460298657, "grad_norm": 31.591623741869892, "learning_rate": 5.9826812809506905e-06, "loss": 0.173992919921875, "step": 7915 }, { "epoch": 0.06848189812452983, "grad_norm": 52.743707739241565, "learning_rate": 5.982659414266643e-06, "loss": 0.4704742431640625, "step": 7920 }, { "epoch": 0.0685251316460731, "grad_norm": 16.098964018387463, "learning_rate": 5.982637533826834e-06, "loss": 0.3514404296875, "step": 7925 }, { "epoch": 0.06856836516761636, "grad_norm": 6.901984263404069, "learning_rate": 5.982615639631364e-06, "loss": 0.1846832275390625, "step": 7930 }, { "epoch": 0.06861159868915963, "grad_norm": 13.441118099421248, "learning_rate": 5.982593731680334e-06, "loss": 0.1512451171875, "step": 7935 }, { "epoch": 0.06865483221070288, "grad_norm": 2.529961882081544, "learning_rate": 5.982571809973845e-06, "loss": 0.1293304443359375, "step": 7940 }, { "epoch": 0.06869806573224616, "grad_norm": 12.170030031166355, "learning_rate": 5.982549874511998e-06, "loss": 0.318865966796875, "step": 7945 }, { "epoch": 0.06874129925378941, "grad_norm": 27.632320558707654, "learning_rate": 5.982527925294895e-06, "loss": 0.2369873046875, "step": 7950 }, { "epoch": 0.06878453277533268, "grad_norm": 3.454060576961688, "learning_rate": 5.982505962322637e-06, "loss": 0.1507598876953125, "step": 7955 }, { "epoch": 0.06882776629687594, "grad_norm": 16.487473720247966, "learning_rate": 5.982483985595324e-06, "loss": 0.23675537109375, "step": 7960 }, { "epoch": 0.06887099981841921, "grad_norm": 1.559741170589323, "learning_rate": 5.98246199511306e-06, "loss": 0.0931182861328125, "step": 7965 }, { "epoch": 0.06891423333996248, "grad_norm": 16.217738091360406, "learning_rate": 5.982439990875943e-06, "loss": 0.5340240478515625, "step": 7970 }, { "epoch": 0.06895746686150574, "grad_norm": 6.45516762358546, "learning_rate": 5.982417972884079e-06, "loss": 0.205999755859375, "step": 7975 }, { "epoch": 0.06900070038304901, "grad_norm": 23.970501663987527, "learning_rate": 5.982395941137565e-06, "loss": 0.44271240234375, "step": 7980 }, { "epoch": 0.06904393390459226, "grad_norm": 31.607876031415316, "learning_rate": 5.982373895636505e-06, "loss": 0.43511962890625, "step": 7985 }, { "epoch": 0.06908716742613553, "grad_norm": 7.861821136974702, "learning_rate": 5.982351836380999e-06, "loss": 0.409100341796875, "step": 7990 }, { "epoch": 0.06913040094767879, "grad_norm": 3.666068273506239, "learning_rate": 5.98232976337115e-06, "loss": 0.1139617919921875, "step": 7995 }, { "epoch": 0.06917363446922206, "grad_norm": 10.584145790832588, "learning_rate": 5.982307676607061e-06, "loss": 0.46164703369140625, "step": 8000 }, { "epoch": 0.06921686799076532, "grad_norm": 43.19877108520564, "learning_rate": 5.982285576088832e-06, "loss": 0.200830078125, "step": 8005 }, { "epoch": 0.06926010151230859, "grad_norm": 25.29206732492556, "learning_rate": 5.982263461816565e-06, "loss": 0.26909866333007815, "step": 8010 }, { "epoch": 0.06930333503385185, "grad_norm": 1.7355386404718913, "learning_rate": 5.982241333790362e-06, "loss": 0.149371337890625, "step": 8015 }, { "epoch": 0.06934656855539512, "grad_norm": 26.056560921738043, "learning_rate": 5.9822191920103264e-06, "loss": 0.298193359375, "step": 8020 }, { "epoch": 0.06938980207693837, "grad_norm": 20.998431187668636, "learning_rate": 5.982197036476559e-06, "loss": 0.24342041015625, "step": 8025 }, { "epoch": 0.06943303559848164, "grad_norm": 1.5792632470145378, "learning_rate": 5.982174867189163e-06, "loss": 0.6676761627197265, "step": 8030 }, { "epoch": 0.0694762691200249, "grad_norm": 4.755580294095398, "learning_rate": 5.982152684148239e-06, "loss": 0.126837158203125, "step": 8035 }, { "epoch": 0.06951950264156817, "grad_norm": 2.6607506019034726, "learning_rate": 5.982130487353891e-06, "loss": 0.5063777923583984, "step": 8040 }, { "epoch": 0.06956273616311143, "grad_norm": 4.184742831279821, "learning_rate": 5.98210827680622e-06, "loss": 0.34389419555664064, "step": 8045 }, { "epoch": 0.0696059696846547, "grad_norm": 18.07283489465846, "learning_rate": 5.98208605250533e-06, "loss": 0.30757904052734375, "step": 8050 }, { "epoch": 0.06964920320619795, "grad_norm": 41.50464103864124, "learning_rate": 5.982063814451323e-06, "loss": 0.21404342651367186, "step": 8055 }, { "epoch": 0.06969243672774122, "grad_norm": 4.332002103598077, "learning_rate": 5.9820415626443e-06, "loss": 0.20894775390625, "step": 8060 }, { "epoch": 0.06973567024928448, "grad_norm": 20.518120310976286, "learning_rate": 5.982019297084366e-06, "loss": 0.324847412109375, "step": 8065 }, { "epoch": 0.06977890377082775, "grad_norm": 1.1558081684738541, "learning_rate": 5.981997017771621e-06, "loss": 0.2871795654296875, "step": 8070 }, { "epoch": 0.06982213729237101, "grad_norm": 10.370564134055405, "learning_rate": 5.98197472470617e-06, "loss": 0.08283843994140624, "step": 8075 }, { "epoch": 0.06986537081391428, "grad_norm": 10.004922962447273, "learning_rate": 5.981952417888116e-06, "loss": 0.26651611328125, "step": 8080 }, { "epoch": 0.06990860433545754, "grad_norm": 16.410953912146795, "learning_rate": 5.98193009731756e-06, "loss": 0.324609375, "step": 8085 }, { "epoch": 0.0699518378570008, "grad_norm": 33.250737886682174, "learning_rate": 5.981907762994605e-06, "loss": 0.7393051147460937, "step": 8090 }, { "epoch": 0.06999507137854406, "grad_norm": 11.041957900739206, "learning_rate": 5.981885414919356e-06, "loss": 0.145684814453125, "step": 8095 }, { "epoch": 0.07003830490008733, "grad_norm": 55.183275552371896, "learning_rate": 5.981863053091915e-06, "loss": 0.2662330627441406, "step": 8100 }, { "epoch": 0.07008153842163059, "grad_norm": 79.49009703996923, "learning_rate": 5.981840677512385e-06, "loss": 0.47430953979492185, "step": 8105 }, { "epoch": 0.07012477194317386, "grad_norm": 21.729540160559882, "learning_rate": 5.981818288180869e-06, "loss": 0.09823455810546874, "step": 8110 }, { "epoch": 0.07016800546471712, "grad_norm": 3.8306554554614527, "learning_rate": 5.981795885097471e-06, "loss": 0.22764129638671876, "step": 8115 }, { "epoch": 0.07021123898626039, "grad_norm": 10.33081411287576, "learning_rate": 5.981773468262293e-06, "loss": 0.144268798828125, "step": 8120 }, { "epoch": 0.07025447250780364, "grad_norm": 5.747947322763925, "learning_rate": 5.9817510376754395e-06, "loss": 0.144677734375, "step": 8125 }, { "epoch": 0.07029770602934692, "grad_norm": 1.8018408894047788, "learning_rate": 5.981728593337015e-06, "loss": 0.138592529296875, "step": 8130 }, { "epoch": 0.07034093955089017, "grad_norm": 23.593972860980667, "learning_rate": 5.98170613524712e-06, "loss": 0.1920440673828125, "step": 8135 }, { "epoch": 0.07038417307243344, "grad_norm": 14.416826880078133, "learning_rate": 5.98168366340586e-06, "loss": 0.38294677734375, "step": 8140 }, { "epoch": 0.0704274065939767, "grad_norm": 23.722746393831887, "learning_rate": 5.981661177813339e-06, "loss": 0.387890625, "step": 8145 }, { "epoch": 0.07047064011551997, "grad_norm": 19.857626227991297, "learning_rate": 5.98163867846966e-06, "loss": 0.17018585205078124, "step": 8150 }, { "epoch": 0.07051387363706324, "grad_norm": 4.76262708812557, "learning_rate": 5.981616165374927e-06, "loss": 0.286517333984375, "step": 8155 }, { "epoch": 0.0705571071586065, "grad_norm": 7.750049387733714, "learning_rate": 5.981593638529244e-06, "loss": 0.21763153076171876, "step": 8160 }, { "epoch": 0.07060034068014977, "grad_norm": 2.6729398695869246, "learning_rate": 5.981571097932713e-06, "loss": 0.44575843811035154, "step": 8165 }, { "epoch": 0.07064357420169302, "grad_norm": 2.0024402223314346, "learning_rate": 5.981548543585441e-06, "loss": 0.0902069091796875, "step": 8170 }, { "epoch": 0.0706868077232363, "grad_norm": 1.706919489062809, "learning_rate": 5.98152597548753e-06, "loss": 0.2329345703125, "step": 8175 }, { "epoch": 0.07073004124477955, "grad_norm": 8.103285300765922, "learning_rate": 5.981503393639086e-06, "loss": 0.11658859252929688, "step": 8180 }, { "epoch": 0.07077327476632282, "grad_norm": 8.497273985203831, "learning_rate": 5.98148079804021e-06, "loss": 0.2077667236328125, "step": 8185 }, { "epoch": 0.07081650828786608, "grad_norm": 5.02916837659049, "learning_rate": 5.981458188691008e-06, "loss": 0.12254486083984376, "step": 8190 }, { "epoch": 0.07085974180940935, "grad_norm": 6.523947725414758, "learning_rate": 5.981435565591586e-06, "loss": 0.3102294921875, "step": 8195 }, { "epoch": 0.0709029753309526, "grad_norm": 0.08382796803517993, "learning_rate": 5.981412928742045e-06, "loss": 0.4051685333251953, "step": 8200 }, { "epoch": 0.07094620885249588, "grad_norm": 59.5853984776661, "learning_rate": 5.981390278142492e-06, "loss": 0.55548095703125, "step": 8205 }, { "epoch": 0.07098944237403913, "grad_norm": 15.52611220004276, "learning_rate": 5.98136761379303e-06, "loss": 0.491363525390625, "step": 8210 }, { "epoch": 0.0710326758955824, "grad_norm": 23.45112311882645, "learning_rate": 5.981344935693764e-06, "loss": 0.13139190673828124, "step": 8215 }, { "epoch": 0.07107590941712566, "grad_norm": 3.079739131347528, "learning_rate": 5.981322243844799e-06, "loss": 0.24001846313476563, "step": 8220 }, { "epoch": 0.07111914293866893, "grad_norm": 25.92509394012825, "learning_rate": 5.981299538246238e-06, "loss": 0.278045654296875, "step": 8225 }, { "epoch": 0.07116237646021219, "grad_norm": 15.793702877732578, "learning_rate": 5.981276818898188e-06, "loss": 0.14574737548828126, "step": 8230 }, { "epoch": 0.07120560998175546, "grad_norm": 42.64136054552176, "learning_rate": 5.981254085800753e-06, "loss": 0.409820556640625, "step": 8235 }, { "epoch": 0.07124884350329871, "grad_norm": 48.772555644234586, "learning_rate": 5.9812313389540365e-06, "loss": 0.339117431640625, "step": 8240 }, { "epoch": 0.07129207702484198, "grad_norm": 9.025734240556863, "learning_rate": 5.981208578358146e-06, "loss": 0.144683837890625, "step": 8245 }, { "epoch": 0.07133531054638524, "grad_norm": 2.129919461323107, "learning_rate": 5.981185804013184e-06, "loss": 0.3052398681640625, "step": 8250 }, { "epoch": 0.07137854406792851, "grad_norm": 7.436220916187445, "learning_rate": 5.981163015919257e-06, "loss": 0.4022735595703125, "step": 8255 }, { "epoch": 0.07142177758947177, "grad_norm": 10.104015953275848, "learning_rate": 5.981140214076469e-06, "loss": 0.3692626953125, "step": 8260 }, { "epoch": 0.07146501111101504, "grad_norm": 1.2899925204267642, "learning_rate": 5.9811173984849255e-06, "loss": 0.0333831787109375, "step": 8265 }, { "epoch": 0.0715082446325583, "grad_norm": 15.89249378990258, "learning_rate": 5.981094569144733e-06, "loss": 0.14394073486328124, "step": 8270 }, { "epoch": 0.07155147815410157, "grad_norm": 33.70302413700851, "learning_rate": 5.981071726055995e-06, "loss": 0.124896240234375, "step": 8275 }, { "epoch": 0.07159471167564482, "grad_norm": 18.907049301197375, "learning_rate": 5.9810488692188175e-06, "loss": 0.3084320068359375, "step": 8280 }, { "epoch": 0.0716379451971881, "grad_norm": 9.53544064851108, "learning_rate": 5.981025998633307e-06, "loss": 0.124700927734375, "step": 8285 }, { "epoch": 0.07168117871873135, "grad_norm": 12.29873438935736, "learning_rate": 5.981003114299567e-06, "loss": 0.136895751953125, "step": 8290 }, { "epoch": 0.07172441224027462, "grad_norm": 41.94627989244765, "learning_rate": 5.980980216217705e-06, "loss": 0.09967041015625, "step": 8295 }, { "epoch": 0.07176764576181788, "grad_norm": 0.9888885162434278, "learning_rate": 5.980957304387824e-06, "loss": 0.17067108154296876, "step": 8300 }, { "epoch": 0.07181087928336115, "grad_norm": 11.99870816078589, "learning_rate": 5.980934378810033e-06, "loss": 0.1983489990234375, "step": 8305 }, { "epoch": 0.0718541128049044, "grad_norm": 24.03197270082046, "learning_rate": 5.9809114394844345e-06, "loss": 0.26607666015625, "step": 8310 }, { "epoch": 0.07189734632644768, "grad_norm": 8.614444032749875, "learning_rate": 5.980888486411138e-06, "loss": 0.3445556640625, "step": 8315 }, { "epoch": 0.07194057984799093, "grad_norm": 4.422013995391082, "learning_rate": 5.9808655195902445e-06, "loss": 0.3189483642578125, "step": 8320 }, { "epoch": 0.0719838133695342, "grad_norm": 12.867217606308973, "learning_rate": 5.980842539021864e-06, "loss": 0.197943115234375, "step": 8325 }, { "epoch": 0.07202704689107746, "grad_norm": 6.956444112967165, "learning_rate": 5.980819544706102e-06, "loss": 0.15053558349609375, "step": 8330 }, { "epoch": 0.07207028041262073, "grad_norm": 70.09210344475191, "learning_rate": 5.980796536643062e-06, "loss": 0.503436279296875, "step": 8335 }, { "epoch": 0.072113513934164, "grad_norm": 26.13661795505296, "learning_rate": 5.9807735148328534e-06, "loss": 0.136578369140625, "step": 8340 }, { "epoch": 0.07215674745570726, "grad_norm": 0.42794292113017957, "learning_rate": 5.98075047927558e-06, "loss": 0.21656036376953125, "step": 8345 }, { "epoch": 0.07219998097725053, "grad_norm": 18.902756708818604, "learning_rate": 5.980727429971348e-06, "loss": 0.25789566040039064, "step": 8350 }, { "epoch": 0.07224321449879378, "grad_norm": 2.2551442825069277, "learning_rate": 5.980704366920266e-06, "loss": 0.1163604736328125, "step": 8355 }, { "epoch": 0.07228644802033705, "grad_norm": 24.148052568334506, "learning_rate": 5.980681290122438e-06, "loss": 0.26406707763671877, "step": 8360 }, { "epoch": 0.07232968154188031, "grad_norm": 34.855091930324, "learning_rate": 5.980658199577971e-06, "loss": 0.2327789306640625, "step": 8365 }, { "epoch": 0.07237291506342358, "grad_norm": 4.2942530298820705, "learning_rate": 5.980635095286972e-06, "loss": 0.17288589477539062, "step": 8370 }, { "epoch": 0.07241614858496684, "grad_norm": 34.58465214462065, "learning_rate": 5.9806119772495474e-06, "loss": 0.250225830078125, "step": 8375 }, { "epoch": 0.07245938210651011, "grad_norm": 10.497640962828019, "learning_rate": 5.980588845465803e-06, "loss": 0.14734039306640626, "step": 8380 }, { "epoch": 0.07250261562805337, "grad_norm": 49.47006250548812, "learning_rate": 5.980565699935847e-06, "loss": 0.3252555847167969, "step": 8385 }, { "epoch": 0.07254584914959664, "grad_norm": 21.311334980591017, "learning_rate": 5.980542540659786e-06, "loss": 0.3034637451171875, "step": 8390 }, { "epoch": 0.07258908267113989, "grad_norm": 50.51008572489165, "learning_rate": 5.9805193676377246e-06, "loss": 0.272845458984375, "step": 8395 }, { "epoch": 0.07263231619268316, "grad_norm": 4.925363413943632, "learning_rate": 5.980496180869772e-06, "loss": 0.0630828857421875, "step": 8400 }, { "epoch": 0.07267554971422642, "grad_norm": 9.694011137805518, "learning_rate": 5.980472980356035e-06, "loss": 0.379205322265625, "step": 8405 }, { "epoch": 0.07271878323576969, "grad_norm": 3.530591431303073, "learning_rate": 5.980449766096619e-06, "loss": 0.187908935546875, "step": 8410 }, { "epoch": 0.07276201675731295, "grad_norm": 28.840830649105246, "learning_rate": 5.980426538091633e-06, "loss": 0.25074310302734376, "step": 8415 }, { "epoch": 0.07280525027885622, "grad_norm": 30.386412789699012, "learning_rate": 5.980403296341182e-06, "loss": 0.411578369140625, "step": 8420 }, { "epoch": 0.07284848380039947, "grad_norm": 19.740677985395237, "learning_rate": 5.980380040845374e-06, "loss": 0.28632659912109376, "step": 8425 }, { "epoch": 0.07289171732194275, "grad_norm": 22.406233973154812, "learning_rate": 5.9803567716043186e-06, "loss": 0.1901153564453125, "step": 8430 }, { "epoch": 0.072934950843486, "grad_norm": 66.37424900695468, "learning_rate": 5.98033348861812e-06, "loss": 0.2841522216796875, "step": 8435 }, { "epoch": 0.07297818436502927, "grad_norm": 4.459032588012246, "learning_rate": 5.980310191886887e-06, "loss": 0.2472900390625, "step": 8440 }, { "epoch": 0.07302141788657253, "grad_norm": 8.765534353740888, "learning_rate": 5.980286881410727e-06, "loss": 0.19537353515625, "step": 8445 }, { "epoch": 0.0730646514081158, "grad_norm": 1.7899989457024155, "learning_rate": 5.980263557189746e-06, "loss": 0.25376052856445314, "step": 8450 }, { "epoch": 0.07310788492965906, "grad_norm": 1.3952874896042073, "learning_rate": 5.980240219224053e-06, "loss": 0.486309814453125, "step": 8455 }, { "epoch": 0.07315111845120233, "grad_norm": 3.140620916436154, "learning_rate": 5.980216867513756e-06, "loss": 0.5535568237304688, "step": 8460 }, { "epoch": 0.07319435197274558, "grad_norm": 0.8788766004064322, "learning_rate": 5.980193502058962e-06, "loss": 0.244537353515625, "step": 8465 }, { "epoch": 0.07323758549428885, "grad_norm": 0.32904235351836747, "learning_rate": 5.98017012285978e-06, "loss": 0.1657135009765625, "step": 8470 }, { "epoch": 0.07328081901583211, "grad_norm": 18.521942157382995, "learning_rate": 5.980146729916315e-06, "loss": 0.13960723876953124, "step": 8475 }, { "epoch": 0.07332405253737538, "grad_norm": 7.262380024882953, "learning_rate": 5.980123323228678e-06, "loss": 0.13132476806640625, "step": 8480 }, { "epoch": 0.07336728605891864, "grad_norm": 23.61284234552271, "learning_rate": 5.9800999027969746e-06, "loss": 0.48829345703125, "step": 8485 }, { "epoch": 0.07341051958046191, "grad_norm": 9.82280166129483, "learning_rate": 5.980076468621315e-06, "loss": 0.2700439453125, "step": 8490 }, { "epoch": 0.07345375310200516, "grad_norm": 23.868154662778483, "learning_rate": 5.980053020701805e-06, "loss": 1.05028076171875, "step": 8495 }, { "epoch": 0.07349698662354844, "grad_norm": 28.825116870364386, "learning_rate": 5.980029559038554e-06, "loss": 0.2683013916015625, "step": 8500 }, { "epoch": 0.07354022014509169, "grad_norm": 31.0153875424708, "learning_rate": 5.980006083631669e-06, "loss": 0.20293731689453126, "step": 8505 }, { "epoch": 0.07358345366663496, "grad_norm": 1.48541594483469, "learning_rate": 5.97998259448126e-06, "loss": 0.24383544921875, "step": 8510 }, { "epoch": 0.07362668718817822, "grad_norm": 13.873857653963979, "learning_rate": 5.979959091587435e-06, "loss": 0.23358230590820311, "step": 8515 }, { "epoch": 0.07366992070972149, "grad_norm": 13.944275845824746, "learning_rate": 5.979935574950302e-06, "loss": 0.16703338623046876, "step": 8520 }, { "epoch": 0.07371315423126476, "grad_norm": 7.524155901794601, "learning_rate": 5.9799120445699695e-06, "loss": 0.2816253662109375, "step": 8525 }, { "epoch": 0.07375638775280802, "grad_norm": 42.77926182503163, "learning_rate": 5.979888500446546e-06, "loss": 0.28407135009765627, "step": 8530 }, { "epoch": 0.07379962127435129, "grad_norm": 29.991381828678367, "learning_rate": 5.979864942580139e-06, "loss": 0.276727294921875, "step": 8535 }, { "epoch": 0.07384285479589454, "grad_norm": 24.833029996406392, "learning_rate": 5.97984137097086e-06, "loss": 0.1551605224609375, "step": 8540 }, { "epoch": 0.07388608831743781, "grad_norm": 3.4164219391355397, "learning_rate": 5.979817785618814e-06, "loss": 0.15417404174804689, "step": 8545 }, { "epoch": 0.07392932183898107, "grad_norm": 4.176854125034674, "learning_rate": 5.9797941865241124e-06, "loss": 0.3477264404296875, "step": 8550 }, { "epoch": 0.07397255536052434, "grad_norm": 29.940216630937606, "learning_rate": 5.979770573686863e-06, "loss": 0.416558837890625, "step": 8555 }, { "epoch": 0.0740157888820676, "grad_norm": 3.1097364603081648, "learning_rate": 5.979746947107176e-06, "loss": 0.23563690185546876, "step": 8560 }, { "epoch": 0.07405902240361087, "grad_norm": 15.887856813376283, "learning_rate": 5.979723306785158e-06, "loss": 0.25849609375, "step": 8565 }, { "epoch": 0.07410225592515413, "grad_norm": 1.62167190708301, "learning_rate": 5.9796996527209204e-06, "loss": 0.1259326934814453, "step": 8570 }, { "epoch": 0.0741454894466974, "grad_norm": 9.404633679758016, "learning_rate": 5.979675984914572e-06, "loss": 0.12180938720703124, "step": 8575 }, { "epoch": 0.07418872296824065, "grad_norm": 49.46099396861432, "learning_rate": 5.97965230336622e-06, "loss": 0.307073974609375, "step": 8580 }, { "epoch": 0.07423195648978392, "grad_norm": 3.5038644406011126, "learning_rate": 5.9796286080759745e-06, "loss": 0.348529052734375, "step": 8585 }, { "epoch": 0.07427519001132718, "grad_norm": 66.36948051703378, "learning_rate": 5.979604899043946e-06, "loss": 0.20474090576171874, "step": 8590 }, { "epoch": 0.07431842353287045, "grad_norm": 4.54789300379334, "learning_rate": 5.979581176270243e-06, "loss": 0.085943603515625, "step": 8595 }, { "epoch": 0.07436165705441371, "grad_norm": 30.684473829856486, "learning_rate": 5.979557439754975e-06, "loss": 0.08620834350585938, "step": 8600 }, { "epoch": 0.07440489057595698, "grad_norm": 18.25627432848076, "learning_rate": 5.979533689498251e-06, "loss": 0.2137237548828125, "step": 8605 }, { "epoch": 0.07444812409750023, "grad_norm": 23.001250763112548, "learning_rate": 5.979509925500181e-06, "loss": 0.26997222900390627, "step": 8610 }, { "epoch": 0.0744913576190435, "grad_norm": 45.17658456861925, "learning_rate": 5.979486147760874e-06, "loss": 0.3775238037109375, "step": 8615 }, { "epoch": 0.07453459114058676, "grad_norm": 1.5211602040033727, "learning_rate": 5.979462356280441e-06, "loss": 0.4538166046142578, "step": 8620 }, { "epoch": 0.07457782466213003, "grad_norm": 7.7748948728654526, "learning_rate": 5.97943855105899e-06, "loss": 0.1942058563232422, "step": 8625 }, { "epoch": 0.07462105818367329, "grad_norm": 12.383266648927913, "learning_rate": 5.9794147320966324e-06, "loss": 0.28917884826660156, "step": 8630 }, { "epoch": 0.07466429170521656, "grad_norm": 5.864727299485902, "learning_rate": 5.9793908993934765e-06, "loss": 0.058160400390625, "step": 8635 }, { "epoch": 0.07470752522675982, "grad_norm": 7.756231108092057, "learning_rate": 5.979367052949634e-06, "loss": 0.442388916015625, "step": 8640 }, { "epoch": 0.07475075874830309, "grad_norm": 98.92990538095432, "learning_rate": 5.979343192765214e-06, "loss": 0.1935577392578125, "step": 8645 }, { "epoch": 0.07479399226984634, "grad_norm": 3.078882405351248, "learning_rate": 5.979319318840326e-06, "loss": 0.0875335693359375, "step": 8650 }, { "epoch": 0.07483722579138961, "grad_norm": 20.562792249841646, "learning_rate": 5.9792954311750806e-06, "loss": 0.249945068359375, "step": 8655 }, { "epoch": 0.07488045931293287, "grad_norm": 1.717700697938772, "learning_rate": 5.979271529769587e-06, "loss": 0.4210693359375, "step": 8660 }, { "epoch": 0.07492369283447614, "grad_norm": 4.982152229539973, "learning_rate": 5.979247614623958e-06, "loss": 0.4058509826660156, "step": 8665 }, { "epoch": 0.0749669263560194, "grad_norm": 13.353766271088958, "learning_rate": 5.979223685738301e-06, "loss": 0.22115631103515626, "step": 8670 }, { "epoch": 0.07501015987756267, "grad_norm": 2.1529207919464772, "learning_rate": 5.979199743112728e-06, "loss": 0.13254241943359374, "step": 8675 }, { "epoch": 0.07505339339910592, "grad_norm": 20.153691832846636, "learning_rate": 5.979175786747349e-06, "loss": 0.22319812774658204, "step": 8680 }, { "epoch": 0.0750966269206492, "grad_norm": 25.164120812975742, "learning_rate": 5.979151816642275e-06, "loss": 0.46113739013671873, "step": 8685 }, { "epoch": 0.07513986044219245, "grad_norm": 39.12947025869925, "learning_rate": 5.9791278327976164e-06, "loss": 0.29049072265625, "step": 8690 }, { "epoch": 0.07518309396373572, "grad_norm": 5.67084909882586, "learning_rate": 5.979103835213482e-06, "loss": 0.144329833984375, "step": 8695 }, { "epoch": 0.07522632748527898, "grad_norm": 0.4171443154666093, "learning_rate": 5.979079823889985e-06, "loss": 0.1619537353515625, "step": 8700 }, { "epoch": 0.07526956100682225, "grad_norm": 6.108056400282241, "learning_rate": 5.979055798827234e-06, "loss": 0.2948272705078125, "step": 8705 }, { "epoch": 0.07531279452836552, "grad_norm": 2.4910703768477287, "learning_rate": 5.979031760025343e-06, "loss": 0.135479736328125, "step": 8710 }, { "epoch": 0.07535602804990878, "grad_norm": 16.409211827634966, "learning_rate": 5.979007707484419e-06, "loss": 0.29057159423828127, "step": 8715 }, { "epoch": 0.07539926157145205, "grad_norm": 2.636407925360121, "learning_rate": 5.978983641204575e-06, "loss": 0.061871337890625, "step": 8720 }, { "epoch": 0.0754424950929953, "grad_norm": 28.948328362333765, "learning_rate": 5.978959561185922e-06, "loss": 0.29965972900390625, "step": 8725 }, { "epoch": 0.07548572861453857, "grad_norm": 7.415788497065855, "learning_rate": 5.97893546742857e-06, "loss": 0.2126708984375, "step": 8730 }, { "epoch": 0.07552896213608183, "grad_norm": 4.4489457048365155, "learning_rate": 5.978911359932632e-06, "loss": 0.16948699951171875, "step": 8735 }, { "epoch": 0.0755721956576251, "grad_norm": 18.16599524644508, "learning_rate": 5.978887238698217e-06, "loss": 0.298797607421875, "step": 8740 }, { "epoch": 0.07561542917916836, "grad_norm": 14.90908714965715, "learning_rate": 5.978863103725438e-06, "loss": 0.1343109130859375, "step": 8745 }, { "epoch": 0.07565866270071163, "grad_norm": 0.45360825398195065, "learning_rate": 5.978838955014406e-06, "loss": 0.2889373779296875, "step": 8750 }, { "epoch": 0.07570189622225489, "grad_norm": 26.963020594547434, "learning_rate": 5.978814792565231e-06, "loss": 0.5131210327148438, "step": 8755 }, { "epoch": 0.07574512974379816, "grad_norm": 1.4334256336322186, "learning_rate": 5.978790616378026e-06, "loss": 0.38660888671875, "step": 8760 }, { "epoch": 0.07578836326534141, "grad_norm": 9.3738240001971, "learning_rate": 5.978766426452901e-06, "loss": 0.38077239990234374, "step": 8765 }, { "epoch": 0.07583159678688468, "grad_norm": 15.729166432269185, "learning_rate": 5.9787422227899684e-06, "loss": 0.168768310546875, "step": 8770 }, { "epoch": 0.07587483030842794, "grad_norm": 40.442895326182644, "learning_rate": 5.978718005389341e-06, "loss": 0.384716796875, "step": 8775 }, { "epoch": 0.07591806382997121, "grad_norm": 3.011283447772427, "learning_rate": 5.978693774251129e-06, "loss": 0.227960205078125, "step": 8780 }, { "epoch": 0.07596129735151447, "grad_norm": 19.542844097896786, "learning_rate": 5.978669529375444e-06, "loss": 0.1964141845703125, "step": 8785 }, { "epoch": 0.07600453087305774, "grad_norm": 27.67942003716519, "learning_rate": 5.978645270762399e-06, "loss": 0.7707172393798828, "step": 8790 }, { "epoch": 0.076047764394601, "grad_norm": 14.830066255813845, "learning_rate": 5.978620998412104e-06, "loss": 0.151080322265625, "step": 8795 }, { "epoch": 0.07609099791614427, "grad_norm": 13.174721974608907, "learning_rate": 5.978596712324673e-06, "loss": 0.08379669189453125, "step": 8800 }, { "epoch": 0.07613423143768752, "grad_norm": 4.428950445410255, "learning_rate": 5.978572412500218e-06, "loss": 0.15521240234375, "step": 8805 }, { "epoch": 0.07617746495923079, "grad_norm": 69.20541230313432, "learning_rate": 5.978548098938849e-06, "loss": 0.361865234375, "step": 8810 }, { "epoch": 0.07622069848077405, "grad_norm": 52.33422244223033, "learning_rate": 5.97852377164068e-06, "loss": 0.2043285369873047, "step": 8815 }, { "epoch": 0.07626393200231732, "grad_norm": 42.82582061429009, "learning_rate": 5.978499430605822e-06, "loss": 0.363775634765625, "step": 8820 }, { "epoch": 0.07630716552386058, "grad_norm": 22.450689169208253, "learning_rate": 5.9784750758343875e-06, "loss": 0.16881103515625, "step": 8825 }, { "epoch": 0.07635039904540385, "grad_norm": 12.500135669372403, "learning_rate": 5.97845070732649e-06, "loss": 0.4246917724609375, "step": 8830 }, { "epoch": 0.0763936325669471, "grad_norm": 12.82917316332589, "learning_rate": 5.978426325082241e-06, "loss": 0.31070709228515625, "step": 8835 }, { "epoch": 0.07643686608849037, "grad_norm": 20.46978833020878, "learning_rate": 5.978401929101753e-06, "loss": 0.23080596923828126, "step": 8840 }, { "epoch": 0.07648009961003363, "grad_norm": 15.247873390279445, "learning_rate": 5.978377519385138e-06, "loss": 0.235003662109375, "step": 8845 }, { "epoch": 0.0765233331315769, "grad_norm": 11.789648337478226, "learning_rate": 5.97835309593251e-06, "loss": 0.1722259521484375, "step": 8850 }, { "epoch": 0.07656656665312016, "grad_norm": 3.2514290681226203, "learning_rate": 5.978328658743979e-06, "loss": 0.22811279296875, "step": 8855 }, { "epoch": 0.07660980017466343, "grad_norm": 20.234399745363618, "learning_rate": 5.978304207819661e-06, "loss": 0.29125518798828126, "step": 8860 }, { "epoch": 0.07665303369620668, "grad_norm": 10.938279127584606, "learning_rate": 5.9782797431596665e-06, "loss": 0.25845947265625, "step": 8865 }, { "epoch": 0.07669626721774996, "grad_norm": 6.147141616643055, "learning_rate": 5.978255264764109e-06, "loss": 0.32234344482421873, "step": 8870 }, { "epoch": 0.07673950073929321, "grad_norm": 19.477513503709105, "learning_rate": 5.978230772633101e-06, "loss": 0.1149261474609375, "step": 8875 }, { "epoch": 0.07678273426083648, "grad_norm": 16.25122899367549, "learning_rate": 5.978206266766756e-06, "loss": 0.124334716796875, "step": 8880 }, { "epoch": 0.07682596778237974, "grad_norm": 43.58474260544167, "learning_rate": 5.978181747165187e-06, "loss": 0.18107337951660157, "step": 8885 }, { "epoch": 0.07686920130392301, "grad_norm": 30.14415089438932, "learning_rate": 5.978157213828508e-06, "loss": 0.2802093505859375, "step": 8890 }, { "epoch": 0.07691243482546628, "grad_norm": 6.981593047827186, "learning_rate": 5.978132666756829e-06, "loss": 0.30447025299072267, "step": 8895 }, { "epoch": 0.07695566834700954, "grad_norm": 28.191872106722226, "learning_rate": 5.978108105950266e-06, "loss": 0.17995796203613282, "step": 8900 }, { "epoch": 0.07699890186855281, "grad_norm": 2.13557045401864, "learning_rate": 5.978083531408932e-06, "loss": 0.355859375, "step": 8905 }, { "epoch": 0.07704213539009606, "grad_norm": 42.666784875551826, "learning_rate": 5.97805894313294e-06, "loss": 0.508001708984375, "step": 8910 }, { "epoch": 0.07708536891163933, "grad_norm": 4.493419576742584, "learning_rate": 5.978034341122404e-06, "loss": 0.2560302734375, "step": 8915 }, { "epoch": 0.07712860243318259, "grad_norm": 14.653906601143557, "learning_rate": 5.978009725377435e-06, "loss": 0.2644378662109375, "step": 8920 }, { "epoch": 0.07717183595472586, "grad_norm": 4.718678565291195, "learning_rate": 5.97798509589815e-06, "loss": 0.43170166015625, "step": 8925 }, { "epoch": 0.07721506947626912, "grad_norm": 36.65170361835453, "learning_rate": 5.9779604526846604e-06, "loss": 0.21855621337890624, "step": 8930 }, { "epoch": 0.07725830299781239, "grad_norm": 1.8323101326419664, "learning_rate": 5.977935795737079e-06, "loss": 0.098779296875, "step": 8935 }, { "epoch": 0.07730153651935565, "grad_norm": 36.52441740270857, "learning_rate": 5.977911125055522e-06, "loss": 0.53074951171875, "step": 8940 }, { "epoch": 0.07734477004089892, "grad_norm": 8.641270624643445, "learning_rate": 5.977886440640102e-06, "loss": 0.373236083984375, "step": 8945 }, { "epoch": 0.07738800356244217, "grad_norm": 3.5653051529280217, "learning_rate": 5.977861742490933e-06, "loss": 0.203271484375, "step": 8950 }, { "epoch": 0.07743123708398544, "grad_norm": 21.460393871005493, "learning_rate": 5.977837030608129e-06, "loss": 0.30014724731445314, "step": 8955 }, { "epoch": 0.0774744706055287, "grad_norm": 8.837249824997373, "learning_rate": 5.977812304991803e-06, "loss": 0.1814178466796875, "step": 8960 }, { "epoch": 0.07751770412707197, "grad_norm": 23.67535408017826, "learning_rate": 5.97778756564207e-06, "loss": 0.2436126708984375, "step": 8965 }, { "epoch": 0.07756093764861523, "grad_norm": 7.3447234758569895, "learning_rate": 5.977762812559043e-06, "loss": 0.3354034423828125, "step": 8970 }, { "epoch": 0.0776041711701585, "grad_norm": 5.026198470259687, "learning_rate": 5.977738045742838e-06, "loss": 0.2921356201171875, "step": 8975 }, { "epoch": 0.07764740469170175, "grad_norm": 12.551368454663717, "learning_rate": 5.977713265193568e-06, "loss": 0.464459228515625, "step": 8980 }, { "epoch": 0.07769063821324503, "grad_norm": 16.194930769804973, "learning_rate": 5.9776884709113484e-06, "loss": 0.2688934326171875, "step": 8985 }, { "epoch": 0.07773387173478828, "grad_norm": 0.5577456368669366, "learning_rate": 5.977663662896292e-06, "loss": 0.1492218017578125, "step": 8990 }, { "epoch": 0.07777710525633155, "grad_norm": 39.64723496936373, "learning_rate": 5.977638841148514e-06, "loss": 0.2081146240234375, "step": 8995 }, { "epoch": 0.07782033877787481, "grad_norm": 13.248801803244453, "learning_rate": 5.977614005668129e-06, "loss": 0.2670318603515625, "step": 9000 }, { "epoch": 0.07786357229941808, "grad_norm": 10.76050654973934, "learning_rate": 5.977589156455251e-06, "loss": 0.19356460571289064, "step": 9005 }, { "epoch": 0.07790680582096134, "grad_norm": 16.397702173095745, "learning_rate": 5.977564293509994e-06, "loss": 0.12672653198242187, "step": 9010 }, { "epoch": 0.0779500393425046, "grad_norm": 11.545776322942757, "learning_rate": 5.977539416832475e-06, "loss": 0.27646331787109374, "step": 9015 }, { "epoch": 0.07799327286404786, "grad_norm": 31.86688962254, "learning_rate": 5.977514526422807e-06, "loss": 0.266162109375, "step": 9020 }, { "epoch": 0.07803650638559113, "grad_norm": 4.407300902965463, "learning_rate": 5.977489622281105e-06, "loss": 0.1494232177734375, "step": 9025 }, { "epoch": 0.07807973990713439, "grad_norm": 22.327229847948917, "learning_rate": 5.977464704407484e-06, "loss": 0.1675872802734375, "step": 9030 }, { "epoch": 0.07812297342867766, "grad_norm": 14.83473550647777, "learning_rate": 5.977439772802058e-06, "loss": 0.146563720703125, "step": 9035 }, { "epoch": 0.07816620695022092, "grad_norm": 0.6451148711617317, "learning_rate": 5.977414827464943e-06, "loss": 0.34095458984375, "step": 9040 }, { "epoch": 0.07820944047176419, "grad_norm": 5.683341215387772, "learning_rate": 5.9773898683962545e-06, "loss": 0.1060943603515625, "step": 9045 }, { "epoch": 0.07825267399330744, "grad_norm": 9.357794779901326, "learning_rate": 5.977364895596108e-06, "loss": 0.10186614990234374, "step": 9050 }, { "epoch": 0.07829590751485072, "grad_norm": 2.1711505386724284, "learning_rate": 5.977339909064616e-06, "loss": 0.051678466796875, "step": 9055 }, { "epoch": 0.07833914103639397, "grad_norm": 11.454413202682943, "learning_rate": 5.977314908801896e-06, "loss": 0.26998138427734375, "step": 9060 }, { "epoch": 0.07838237455793724, "grad_norm": 11.760251902530268, "learning_rate": 5.977289894808063e-06, "loss": 0.29180908203125, "step": 9065 }, { "epoch": 0.0784256080794805, "grad_norm": 9.062483283110678, "learning_rate": 5.977264867083231e-06, "loss": 0.21548233032226563, "step": 9070 }, { "epoch": 0.07846884160102377, "grad_norm": 0.23368670262348853, "learning_rate": 5.9772398256275185e-06, "loss": 0.13119354248046874, "step": 9075 }, { "epoch": 0.07851207512256704, "grad_norm": 52.495516599136586, "learning_rate": 5.9772147704410375e-06, "loss": 0.22706375122070313, "step": 9080 }, { "epoch": 0.0785553086441103, "grad_norm": 3.6793451411504146, "learning_rate": 5.977189701523905e-06, "loss": 0.02147369384765625, "step": 9085 }, { "epoch": 0.07859854216565357, "grad_norm": 28.509108387504142, "learning_rate": 5.977164618876238e-06, "loss": 0.169659423828125, "step": 9090 }, { "epoch": 0.07864177568719682, "grad_norm": 16.798258839256604, "learning_rate": 5.977139522498149e-06, "loss": 0.2101116180419922, "step": 9095 }, { "epoch": 0.0786850092087401, "grad_norm": 6.804189042632293, "learning_rate": 5.977114412389757e-06, "loss": 0.186328125, "step": 9100 }, { "epoch": 0.07872824273028335, "grad_norm": 53.500868962266885, "learning_rate": 5.977089288551176e-06, "loss": 0.20690460205078126, "step": 9105 }, { "epoch": 0.07877147625182662, "grad_norm": 2.736540454245606, "learning_rate": 5.977064150982522e-06, "loss": 0.2102783203125, "step": 9110 }, { "epoch": 0.07881470977336988, "grad_norm": 13.447423363319157, "learning_rate": 5.977038999683912e-06, "loss": 0.17527732849121094, "step": 9115 }, { "epoch": 0.07885794329491315, "grad_norm": 1.0706667790437845, "learning_rate": 5.97701383465546e-06, "loss": 0.220318603515625, "step": 9120 }, { "epoch": 0.0789011768164564, "grad_norm": 9.92561567755826, "learning_rate": 5.976988655897284e-06, "loss": 0.10293426513671874, "step": 9125 }, { "epoch": 0.07894441033799968, "grad_norm": 1.7436040374943054, "learning_rate": 5.976963463409499e-06, "loss": 0.29380950927734373, "step": 9130 }, { "epoch": 0.07898764385954293, "grad_norm": 54.618026513294005, "learning_rate": 5.976938257192222e-06, "loss": 0.47209625244140624, "step": 9135 }, { "epoch": 0.0790308773810862, "grad_norm": 9.732011848567852, "learning_rate": 5.976913037245569e-06, "loss": 0.2024078369140625, "step": 9140 }, { "epoch": 0.07907411090262946, "grad_norm": 2.059640014784559, "learning_rate": 5.976887803569655e-06, "loss": 0.061496543884277347, "step": 9145 }, { "epoch": 0.07911734442417273, "grad_norm": 52.38669168397316, "learning_rate": 5.9768625561645984e-06, "loss": 0.64281005859375, "step": 9150 }, { "epoch": 0.07916057794571599, "grad_norm": 28.805499172190313, "learning_rate": 5.976837295030515e-06, "loss": 0.147247314453125, "step": 9155 }, { "epoch": 0.07920381146725926, "grad_norm": 8.614272361852834, "learning_rate": 5.97681202016752e-06, "loss": 0.2215118408203125, "step": 9160 }, { "epoch": 0.07924704498880251, "grad_norm": 33.109867113241826, "learning_rate": 5.976786731575731e-06, "loss": 0.2839042663574219, "step": 9165 }, { "epoch": 0.07929027851034579, "grad_norm": 55.39695993060813, "learning_rate": 5.976761429255266e-06, "loss": 0.51700439453125, "step": 9170 }, { "epoch": 0.07933351203188904, "grad_norm": 17.267428124673867, "learning_rate": 5.97673611320624e-06, "loss": 0.1667633056640625, "step": 9175 }, { "epoch": 0.07937674555343231, "grad_norm": 16.594501421357727, "learning_rate": 5.9767107834287695e-06, "loss": 0.48579559326171873, "step": 9180 }, { "epoch": 0.07941997907497557, "grad_norm": 45.331287950883755, "learning_rate": 5.976685439922971e-06, "loss": 0.21409759521484376, "step": 9185 }, { "epoch": 0.07946321259651884, "grad_norm": 23.112387707814356, "learning_rate": 5.976660082688964e-06, "loss": 0.3886474609375, "step": 9190 }, { "epoch": 0.0795064461180621, "grad_norm": 0.8785985014497925, "learning_rate": 5.976634711726863e-06, "loss": 0.323345947265625, "step": 9195 }, { "epoch": 0.07954967963960537, "grad_norm": 1.0311892188814966, "learning_rate": 5.976609327036785e-06, "loss": 0.12984771728515626, "step": 9200 }, { "epoch": 0.07959291316114862, "grad_norm": 0.10563385890495523, "learning_rate": 5.976583928618849e-06, "loss": 0.2065044403076172, "step": 9205 }, { "epoch": 0.0796361466826919, "grad_norm": 57.57840451139724, "learning_rate": 5.976558516473171e-06, "loss": 0.512060546875, "step": 9210 }, { "epoch": 0.07967938020423515, "grad_norm": 31.719299902755548, "learning_rate": 5.976533090599866e-06, "loss": 0.27025909423828126, "step": 9215 }, { "epoch": 0.07972261372577842, "grad_norm": 10.8571397908985, "learning_rate": 5.976507650999055e-06, "loss": 0.0994049072265625, "step": 9220 }, { "epoch": 0.07976584724732168, "grad_norm": 2.4392913361879995, "learning_rate": 5.976482197670854e-06, "loss": 0.17762680053710939, "step": 9225 }, { "epoch": 0.07980908076886495, "grad_norm": 24.57113138845677, "learning_rate": 5.97645673061538e-06, "loss": 0.3755096435546875, "step": 9230 }, { "epoch": 0.0798523142904082, "grad_norm": 6.970913144712703, "learning_rate": 5.9764312498327505e-06, "loss": 0.16662139892578126, "step": 9235 }, { "epoch": 0.07989554781195148, "grad_norm": 1.4125379796888209, "learning_rate": 5.976405755323082e-06, "loss": 0.187176513671875, "step": 9240 }, { "epoch": 0.07993878133349473, "grad_norm": 1.1952120205816192, "learning_rate": 5.976380247086495e-06, "loss": 0.23267822265625, "step": 9245 }, { "epoch": 0.079982014855038, "grad_norm": 5.19132374557143, "learning_rate": 5.976354725123104e-06, "loss": 0.158544921875, "step": 9250 }, { "epoch": 0.08002524837658126, "grad_norm": 2.211814429015143, "learning_rate": 5.976329189433028e-06, "loss": 0.47084503173828124, "step": 9255 }, { "epoch": 0.08006848189812453, "grad_norm": 4.548719208140557, "learning_rate": 5.976303640016385e-06, "loss": 0.1621185302734375, "step": 9260 }, { "epoch": 0.0801117154196678, "grad_norm": 30.880861680559264, "learning_rate": 5.976278076873293e-06, "loss": 0.2343353271484375, "step": 9265 }, { "epoch": 0.08015494894121106, "grad_norm": 5.056092715333235, "learning_rate": 5.97625250000387e-06, "loss": 0.11478729248046875, "step": 9270 }, { "epoch": 0.08019818246275433, "grad_norm": 34.9384524841081, "learning_rate": 5.976226909408232e-06, "loss": 0.17811279296875, "step": 9275 }, { "epoch": 0.08024141598429758, "grad_norm": 31.988952517940405, "learning_rate": 5.976201305086499e-06, "loss": 0.188714599609375, "step": 9280 }, { "epoch": 0.08028464950584085, "grad_norm": 5.868914842956357, "learning_rate": 5.976175687038789e-06, "loss": 0.11533584594726562, "step": 9285 }, { "epoch": 0.08032788302738411, "grad_norm": 20.47261307073547, "learning_rate": 5.97615005526522e-06, "loss": 0.20108642578125, "step": 9290 }, { "epoch": 0.08037111654892738, "grad_norm": 7.294028996066747, "learning_rate": 5.9761244097659086e-06, "loss": 0.219287109375, "step": 9295 }, { "epoch": 0.08041435007047064, "grad_norm": 1.365957773538245, "learning_rate": 5.9760987505409756e-06, "loss": 0.06087570190429688, "step": 9300 }, { "epoch": 0.08045758359201391, "grad_norm": 4.353683518314225, "learning_rate": 5.976073077590538e-06, "loss": 0.15711669921875, "step": 9305 }, { "epoch": 0.08050081711355717, "grad_norm": 3.293317710216178, "learning_rate": 5.976047390914714e-06, "loss": 0.07652359008789063, "step": 9310 }, { "epoch": 0.08054405063510044, "grad_norm": 6.282589463760422, "learning_rate": 5.976021690513622e-06, "loss": 0.162310791015625, "step": 9315 }, { "epoch": 0.0805872841566437, "grad_norm": 14.063266337037469, "learning_rate": 5.975995976387381e-06, "loss": 0.0961395263671875, "step": 9320 }, { "epoch": 0.08063051767818696, "grad_norm": 2.477228040463217, "learning_rate": 5.9759702485361106e-06, "loss": 0.173046875, "step": 9325 }, { "epoch": 0.08067375119973022, "grad_norm": 2.4614049796108803, "learning_rate": 5.975944506959927e-06, "loss": 0.0711456298828125, "step": 9330 }, { "epoch": 0.08071698472127349, "grad_norm": 31.341458650075428, "learning_rate": 5.975918751658951e-06, "loss": 0.8036293029785156, "step": 9335 }, { "epoch": 0.08076021824281675, "grad_norm": 8.370230753373459, "learning_rate": 5.975892982633301e-06, "loss": 0.069342041015625, "step": 9340 }, { "epoch": 0.08080345176436002, "grad_norm": 7.101659356881122, "learning_rate": 5.975867199883095e-06, "loss": 0.5239959716796875, "step": 9345 }, { "epoch": 0.08084668528590327, "grad_norm": 3.193686645982965, "learning_rate": 5.975841403408453e-06, "loss": 0.19080657958984376, "step": 9350 }, { "epoch": 0.08088991880744655, "grad_norm": 5.56084805397013, "learning_rate": 5.975815593209492e-06, "loss": 0.1024169921875, "step": 9355 }, { "epoch": 0.0809331523289898, "grad_norm": 3.3546530841617224, "learning_rate": 5.975789769286334e-06, "loss": 0.1822998046875, "step": 9360 }, { "epoch": 0.08097638585053307, "grad_norm": 24.83795479123996, "learning_rate": 5.975763931639096e-06, "loss": 0.6505184173583984, "step": 9365 }, { "epoch": 0.08101961937207633, "grad_norm": 88.3480102440235, "learning_rate": 5.975738080267897e-06, "loss": 0.32347412109375, "step": 9370 }, { "epoch": 0.0810628528936196, "grad_norm": 10.281999771889538, "learning_rate": 5.9757122151728584e-06, "loss": 0.080303955078125, "step": 9375 }, { "epoch": 0.08110608641516286, "grad_norm": 7.81431248621866, "learning_rate": 5.975686336354097e-06, "loss": 0.24095458984375, "step": 9380 }, { "epoch": 0.08114931993670613, "grad_norm": 6.572506024081361, "learning_rate": 5.975660443811733e-06, "loss": 0.08730316162109375, "step": 9385 }, { "epoch": 0.08119255345824938, "grad_norm": 12.388105999976384, "learning_rate": 5.975634537545886e-06, "loss": 0.2263214111328125, "step": 9390 }, { "epoch": 0.08123578697979265, "grad_norm": 22.256707024718285, "learning_rate": 5.975608617556675e-06, "loss": 0.374615478515625, "step": 9395 }, { "epoch": 0.08127902050133591, "grad_norm": 9.676545279355881, "learning_rate": 5.975582683844222e-06, "loss": 0.24112548828125, "step": 9400 }, { "epoch": 0.08132225402287918, "grad_norm": 4.134577773012617, "learning_rate": 5.975556736408642e-06, "loss": 0.325030517578125, "step": 9405 }, { "epoch": 0.08136548754442244, "grad_norm": 31.285451596550892, "learning_rate": 5.9755307752500595e-06, "loss": 0.3108612060546875, "step": 9410 }, { "epoch": 0.08140872106596571, "grad_norm": 23.789798237362028, "learning_rate": 5.975504800368591e-06, "loss": 0.12539138793945312, "step": 9415 }, { "epoch": 0.08145195458750897, "grad_norm": 10.734782695866492, "learning_rate": 5.9754788117643576e-06, "loss": 0.2648193359375, "step": 9420 }, { "epoch": 0.08149518810905224, "grad_norm": 8.044500153086254, "learning_rate": 5.975452809437478e-06, "loss": 0.4643035888671875, "step": 9425 }, { "epoch": 0.08153842163059549, "grad_norm": 38.44390351831482, "learning_rate": 5.975426793388074e-06, "loss": 0.37254486083984373, "step": 9430 }, { "epoch": 0.08158165515213876, "grad_norm": 24.566519814914805, "learning_rate": 5.975400763616264e-06, "loss": 0.412890625, "step": 9435 }, { "epoch": 0.08162488867368202, "grad_norm": 30.021177434283064, "learning_rate": 5.975374720122168e-06, "loss": 0.5390625, "step": 9440 }, { "epoch": 0.08166812219522529, "grad_norm": 23.614537287807718, "learning_rate": 5.975348662905907e-06, "loss": 0.2928680419921875, "step": 9445 }, { "epoch": 0.08171135571676856, "grad_norm": 2.598572793682082, "learning_rate": 5.975322591967602e-06, "loss": 0.570050048828125, "step": 9450 }, { "epoch": 0.08175458923831182, "grad_norm": 5.271930757531168, "learning_rate": 5.975296507307371e-06, "loss": 0.235205078125, "step": 9455 }, { "epoch": 0.08179782275985509, "grad_norm": 55.19412288230106, "learning_rate": 5.975270408925336e-06, "loss": 0.3811798095703125, "step": 9460 }, { "epoch": 0.08184105628139834, "grad_norm": 41.14215311965705, "learning_rate": 5.975244296821617e-06, "loss": 0.3150390625, "step": 9465 }, { "epoch": 0.08188428980294161, "grad_norm": 27.796531170150665, "learning_rate": 5.975218170996332e-06, "loss": 0.3490447998046875, "step": 9470 }, { "epoch": 0.08192752332448487, "grad_norm": 8.40684395236496, "learning_rate": 5.975192031449606e-06, "loss": 0.28995819091796876, "step": 9475 }, { "epoch": 0.08197075684602814, "grad_norm": 5.007642253454993, "learning_rate": 5.9751658781815565e-06, "loss": 0.143048095703125, "step": 9480 }, { "epoch": 0.0820139903675714, "grad_norm": 1.1714073927533428, "learning_rate": 5.975139711192305e-06, "loss": 0.351531982421875, "step": 9485 }, { "epoch": 0.08205722388911467, "grad_norm": 3.866397974830824, "learning_rate": 5.975113530481971e-06, "loss": 0.0765869140625, "step": 9490 }, { "epoch": 0.08210045741065793, "grad_norm": 9.979909779458717, "learning_rate": 5.975087336050678e-06, "loss": 0.27613525390625, "step": 9495 }, { "epoch": 0.0821436909322012, "grad_norm": 19.273431084067816, "learning_rate": 5.975061127898544e-06, "loss": 0.32239837646484376, "step": 9500 }, { "epoch": 0.08218692445374445, "grad_norm": 14.276301149651278, "learning_rate": 5.975034906025692e-06, "loss": 0.1444793701171875, "step": 9505 }, { "epoch": 0.08223015797528772, "grad_norm": 0.9411373263182803, "learning_rate": 5.975008670432242e-06, "loss": 0.14062919616699218, "step": 9510 }, { "epoch": 0.08227339149683098, "grad_norm": 0.5035483633415042, "learning_rate": 5.974982421118314e-06, "loss": 0.18337326049804686, "step": 9515 }, { "epoch": 0.08231662501837425, "grad_norm": 5.9860179431210945, "learning_rate": 5.974956158084029e-06, "loss": 0.1004150390625, "step": 9520 }, { "epoch": 0.08235985853991751, "grad_norm": 26.169139011044834, "learning_rate": 5.974929881329511e-06, "loss": 0.17276611328125, "step": 9525 }, { "epoch": 0.08240309206146078, "grad_norm": 7.357241833157973, "learning_rate": 5.974903590854878e-06, "loss": 0.34443359375, "step": 9530 }, { "epoch": 0.08244632558300403, "grad_norm": 15.42446870859426, "learning_rate": 5.974877286660253e-06, "loss": 0.06521072387695312, "step": 9535 }, { "epoch": 0.0824895591045473, "grad_norm": 31.818723851056763, "learning_rate": 5.974850968745756e-06, "loss": 0.1565826416015625, "step": 9540 }, { "epoch": 0.08253279262609056, "grad_norm": 0.30942341941658763, "learning_rate": 5.9748246371115105e-06, "loss": 0.08712425231933593, "step": 9545 }, { "epoch": 0.08257602614763383, "grad_norm": 14.58820331383157, "learning_rate": 5.974798291757636e-06, "loss": 0.170220947265625, "step": 9550 }, { "epoch": 0.08261925966917709, "grad_norm": 36.68360133770911, "learning_rate": 5.974771932684255e-06, "loss": 0.2070587158203125, "step": 9555 }, { "epoch": 0.08266249319072036, "grad_norm": 44.35598954057679, "learning_rate": 5.974745559891488e-06, "loss": 0.130609130859375, "step": 9560 }, { "epoch": 0.08270572671226362, "grad_norm": 1.008583781781222, "learning_rate": 5.974719173379458e-06, "loss": 0.288433837890625, "step": 9565 }, { "epoch": 0.08274896023380689, "grad_norm": 42.6543640197866, "learning_rate": 5.9746927731482855e-06, "loss": 0.19795684814453124, "step": 9570 }, { "epoch": 0.08279219375535014, "grad_norm": 13.557399473037734, "learning_rate": 5.974666359198092e-06, "loss": 0.275927734375, "step": 9575 }, { "epoch": 0.08283542727689341, "grad_norm": 14.994868021795448, "learning_rate": 5.974639931529002e-06, "loss": 0.08589019775390624, "step": 9580 }, { "epoch": 0.08287866079843667, "grad_norm": 8.354158338968944, "learning_rate": 5.974613490141135e-06, "loss": 0.1729248046875, "step": 9585 }, { "epoch": 0.08292189431997994, "grad_norm": 3.688733481590501, "learning_rate": 5.974587035034612e-06, "loss": 0.1016937255859375, "step": 9590 }, { "epoch": 0.0829651278415232, "grad_norm": 9.441072641918902, "learning_rate": 5.974560566209558e-06, "loss": 0.2387054443359375, "step": 9595 }, { "epoch": 0.08300836136306647, "grad_norm": 10.05073242724147, "learning_rate": 5.974534083666093e-06, "loss": 0.207855224609375, "step": 9600 }, { "epoch": 0.08305159488460973, "grad_norm": 0.48277034202950403, "learning_rate": 5.9745075874043395e-06, "loss": 0.5239532470703125, "step": 9605 }, { "epoch": 0.083094828406153, "grad_norm": 13.085196740179505, "learning_rate": 5.97448107742442e-06, "loss": 0.19915313720703126, "step": 9610 }, { "epoch": 0.08313806192769625, "grad_norm": 92.1802215994842, "learning_rate": 5.974454553726457e-06, "loss": 0.16241455078125, "step": 9615 }, { "epoch": 0.08318129544923952, "grad_norm": 29.210575365017384, "learning_rate": 5.974428016310572e-06, "loss": 0.17126617431640626, "step": 9620 }, { "epoch": 0.08322452897078278, "grad_norm": 110.43145697443252, "learning_rate": 5.974401465176887e-06, "loss": 0.288037109375, "step": 9625 }, { "epoch": 0.08326776249232605, "grad_norm": 25.00460932562376, "learning_rate": 5.9743749003255265e-06, "loss": 0.24640960693359376, "step": 9630 }, { "epoch": 0.08331099601386932, "grad_norm": 12.682586544799259, "learning_rate": 5.974348321756611e-06, "loss": 0.14036407470703124, "step": 9635 }, { "epoch": 0.08335422953541258, "grad_norm": 2.069919670258263, "learning_rate": 5.974321729470264e-06, "loss": 0.4230831146240234, "step": 9640 }, { "epoch": 0.08339746305695585, "grad_norm": 1.5642988759445067, "learning_rate": 5.974295123466608e-06, "loss": 0.3356414794921875, "step": 9645 }, { "epoch": 0.0834406965784991, "grad_norm": 33.37305228597064, "learning_rate": 5.974268503745766e-06, "loss": 0.448419189453125, "step": 9650 }, { "epoch": 0.08348393010004238, "grad_norm": 4.437989073590723, "learning_rate": 5.974241870307861e-06, "loss": 0.3619110107421875, "step": 9655 }, { "epoch": 0.08352716362158563, "grad_norm": 28.48901883686757, "learning_rate": 5.974215223153014e-06, "loss": 0.53897705078125, "step": 9660 }, { "epoch": 0.0835703971431289, "grad_norm": 47.69863876007573, "learning_rate": 5.97418856228135e-06, "loss": 0.4198951721191406, "step": 9665 }, { "epoch": 0.08361363066467216, "grad_norm": 7.37734430643273, "learning_rate": 5.974161887692991e-06, "loss": 0.17200927734375, "step": 9670 }, { "epoch": 0.08365686418621543, "grad_norm": 5.776734384634531, "learning_rate": 5.97413519938806e-06, "loss": 0.08606948852539062, "step": 9675 }, { "epoch": 0.08370009770775869, "grad_norm": 33.992644844660084, "learning_rate": 5.9741084973666805e-06, "loss": 0.3325225830078125, "step": 9680 }, { "epoch": 0.08374333122930196, "grad_norm": 19.32988905272157, "learning_rate": 5.974081781628976e-06, "loss": 0.21273193359375, "step": 9685 }, { "epoch": 0.08378656475084521, "grad_norm": 63.07085630605038, "learning_rate": 5.974055052175068e-06, "loss": 0.25467987060546876, "step": 9690 }, { "epoch": 0.08382979827238848, "grad_norm": 2.815391853853809, "learning_rate": 5.974028309005082e-06, "loss": 0.0325042724609375, "step": 9695 }, { "epoch": 0.08387303179393174, "grad_norm": 12.037851811904629, "learning_rate": 5.974001552119139e-06, "loss": 0.2252685546875, "step": 9700 }, { "epoch": 0.08391626531547501, "grad_norm": 16.24358454066937, "learning_rate": 5.973974781517364e-06, "loss": 0.401959228515625, "step": 9705 }, { "epoch": 0.08395949883701827, "grad_norm": 3.865316076918934, "learning_rate": 5.973947997199881e-06, "loss": 0.21248550415039064, "step": 9710 }, { "epoch": 0.08400273235856154, "grad_norm": 6.467413028505928, "learning_rate": 5.973921199166811e-06, "loss": 0.173870849609375, "step": 9715 }, { "epoch": 0.0840459658801048, "grad_norm": 8.809286031730345, "learning_rate": 5.973894387418281e-06, "loss": 0.2296051025390625, "step": 9720 }, { "epoch": 0.08408919940164807, "grad_norm": 20.92991849455114, "learning_rate": 5.973867561954411e-06, "loss": 0.80751953125, "step": 9725 }, { "epoch": 0.08413243292319132, "grad_norm": 12.371843488550692, "learning_rate": 5.973840722775329e-06, "loss": 0.2243804931640625, "step": 9730 }, { "epoch": 0.08417566644473459, "grad_norm": 5.714039296171383, "learning_rate": 5.973813869881154e-06, "loss": 0.24007797241210938, "step": 9735 }, { "epoch": 0.08421889996627785, "grad_norm": 53.919366222947296, "learning_rate": 5.9737870032720135e-06, "loss": 0.5461578369140625, "step": 9740 }, { "epoch": 0.08426213348782112, "grad_norm": 61.206424975567806, "learning_rate": 5.973760122948029e-06, "loss": 0.502044677734375, "step": 9745 }, { "epoch": 0.08430536700936438, "grad_norm": 41.94801898337293, "learning_rate": 5.973733228909326e-06, "loss": 0.2123046875, "step": 9750 }, { "epoch": 0.08434860053090765, "grad_norm": 6.115062526467149, "learning_rate": 5.973706321156029e-06, "loss": 0.4290557861328125, "step": 9755 }, { "epoch": 0.0843918340524509, "grad_norm": 24.94217209590591, "learning_rate": 5.9736793996882604e-06, "loss": 0.339776611328125, "step": 9760 }, { "epoch": 0.08443506757399417, "grad_norm": 0.14727565435626686, "learning_rate": 5.973652464506145e-06, "loss": 0.29270401000976565, "step": 9765 }, { "epoch": 0.08447830109553743, "grad_norm": 3.642490622705205, "learning_rate": 5.973625515609808e-06, "loss": 0.0985137939453125, "step": 9770 }, { "epoch": 0.0845215346170807, "grad_norm": 8.599116293557888, "learning_rate": 5.973598552999373e-06, "loss": 0.36296615600585935, "step": 9775 }, { "epoch": 0.08456476813862396, "grad_norm": 15.27278127704458, "learning_rate": 5.973571576674963e-06, "loss": 0.19447021484375, "step": 9780 }, { "epoch": 0.08460800166016723, "grad_norm": 13.031723854622806, "learning_rate": 5.973544586636705e-06, "loss": 0.10854034423828125, "step": 9785 }, { "epoch": 0.08465123518171049, "grad_norm": 2.2504959782295013, "learning_rate": 5.973517582884721e-06, "loss": 0.21313095092773438, "step": 9790 }, { "epoch": 0.08469446870325376, "grad_norm": 29.583076067330882, "learning_rate": 5.973490565419137e-06, "loss": 0.14772491455078124, "step": 9795 }, { "epoch": 0.08473770222479701, "grad_norm": 2.4452177807405024, "learning_rate": 5.973463534240078e-06, "loss": 0.13181571960449218, "step": 9800 }, { "epoch": 0.08478093574634028, "grad_norm": 24.589433873587694, "learning_rate": 5.973436489347666e-06, "loss": 0.18140716552734376, "step": 9805 }, { "epoch": 0.08482416926788354, "grad_norm": 0.8506310259281606, "learning_rate": 5.97340943074203e-06, "loss": 0.16190185546875, "step": 9810 }, { "epoch": 0.08486740278942681, "grad_norm": 20.31376620483646, "learning_rate": 5.973382358423292e-06, "loss": 0.37896728515625, "step": 9815 }, { "epoch": 0.08491063631097008, "grad_norm": 3.590867293165558, "learning_rate": 5.9733552723915755e-06, "loss": 0.550762939453125, "step": 9820 }, { "epoch": 0.08495386983251334, "grad_norm": 17.213953026451883, "learning_rate": 5.973328172647008e-06, "loss": 0.40640106201171877, "step": 9825 }, { "epoch": 0.08499710335405661, "grad_norm": 57.15070074134439, "learning_rate": 5.973301059189714e-06, "loss": 0.63076171875, "step": 9830 }, { "epoch": 0.08504033687559986, "grad_norm": 13.013714876536765, "learning_rate": 5.973273932019819e-06, "loss": 0.28427734375, "step": 9835 }, { "epoch": 0.08508357039714314, "grad_norm": 16.475540772593913, "learning_rate": 5.973246791137446e-06, "loss": 0.282318115234375, "step": 9840 }, { "epoch": 0.08512680391868639, "grad_norm": 10.686986380929255, "learning_rate": 5.973219636542723e-06, "loss": 0.19091644287109374, "step": 9845 }, { "epoch": 0.08517003744022966, "grad_norm": 24.685458084518686, "learning_rate": 5.9731924682357725e-06, "loss": 0.3499237060546875, "step": 9850 }, { "epoch": 0.08521327096177292, "grad_norm": 41.36641132905231, "learning_rate": 5.973165286216722e-06, "loss": 0.12784423828125, "step": 9855 }, { "epoch": 0.08525650448331619, "grad_norm": 17.47168075622494, "learning_rate": 5.973138090485695e-06, "loss": 0.15927276611328126, "step": 9860 }, { "epoch": 0.08529973800485945, "grad_norm": 13.479222762414784, "learning_rate": 5.973110881042819e-06, "loss": 0.27298431396484374, "step": 9865 }, { "epoch": 0.08534297152640272, "grad_norm": 77.92500926282116, "learning_rate": 5.9730836578882165e-06, "loss": 0.4269927978515625, "step": 9870 }, { "epoch": 0.08538620504794597, "grad_norm": 23.14605937575974, "learning_rate": 5.973056421022016e-06, "loss": 0.223876953125, "step": 9875 }, { "epoch": 0.08542943856948924, "grad_norm": 6.621308871439761, "learning_rate": 5.973029170444342e-06, "loss": 0.17919549942016602, "step": 9880 }, { "epoch": 0.0854726720910325, "grad_norm": 73.95419132200041, "learning_rate": 5.97300190615532e-06, "loss": 0.366058349609375, "step": 9885 }, { "epoch": 0.08551590561257577, "grad_norm": 64.1762721309702, "learning_rate": 5.972974628155076e-06, "loss": 0.21975059509277345, "step": 9890 }, { "epoch": 0.08555913913411903, "grad_norm": 46.64134412080625, "learning_rate": 5.972947336443736e-06, "loss": 0.33580322265625, "step": 9895 }, { "epoch": 0.0856023726556623, "grad_norm": 10.080650747384404, "learning_rate": 5.972920031021425e-06, "loss": 0.11199951171875, "step": 9900 }, { "epoch": 0.08564560617720555, "grad_norm": 9.826661490178553, "learning_rate": 5.972892711888269e-06, "loss": 0.24309768676757812, "step": 9905 }, { "epoch": 0.08568883969874883, "grad_norm": 20.964023429849917, "learning_rate": 5.972865379044396e-06, "loss": 0.3934326171875, "step": 9910 }, { "epoch": 0.08573207322029208, "grad_norm": 0.8421720560775712, "learning_rate": 5.9728380324899295e-06, "loss": 0.50477294921875, "step": 9915 }, { "epoch": 0.08577530674183535, "grad_norm": 39.33236036680243, "learning_rate": 5.972810672224998e-06, "loss": 0.21436767578125, "step": 9920 }, { "epoch": 0.08581854026337861, "grad_norm": 40.67951371621833, "learning_rate": 5.972783298249725e-06, "loss": 0.4034721374511719, "step": 9925 }, { "epoch": 0.08586177378492188, "grad_norm": 0.4303725135673967, "learning_rate": 5.9727559105642385e-06, "loss": 0.35023956298828124, "step": 9930 }, { "epoch": 0.08590500730646514, "grad_norm": 5.86546478100666, "learning_rate": 5.972728509168664e-06, "loss": 0.10536651611328125, "step": 9935 }, { "epoch": 0.08594824082800841, "grad_norm": 1.7463139516775792, "learning_rate": 5.972701094063129e-06, "loss": 0.11437835693359374, "step": 9940 }, { "epoch": 0.08599147434955166, "grad_norm": 17.153913777475477, "learning_rate": 5.972673665247759e-06, "loss": 0.16003799438476562, "step": 9945 }, { "epoch": 0.08603470787109493, "grad_norm": 6.73996172059716, "learning_rate": 5.97264622272268e-06, "loss": 0.23257293701171874, "step": 9950 }, { "epoch": 0.08607794139263819, "grad_norm": 0.44824422414867976, "learning_rate": 5.9726187664880205e-06, "loss": 0.3290901184082031, "step": 9955 }, { "epoch": 0.08612117491418146, "grad_norm": 13.601189710736488, "learning_rate": 5.972591296543905e-06, "loss": 0.1255523681640625, "step": 9960 }, { "epoch": 0.08616440843572472, "grad_norm": 23.144510387102997, "learning_rate": 5.972563812890463e-06, "loss": 0.201654052734375, "step": 9965 }, { "epoch": 0.08620764195726799, "grad_norm": 15.184134004758306, "learning_rate": 5.972536315527816e-06, "loss": 0.2532470703125, "step": 9970 }, { "epoch": 0.08625087547881125, "grad_norm": 20.404571082902933, "learning_rate": 5.972508804456097e-06, "loss": 0.19463577270507812, "step": 9975 }, { "epoch": 0.08629410900035452, "grad_norm": 10.634180119399995, "learning_rate": 5.972481279675429e-06, "loss": 0.2584075927734375, "step": 9980 }, { "epoch": 0.08633734252189777, "grad_norm": 10.299804359001136, "learning_rate": 5.97245374118594e-06, "loss": 0.1218048095703125, "step": 9985 }, { "epoch": 0.08638057604344104, "grad_norm": 40.06039695958394, "learning_rate": 5.972426188987756e-06, "loss": 0.3451530456542969, "step": 9990 }, { "epoch": 0.0864238095649843, "grad_norm": 14.766446178382624, "learning_rate": 5.972398623081007e-06, "loss": 0.359954833984375, "step": 9995 }, { "epoch": 0.08646704308652757, "grad_norm": 3.0176706903033574, "learning_rate": 5.972371043465817e-06, "loss": 0.15145263671875, "step": 10000 }, { "epoch": 0.08651027660807083, "grad_norm": 35.32943606690357, "learning_rate": 5.9723434501423145e-06, "loss": 0.37257537841796873, "step": 10005 }, { "epoch": 0.0865535101296141, "grad_norm": 47.40502877752081, "learning_rate": 5.972315843110627e-06, "loss": 0.99525146484375, "step": 10010 }, { "epoch": 0.08659674365115737, "grad_norm": 14.32018791354528, "learning_rate": 5.972288222370881e-06, "loss": 0.194952392578125, "step": 10015 }, { "epoch": 0.08663997717270062, "grad_norm": 9.127189339177827, "learning_rate": 5.972260587923205e-06, "loss": 0.19522705078125, "step": 10020 }, { "epoch": 0.0866832106942439, "grad_norm": 14.64783675616304, "learning_rate": 5.972232939767726e-06, "loss": 0.223272705078125, "step": 10025 }, { "epoch": 0.08672644421578715, "grad_norm": 4.36702004717657, "learning_rate": 5.97220527790457e-06, "loss": 0.09523162841796876, "step": 10030 }, { "epoch": 0.08676967773733042, "grad_norm": 20.745207614979222, "learning_rate": 5.9721776023338665e-06, "loss": 0.20584602355957032, "step": 10035 }, { "epoch": 0.08681291125887368, "grad_norm": 7.427211569077455, "learning_rate": 5.972149913055743e-06, "loss": 0.03368072509765625, "step": 10040 }, { "epoch": 0.08685614478041695, "grad_norm": 4.90157922698661, "learning_rate": 5.9721222100703265e-06, "loss": 0.386602783203125, "step": 10045 }, { "epoch": 0.0868993783019602, "grad_norm": 1.1931344515562268, "learning_rate": 5.972094493377745e-06, "loss": 0.25673828125, "step": 10050 }, { "epoch": 0.08694261182350348, "grad_norm": 27.960964668300143, "learning_rate": 5.972066762978126e-06, "loss": 0.3246673583984375, "step": 10055 }, { "epoch": 0.08698584534504673, "grad_norm": 2.3474683365735216, "learning_rate": 5.972039018871597e-06, "loss": 0.1843536376953125, "step": 10060 }, { "epoch": 0.08702907886659, "grad_norm": 17.878658841022347, "learning_rate": 5.9720112610582876e-06, "loss": 0.267193603515625, "step": 10065 }, { "epoch": 0.08707231238813326, "grad_norm": 69.97185043265739, "learning_rate": 5.971983489538325e-06, "loss": 0.517156982421875, "step": 10070 }, { "epoch": 0.08711554590967653, "grad_norm": 18.2928886005617, "learning_rate": 5.971955704311838e-06, "loss": 0.2397613525390625, "step": 10075 }, { "epoch": 0.08715877943121979, "grad_norm": 7.831224431503593, "learning_rate": 5.971927905378952e-06, "loss": 0.2041015625, "step": 10080 }, { "epoch": 0.08720201295276306, "grad_norm": 11.923354318568586, "learning_rate": 5.971900092739798e-06, "loss": 0.423291015625, "step": 10085 }, { "epoch": 0.08724524647430631, "grad_norm": 47.00053342877463, "learning_rate": 5.971872266394503e-06, "loss": 0.3716579437255859, "step": 10090 }, { "epoch": 0.08728847999584959, "grad_norm": 14.008309699221641, "learning_rate": 5.971844426343197e-06, "loss": 0.20607452392578124, "step": 10095 }, { "epoch": 0.08733171351739284, "grad_norm": 8.34125434199902, "learning_rate": 5.971816572586005e-06, "loss": 0.14873046875, "step": 10100 }, { "epoch": 0.08737494703893611, "grad_norm": 5.066291995080734, "learning_rate": 5.97178870512306e-06, "loss": 0.22953662872314454, "step": 10105 }, { "epoch": 0.08741818056047937, "grad_norm": 60.472645414328625, "learning_rate": 5.971760823954487e-06, "loss": 0.4205780029296875, "step": 10110 }, { "epoch": 0.08746141408202264, "grad_norm": 22.72625089350026, "learning_rate": 5.971732929080414e-06, "loss": 0.573431396484375, "step": 10115 }, { "epoch": 0.0875046476035659, "grad_norm": 16.622860849239476, "learning_rate": 5.971705020500973e-06, "loss": 0.4200439453125, "step": 10120 }, { "epoch": 0.08754788112510917, "grad_norm": 2.076305037132445, "learning_rate": 5.971677098216292e-06, "loss": 0.23663787841796874, "step": 10125 }, { "epoch": 0.08759111464665242, "grad_norm": 10.13733165262601, "learning_rate": 5.971649162226497e-06, "loss": 0.11474990844726562, "step": 10130 }, { "epoch": 0.0876343481681957, "grad_norm": 1.7005596276599164, "learning_rate": 5.97162121253172e-06, "loss": 0.2712577819824219, "step": 10135 }, { "epoch": 0.08767758168973895, "grad_norm": 29.320907336423886, "learning_rate": 5.971593249132087e-06, "loss": 0.1668304443359375, "step": 10140 }, { "epoch": 0.08772081521128222, "grad_norm": 12.420230706983826, "learning_rate": 5.971565272027729e-06, "loss": 0.1840057373046875, "step": 10145 }, { "epoch": 0.08776404873282548, "grad_norm": 7.051758494966452, "learning_rate": 5.9715372812187754e-06, "loss": 0.11503448486328124, "step": 10150 }, { "epoch": 0.08780728225436875, "grad_norm": 1.3434258363607503, "learning_rate": 5.971509276705354e-06, "loss": 0.11025848388671874, "step": 10155 }, { "epoch": 0.087850515775912, "grad_norm": 16.15911062848116, "learning_rate": 5.9714812584875936e-06, "loss": 0.3657958984375, "step": 10160 }, { "epoch": 0.08789374929745528, "grad_norm": 7.830093630007225, "learning_rate": 5.971453226565625e-06, "loss": 0.191192626953125, "step": 10165 }, { "epoch": 0.08793698281899853, "grad_norm": 5.067349484267504, "learning_rate": 5.971425180939577e-06, "loss": 0.21273021697998046, "step": 10170 }, { "epoch": 0.0879802163405418, "grad_norm": 1.9587835427375284, "learning_rate": 5.971397121609578e-06, "loss": 0.1759735107421875, "step": 10175 }, { "epoch": 0.08802344986208506, "grad_norm": 4.510720845064227, "learning_rate": 5.9713690485757584e-06, "loss": 0.0751617431640625, "step": 10180 }, { "epoch": 0.08806668338362833, "grad_norm": 30.241902805808596, "learning_rate": 5.971340961838246e-06, "loss": 0.18030014038085937, "step": 10185 }, { "epoch": 0.08810991690517159, "grad_norm": 4.177871259550969, "learning_rate": 5.971312861397174e-06, "loss": 0.07701416015625, "step": 10190 }, { "epoch": 0.08815315042671486, "grad_norm": 27.201695273024217, "learning_rate": 5.971284747252668e-06, "loss": 0.260186767578125, "step": 10195 }, { "epoch": 0.08819638394825813, "grad_norm": 2.057913354949861, "learning_rate": 5.97125661940486e-06, "loss": 0.21312026977539061, "step": 10200 }, { "epoch": 0.08823961746980138, "grad_norm": 23.182836353461024, "learning_rate": 5.971228477853878e-06, "loss": 0.37080230712890627, "step": 10205 }, { "epoch": 0.08828285099134466, "grad_norm": 20.18987695488687, "learning_rate": 5.971200322599854e-06, "loss": 0.221630859375, "step": 10210 }, { "epoch": 0.08832608451288791, "grad_norm": 18.504468025299747, "learning_rate": 5.9711721536429164e-06, "loss": 0.1801513671875, "step": 10215 }, { "epoch": 0.08836931803443118, "grad_norm": 9.385593107740867, "learning_rate": 5.971143970983195e-06, "loss": 0.6923187255859375, "step": 10220 }, { "epoch": 0.08841255155597444, "grad_norm": 30.199710921804016, "learning_rate": 5.97111577462082e-06, "loss": 0.413983154296875, "step": 10225 }, { "epoch": 0.08845578507751771, "grad_norm": 4.248779795650375, "learning_rate": 5.971087564555922e-06, "loss": 0.2647735595703125, "step": 10230 }, { "epoch": 0.08849901859906097, "grad_norm": 34.51524542264373, "learning_rate": 5.971059340788631e-06, "loss": 0.23350830078125, "step": 10235 }, { "epoch": 0.08854225212060424, "grad_norm": 9.64036381250708, "learning_rate": 5.9710311033190756e-06, "loss": 0.21261749267578126, "step": 10240 }, { "epoch": 0.0885854856421475, "grad_norm": 13.149358608669377, "learning_rate": 5.971002852147389e-06, "loss": 0.35045928955078126, "step": 10245 }, { "epoch": 0.08862871916369076, "grad_norm": 35.41499038986253, "learning_rate": 5.970974587273697e-06, "loss": 0.4172607421875, "step": 10250 }, { "epoch": 0.08867195268523402, "grad_norm": 26.167377138790563, "learning_rate": 5.970946308698135e-06, "loss": 0.23032073974609374, "step": 10255 }, { "epoch": 0.08871518620677729, "grad_norm": 0.7543024098859612, "learning_rate": 5.970918016420831e-06, "loss": 0.5763046264648437, "step": 10260 }, { "epoch": 0.08875841972832055, "grad_norm": 16.109161892471175, "learning_rate": 5.970889710441916e-06, "loss": 0.19656982421875, "step": 10265 }, { "epoch": 0.08880165324986382, "grad_norm": 40.39310898862554, "learning_rate": 5.970861390761519e-06, "loss": 0.35504608154296874, "step": 10270 }, { "epoch": 0.08884488677140708, "grad_norm": 18.548843871042838, "learning_rate": 5.970833057379772e-06, "loss": 0.307318115234375, "step": 10275 }, { "epoch": 0.08888812029295035, "grad_norm": 5.811525246685331, "learning_rate": 5.9708047102968054e-06, "loss": 0.1634033203125, "step": 10280 }, { "epoch": 0.0889313538144936, "grad_norm": 3.388873684062335, "learning_rate": 5.970776349512751e-06, "loss": 0.042919921875, "step": 10285 }, { "epoch": 0.08897458733603687, "grad_norm": 5.875403118225356, "learning_rate": 5.970747975027737e-06, "loss": 0.1646240234375, "step": 10290 }, { "epoch": 0.08901782085758013, "grad_norm": 1.5534584187433615, "learning_rate": 5.970719586841897e-06, "loss": 0.22379150390625, "step": 10295 }, { "epoch": 0.0890610543791234, "grad_norm": 2.5422521436747383, "learning_rate": 5.9706911849553605e-06, "loss": 0.1654500961303711, "step": 10300 }, { "epoch": 0.08910428790066666, "grad_norm": 5.008481481567721, "learning_rate": 5.970662769368259e-06, "loss": 0.0755126953125, "step": 10305 }, { "epoch": 0.08914752142220993, "grad_norm": 65.08397969505269, "learning_rate": 5.970634340080723e-06, "loss": 0.49951629638671874, "step": 10310 }, { "epoch": 0.08919075494375318, "grad_norm": 7.522829332836227, "learning_rate": 5.970605897092884e-06, "loss": 0.58680419921875, "step": 10315 }, { "epoch": 0.08923398846529645, "grad_norm": 6.6399654946916, "learning_rate": 5.970577440404873e-06, "loss": 0.30081787109375, "step": 10320 }, { "epoch": 0.08927722198683971, "grad_norm": 44.120023945513864, "learning_rate": 5.970548970016821e-06, "loss": 0.38620529174804685, "step": 10325 }, { "epoch": 0.08932045550838298, "grad_norm": 36.130810880146434, "learning_rate": 5.97052048592886e-06, "loss": 0.21912384033203125, "step": 10330 }, { "epoch": 0.08936368902992624, "grad_norm": 39.5038506562415, "learning_rate": 5.970491988141121e-06, "loss": 0.34988861083984374, "step": 10335 }, { "epoch": 0.08940692255146951, "grad_norm": 8.074803399859046, "learning_rate": 5.970463476653736e-06, "loss": 0.09844970703125, "step": 10340 }, { "epoch": 0.08945015607301277, "grad_norm": 4.637900067444965, "learning_rate": 5.9704349514668345e-06, "loss": 0.09890823364257813, "step": 10345 }, { "epoch": 0.08949338959455604, "grad_norm": 7.514684237120038, "learning_rate": 5.97040641258055e-06, "loss": 0.08438873291015625, "step": 10350 }, { "epoch": 0.08953662311609929, "grad_norm": 9.60019389650197, "learning_rate": 5.970377859995014e-06, "loss": 0.39909896850585935, "step": 10355 }, { "epoch": 0.08957985663764256, "grad_norm": 12.889128118206099, "learning_rate": 5.970349293710358e-06, "loss": 0.22690658569335936, "step": 10360 }, { "epoch": 0.08962309015918582, "grad_norm": 7.5347843409252695, "learning_rate": 5.970320713726712e-06, "loss": 0.09138336181640624, "step": 10365 }, { "epoch": 0.08966632368072909, "grad_norm": 14.395895497592173, "learning_rate": 5.9702921200442105e-06, "loss": 0.3865966796875, "step": 10370 }, { "epoch": 0.08970955720227235, "grad_norm": 10.681270272223031, "learning_rate": 5.9702635126629846e-06, "loss": 0.248828125, "step": 10375 }, { "epoch": 0.08975279072381562, "grad_norm": 21.26451611967302, "learning_rate": 5.970234891583166e-06, "loss": 0.2202484130859375, "step": 10380 }, { "epoch": 0.08979602424535889, "grad_norm": 40.345423514409305, "learning_rate": 5.970206256804885e-06, "loss": 0.44039382934570315, "step": 10385 }, { "epoch": 0.08983925776690214, "grad_norm": 6.9368008569774355, "learning_rate": 5.970177608328277e-06, "loss": 0.075677490234375, "step": 10390 }, { "epoch": 0.08988249128844542, "grad_norm": 17.8173304255902, "learning_rate": 5.9701489461534715e-06, "loss": 0.457183837890625, "step": 10395 }, { "epoch": 0.08992572480998867, "grad_norm": 7.797718176290588, "learning_rate": 5.970120270280601e-06, "loss": 0.24974746704101564, "step": 10400 }, { "epoch": 0.08996895833153194, "grad_norm": 38.62216957069026, "learning_rate": 5.970091580709799e-06, "loss": 0.348394775390625, "step": 10405 }, { "epoch": 0.0900121918530752, "grad_norm": 50.440045096447776, "learning_rate": 5.970062877441197e-06, "loss": 0.3813629150390625, "step": 10410 }, { "epoch": 0.09005542537461847, "grad_norm": 17.06038654121812, "learning_rate": 5.970034160474927e-06, "loss": 0.0702667236328125, "step": 10415 }, { "epoch": 0.09009865889616173, "grad_norm": 0.5489078390311333, "learning_rate": 5.970005429811122e-06, "loss": 0.1612548828125, "step": 10420 }, { "epoch": 0.090141892417705, "grad_norm": 31.19305630712008, "learning_rate": 5.969976685449915e-06, "loss": 0.11552963256835938, "step": 10425 }, { "epoch": 0.09018512593924825, "grad_norm": 21.737735484558215, "learning_rate": 5.969947927391437e-06, "loss": 0.18717803955078124, "step": 10430 }, { "epoch": 0.09022835946079152, "grad_norm": 22.225014226003523, "learning_rate": 5.969919155635822e-06, "loss": 0.24605560302734375, "step": 10435 }, { "epoch": 0.09027159298233478, "grad_norm": 0.2892320693059339, "learning_rate": 5.969890370183203e-06, "loss": 0.13109664916992186, "step": 10440 }, { "epoch": 0.09031482650387805, "grad_norm": 2.84529327586152, "learning_rate": 5.969861571033711e-06, "loss": 0.6265625, "step": 10445 }, { "epoch": 0.09035806002542131, "grad_norm": 5.332570636330933, "learning_rate": 5.969832758187481e-06, "loss": 0.1914306640625, "step": 10450 }, { "epoch": 0.09040129354696458, "grad_norm": 5.092519359858804, "learning_rate": 5.969803931644644e-06, "loss": 0.25055580139160155, "step": 10455 }, { "epoch": 0.09044452706850784, "grad_norm": 41.86736106785221, "learning_rate": 5.969775091405333e-06, "loss": 0.4351310729980469, "step": 10460 }, { "epoch": 0.0904877605900511, "grad_norm": 7.374065007592485, "learning_rate": 5.969746237469683e-06, "loss": 0.22297210693359376, "step": 10465 }, { "epoch": 0.09053099411159436, "grad_norm": 4.662727751300974, "learning_rate": 5.9697173698378244e-06, "loss": 0.18662109375, "step": 10470 }, { "epoch": 0.09057422763313763, "grad_norm": 18.21174666427264, "learning_rate": 5.969688488509892e-06, "loss": 0.1083953857421875, "step": 10475 }, { "epoch": 0.09061746115468089, "grad_norm": 3.388179419089267, "learning_rate": 5.96965959348602e-06, "loss": 0.164361572265625, "step": 10480 }, { "epoch": 0.09066069467622416, "grad_norm": 4.113757103549368, "learning_rate": 5.969630684766339e-06, "loss": 0.20550918579101562, "step": 10485 }, { "epoch": 0.09070392819776742, "grad_norm": 12.1004781788531, "learning_rate": 5.969601762350985e-06, "loss": 0.2097198486328125, "step": 10490 }, { "epoch": 0.09074716171931069, "grad_norm": 4.763761862600791, "learning_rate": 5.969572826240089e-06, "loss": 0.1769989013671875, "step": 10495 }, { "epoch": 0.09079039524085394, "grad_norm": 34.3534914502328, "learning_rate": 5.969543876433785e-06, "loss": 0.526513671875, "step": 10500 }, { "epoch": 0.09083362876239721, "grad_norm": 11.544015176844438, "learning_rate": 5.969514912932208e-06, "loss": 0.7408660888671875, "step": 10505 }, { "epoch": 0.09087686228394047, "grad_norm": 1.0467327150134424, "learning_rate": 5.96948593573549e-06, "loss": 0.1937835693359375, "step": 10510 }, { "epoch": 0.09092009580548374, "grad_norm": 5.9279457302367735, "learning_rate": 5.969456944843767e-06, "loss": 0.227593994140625, "step": 10515 }, { "epoch": 0.090963329327027, "grad_norm": 18.707923215459623, "learning_rate": 5.969427940257169e-06, "loss": 0.11622314453125, "step": 10520 }, { "epoch": 0.09100656284857027, "grad_norm": 22.567887782078575, "learning_rate": 5.9693989219758325e-06, "loss": 0.162158203125, "step": 10525 }, { "epoch": 0.09104979637011353, "grad_norm": 1.0878813890719359, "learning_rate": 5.9693698899998905e-06, "loss": 0.324066162109375, "step": 10530 }, { "epoch": 0.0910930298916568, "grad_norm": 10.502579866553743, "learning_rate": 5.9693408443294764e-06, "loss": 0.28399658203125, "step": 10535 }, { "epoch": 0.09113626341320005, "grad_norm": 5.602423146247638, "learning_rate": 5.969311784964725e-06, "loss": 0.14930496215820313, "step": 10540 }, { "epoch": 0.09117949693474332, "grad_norm": 1.5788455517202047, "learning_rate": 5.969282711905771e-06, "loss": 0.0379486083984375, "step": 10545 }, { "epoch": 0.09122273045628658, "grad_norm": 4.6409996435549115, "learning_rate": 5.969253625152746e-06, "loss": 0.28699302673339844, "step": 10550 }, { "epoch": 0.09126596397782985, "grad_norm": 49.357151487082064, "learning_rate": 5.969224524705786e-06, "loss": 0.1654449462890625, "step": 10555 }, { "epoch": 0.09130919749937311, "grad_norm": 16.403506475934968, "learning_rate": 5.969195410565026e-06, "loss": 0.109478759765625, "step": 10560 }, { "epoch": 0.09135243102091638, "grad_norm": 3.544123381708635, "learning_rate": 5.969166282730598e-06, "loss": 0.07073974609375, "step": 10565 }, { "epoch": 0.09139566454245965, "grad_norm": 31.692136245364388, "learning_rate": 5.9691371412026375e-06, "loss": 0.597576904296875, "step": 10570 }, { "epoch": 0.0914388980640029, "grad_norm": 12.307433383365717, "learning_rate": 5.969107985981279e-06, "loss": 0.2949951171875, "step": 10575 }, { "epoch": 0.09148213158554618, "grad_norm": 11.552336355223755, "learning_rate": 5.969078817066657e-06, "loss": 0.26842041015625, "step": 10580 }, { "epoch": 0.09152536510708943, "grad_norm": 16.561364328211823, "learning_rate": 5.969049634458906e-06, "loss": 0.3057373046875, "step": 10585 }, { "epoch": 0.0915685986286327, "grad_norm": 128.6052387458721, "learning_rate": 5.96902043815816e-06, "loss": 0.70699462890625, "step": 10590 }, { "epoch": 0.09161183215017596, "grad_norm": 15.958080191483708, "learning_rate": 5.968991228164554e-06, "loss": 0.06835403442382812, "step": 10595 }, { "epoch": 0.09165506567171923, "grad_norm": 19.73277635721897, "learning_rate": 5.9689620044782235e-06, "loss": 0.07478866577148438, "step": 10600 }, { "epoch": 0.09169829919326249, "grad_norm": 1.8497161105042994, "learning_rate": 5.9689327670993024e-06, "loss": 0.09700469970703125, "step": 10605 }, { "epoch": 0.09174153271480576, "grad_norm": 20.851842354326536, "learning_rate": 5.968903516027925e-06, "loss": 0.394384765625, "step": 10610 }, { "epoch": 0.09178476623634901, "grad_norm": 3.6623099848358005, "learning_rate": 5.968874251264227e-06, "loss": 0.19924850463867189, "step": 10615 }, { "epoch": 0.09182799975789228, "grad_norm": 8.587309169893203, "learning_rate": 5.968844972808344e-06, "loss": 0.156756591796875, "step": 10620 }, { "epoch": 0.09187123327943554, "grad_norm": 5.903600646733417, "learning_rate": 5.96881568066041e-06, "loss": 0.2144317626953125, "step": 10625 }, { "epoch": 0.09191446680097881, "grad_norm": 18.676568521936968, "learning_rate": 5.96878637482056e-06, "loss": 0.333056640625, "step": 10630 }, { "epoch": 0.09195770032252207, "grad_norm": 8.828216394336314, "learning_rate": 5.968757055288931e-06, "loss": 0.369610595703125, "step": 10635 }, { "epoch": 0.09200093384406534, "grad_norm": 21.754575400051575, "learning_rate": 5.968727722065655e-06, "loss": 0.1058135986328125, "step": 10640 }, { "epoch": 0.0920441673656086, "grad_norm": 34.24555158836114, "learning_rate": 5.96869837515087e-06, "loss": 0.2478759765625, "step": 10645 }, { "epoch": 0.09208740088715187, "grad_norm": 34.35463748100826, "learning_rate": 5.9686690145447105e-06, "loss": 0.17297897338867188, "step": 10650 }, { "epoch": 0.09213063440869512, "grad_norm": 8.431864730998306, "learning_rate": 5.968639640247311e-06, "loss": 0.18477935791015626, "step": 10655 }, { "epoch": 0.09217386793023839, "grad_norm": 3.799310931901873, "learning_rate": 5.9686102522588095e-06, "loss": 0.08853378295898437, "step": 10660 }, { "epoch": 0.09221710145178165, "grad_norm": 1.3598348992890146, "learning_rate": 5.968580850579338e-06, "loss": 0.16551971435546875, "step": 10665 }, { "epoch": 0.09226033497332492, "grad_norm": 3.337427674029632, "learning_rate": 5.968551435209035e-06, "loss": 0.1767547607421875, "step": 10670 }, { "epoch": 0.09230356849486818, "grad_norm": 8.940915047412103, "learning_rate": 5.968522006148034e-06, "loss": 0.174560546875, "step": 10675 }, { "epoch": 0.09234680201641145, "grad_norm": 0.21012209760363365, "learning_rate": 5.968492563396472e-06, "loss": 0.17929229736328126, "step": 10680 }, { "epoch": 0.0923900355379547, "grad_norm": 8.370111299742403, "learning_rate": 5.968463106954486e-06, "loss": 0.290179443359375, "step": 10685 }, { "epoch": 0.09243326905949797, "grad_norm": 18.50348726176823, "learning_rate": 5.96843363682221e-06, "loss": 0.194915771484375, "step": 10690 }, { "epoch": 0.09247650258104123, "grad_norm": 25.660947949936673, "learning_rate": 5.968404152999779e-06, "loss": 0.27781829833984373, "step": 10695 }, { "epoch": 0.0925197361025845, "grad_norm": 18.91639808554413, "learning_rate": 5.968374655487332e-06, "loss": 0.274603271484375, "step": 10700 }, { "epoch": 0.09256296962412776, "grad_norm": 25.24935944289936, "learning_rate": 5.968345144285002e-06, "loss": 0.13546142578125, "step": 10705 }, { "epoch": 0.09260620314567103, "grad_norm": 11.179174629904738, "learning_rate": 5.968315619392928e-06, "loss": 0.151739501953125, "step": 10710 }, { "epoch": 0.09264943666721429, "grad_norm": 5.139165077414567, "learning_rate": 5.968286080811244e-06, "loss": 0.182611083984375, "step": 10715 }, { "epoch": 0.09269267018875756, "grad_norm": 4.976584301107463, "learning_rate": 5.968256528540086e-06, "loss": 0.09311370849609375, "step": 10720 }, { "epoch": 0.09273590371030081, "grad_norm": 4.06625203268348, "learning_rate": 5.968226962579592e-06, "loss": 0.19109039306640624, "step": 10725 }, { "epoch": 0.09277913723184408, "grad_norm": 5.408940778081732, "learning_rate": 5.968197382929898e-06, "loss": 0.3177040100097656, "step": 10730 }, { "epoch": 0.09282237075338734, "grad_norm": 37.84503343087337, "learning_rate": 5.968167789591139e-06, "loss": 0.414642333984375, "step": 10735 }, { "epoch": 0.09286560427493061, "grad_norm": 12.77348728113622, "learning_rate": 5.9681381825634526e-06, "loss": 0.09077301025390624, "step": 10740 }, { "epoch": 0.09290883779647387, "grad_norm": 5.9875831917922255, "learning_rate": 5.968108561846975e-06, "loss": 0.365380859375, "step": 10745 }, { "epoch": 0.09295207131801714, "grad_norm": 1.3336020042932037, "learning_rate": 5.968078927441843e-06, "loss": 0.186395263671875, "step": 10750 }, { "epoch": 0.09299530483956041, "grad_norm": 57.74844853491077, "learning_rate": 5.968049279348194e-06, "loss": 0.405126953125, "step": 10755 }, { "epoch": 0.09303853836110366, "grad_norm": 1.4098635686782715, "learning_rate": 5.968019617566163e-06, "loss": 0.22788848876953124, "step": 10760 }, { "epoch": 0.09308177188264694, "grad_norm": 12.11563577771485, "learning_rate": 5.967989942095889e-06, "loss": 0.118548583984375, "step": 10765 }, { "epoch": 0.09312500540419019, "grad_norm": 21.18175878599244, "learning_rate": 5.967960252937507e-06, "loss": 0.320819091796875, "step": 10770 }, { "epoch": 0.09316823892573346, "grad_norm": 14.596562906008522, "learning_rate": 5.9679305500911544e-06, "loss": 0.108636474609375, "step": 10775 }, { "epoch": 0.09321147244727672, "grad_norm": 118.8760705862863, "learning_rate": 5.967900833556967e-06, "loss": 0.21087188720703126, "step": 10780 }, { "epoch": 0.09325470596881999, "grad_norm": 14.530107090488645, "learning_rate": 5.967871103335086e-06, "loss": 0.256646728515625, "step": 10785 }, { "epoch": 0.09329793949036325, "grad_norm": 1.2444148256417595, "learning_rate": 5.967841359425644e-06, "loss": 0.25800285339355467, "step": 10790 }, { "epoch": 0.09334117301190652, "grad_norm": 3.297646663645346, "learning_rate": 5.96781160182878e-06, "loss": 0.1445526123046875, "step": 10795 }, { "epoch": 0.09338440653344977, "grad_norm": 11.329252305968776, "learning_rate": 5.967781830544631e-06, "loss": 0.2526611328125, "step": 10800 }, { "epoch": 0.09342764005499304, "grad_norm": 18.02388189659987, "learning_rate": 5.967752045573336e-06, "loss": 0.3261688232421875, "step": 10805 }, { "epoch": 0.0934708735765363, "grad_norm": 4.662629584702919, "learning_rate": 5.9677222469150294e-06, "loss": 0.219061279296875, "step": 10810 }, { "epoch": 0.09351410709807957, "grad_norm": 11.995702499573596, "learning_rate": 5.96769243456985e-06, "loss": 0.1838623046875, "step": 10815 }, { "epoch": 0.09355734061962283, "grad_norm": 21.93086782372639, "learning_rate": 5.967662608537936e-06, "loss": 0.2133087158203125, "step": 10820 }, { "epoch": 0.0936005741411661, "grad_norm": 2.701559316373073, "learning_rate": 5.967632768819424e-06, "loss": 0.1073883056640625, "step": 10825 }, { "epoch": 0.09364380766270936, "grad_norm": 51.07935863690244, "learning_rate": 5.967602915414451e-06, "loss": 0.3155517578125, "step": 10830 }, { "epoch": 0.09368704118425263, "grad_norm": 0.5855393830985224, "learning_rate": 5.9675730483231565e-06, "loss": 0.2326202392578125, "step": 10835 }, { "epoch": 0.09373027470579588, "grad_norm": 35.69536798217053, "learning_rate": 5.967543167545677e-06, "loss": 0.26532821655273436, "step": 10840 }, { "epoch": 0.09377350822733915, "grad_norm": 18.641763848456748, "learning_rate": 5.967513273082151e-06, "loss": 0.3183837890625, "step": 10845 }, { "epoch": 0.09381674174888241, "grad_norm": 14.682726127858501, "learning_rate": 5.967483364932716e-06, "loss": 0.08067779541015625, "step": 10850 }, { "epoch": 0.09385997527042568, "grad_norm": 27.337566364464823, "learning_rate": 5.96745344309751e-06, "loss": 0.2781364440917969, "step": 10855 }, { "epoch": 0.09390320879196894, "grad_norm": 17.93366420939978, "learning_rate": 5.967423507576671e-06, "loss": 0.3287506103515625, "step": 10860 }, { "epoch": 0.09394644231351221, "grad_norm": 32.092037174153035, "learning_rate": 5.967393558370335e-06, "loss": 0.36141357421875, "step": 10865 }, { "epoch": 0.09398967583505546, "grad_norm": 0.35643304407525445, "learning_rate": 5.967363595478645e-06, "loss": 0.18786392211914063, "step": 10870 }, { "epoch": 0.09403290935659873, "grad_norm": 13.894300870054485, "learning_rate": 5.9673336189017345e-06, "loss": 0.1823028564453125, "step": 10875 }, { "epoch": 0.09407614287814199, "grad_norm": 29.339778815123818, "learning_rate": 5.967303628639744e-06, "loss": 0.2994140625, "step": 10880 }, { "epoch": 0.09411937639968526, "grad_norm": 15.004984083338309, "learning_rate": 5.967273624692812e-06, "loss": 0.32806396484375, "step": 10885 }, { "epoch": 0.09416260992122852, "grad_norm": 2.7506828572645454, "learning_rate": 5.967243607061075e-06, "loss": 0.0924163818359375, "step": 10890 }, { "epoch": 0.09420584344277179, "grad_norm": 20.34950760900633, "learning_rate": 5.967213575744673e-06, "loss": 0.375299072265625, "step": 10895 }, { "epoch": 0.09424907696431505, "grad_norm": 6.086109686303235, "learning_rate": 5.967183530743745e-06, "loss": 0.20730743408203126, "step": 10900 }, { "epoch": 0.09429231048585832, "grad_norm": 0.8425020707480004, "learning_rate": 5.967153472058428e-06, "loss": 0.18608016967773439, "step": 10905 }, { "epoch": 0.09433554400740157, "grad_norm": 1.6271440861961926, "learning_rate": 5.967123399688861e-06, "loss": 0.24061737060546876, "step": 10910 }, { "epoch": 0.09437877752894484, "grad_norm": 22.074333666445487, "learning_rate": 5.967093313635185e-06, "loss": 0.2891571044921875, "step": 10915 }, { "epoch": 0.0944220110504881, "grad_norm": 11.436213619111916, "learning_rate": 5.967063213897535e-06, "loss": 0.09263458251953124, "step": 10920 }, { "epoch": 0.09446524457203137, "grad_norm": 46.23190384933622, "learning_rate": 5.967033100476053e-06, "loss": 0.21239013671875, "step": 10925 }, { "epoch": 0.09450847809357463, "grad_norm": 43.059544206517174, "learning_rate": 5.9670029733708745e-06, "loss": 0.2544219970703125, "step": 10930 }, { "epoch": 0.0945517116151179, "grad_norm": 25.000070035244253, "learning_rate": 5.9669728325821415e-06, "loss": 0.1285003662109375, "step": 10935 }, { "epoch": 0.09459494513666117, "grad_norm": 2.2330345300568313, "learning_rate": 5.966942678109993e-06, "loss": 0.092156982421875, "step": 10940 }, { "epoch": 0.09463817865820442, "grad_norm": 8.624466288046658, "learning_rate": 5.966912509954566e-06, "loss": 0.19739990234375, "step": 10945 }, { "epoch": 0.0946814121797477, "grad_norm": 42.59232120463804, "learning_rate": 5.966882328116e-06, "loss": 0.32579345703125, "step": 10950 }, { "epoch": 0.09472464570129095, "grad_norm": 14.995741852092388, "learning_rate": 5.966852132594436e-06, "loss": 0.46714324951171876, "step": 10955 }, { "epoch": 0.09476787922283422, "grad_norm": 18.45897391163704, "learning_rate": 5.9668219233900114e-06, "loss": 0.1115570068359375, "step": 10960 }, { "epoch": 0.09481111274437748, "grad_norm": 6.323016974735605, "learning_rate": 5.9667917005028675e-06, "loss": 0.11275482177734375, "step": 10965 }, { "epoch": 0.09485434626592075, "grad_norm": 21.76244314944868, "learning_rate": 5.966761463933141e-06, "loss": 0.304351806640625, "step": 10970 }, { "epoch": 0.094897579787464, "grad_norm": 27.740532471306395, "learning_rate": 5.9667312136809734e-06, "loss": 0.340191650390625, "step": 10975 }, { "epoch": 0.09494081330900728, "grad_norm": 31.18226210171813, "learning_rate": 5.966700949746504e-06, "loss": 0.1060211181640625, "step": 10980 }, { "epoch": 0.09498404683055053, "grad_norm": 14.486319120234988, "learning_rate": 5.966670672129871e-06, "loss": 0.1795745849609375, "step": 10985 }, { "epoch": 0.0950272803520938, "grad_norm": 10.085734096495747, "learning_rate": 5.966640380831216e-06, "loss": 0.116619873046875, "step": 10990 }, { "epoch": 0.09507051387363706, "grad_norm": 2.217603104508091, "learning_rate": 5.966610075850676e-06, "loss": 0.2626251220703125, "step": 10995 }, { "epoch": 0.09511374739518033, "grad_norm": 19.08447800401769, "learning_rate": 5.9665797571883944e-06, "loss": 0.17601394653320312, "step": 11000 }, { "epoch": 0.09515698091672359, "grad_norm": 2.8136060183188802, "learning_rate": 5.966549424844508e-06, "loss": 0.14471054077148438, "step": 11005 }, { "epoch": 0.09520021443826686, "grad_norm": 9.91647264579428, "learning_rate": 5.966519078819158e-06, "loss": 0.40673828125, "step": 11010 }, { "epoch": 0.09524344795981012, "grad_norm": 3.052262003671378, "learning_rate": 5.966488719112484e-06, "loss": 0.17502288818359374, "step": 11015 }, { "epoch": 0.09528668148135339, "grad_norm": 14.021603892203572, "learning_rate": 5.966458345724626e-06, "loss": 0.383050537109375, "step": 11020 }, { "epoch": 0.09532991500289664, "grad_norm": 4.381139778195937, "learning_rate": 5.966427958655725e-06, "loss": 0.026935577392578125, "step": 11025 }, { "epoch": 0.09537314852443991, "grad_norm": 13.715796100567646, "learning_rate": 5.96639755790592e-06, "loss": 0.1532867431640625, "step": 11030 }, { "epoch": 0.09541638204598317, "grad_norm": 6.141865838614443, "learning_rate": 5.9663671434753524e-06, "loss": 0.366839599609375, "step": 11035 }, { "epoch": 0.09545961556752644, "grad_norm": 4.0209271272695535, "learning_rate": 5.96633671536416e-06, "loss": 0.2506229400634766, "step": 11040 }, { "epoch": 0.0955028490890697, "grad_norm": 0.9672819720733797, "learning_rate": 5.966306273572486e-06, "loss": 0.311517333984375, "step": 11045 }, { "epoch": 0.09554608261061297, "grad_norm": 34.20023428344363, "learning_rate": 5.966275818100468e-06, "loss": 0.24703369140625, "step": 11050 }, { "epoch": 0.09558931613215622, "grad_norm": 3.9334120364827063, "learning_rate": 5.9662453489482495e-06, "loss": 0.0628173828125, "step": 11055 }, { "epoch": 0.0956325496536995, "grad_norm": 0.36081490230755603, "learning_rate": 5.96621486611597e-06, "loss": 0.22961769104003907, "step": 11060 }, { "epoch": 0.09567578317524275, "grad_norm": 4.217244888740181, "learning_rate": 5.9661843696037686e-06, "loss": 0.3397979736328125, "step": 11065 }, { "epoch": 0.09571901669678602, "grad_norm": 34.98007200094545, "learning_rate": 5.966153859411787e-06, "loss": 0.798895263671875, "step": 11070 }, { "epoch": 0.09576225021832928, "grad_norm": 3.529745163198932, "learning_rate": 5.9661233355401664e-06, "loss": 0.193487548828125, "step": 11075 }, { "epoch": 0.09580548373987255, "grad_norm": 6.26437981295316, "learning_rate": 5.966092797989046e-06, "loss": 0.212127685546875, "step": 11080 }, { "epoch": 0.0958487172614158, "grad_norm": 30.057568650744606, "learning_rate": 5.966062246758569e-06, "loss": 0.227362060546875, "step": 11085 }, { "epoch": 0.09589195078295908, "grad_norm": 17.948528650364448, "learning_rate": 5.966031681848875e-06, "loss": 0.36948089599609374, "step": 11090 }, { "epoch": 0.09593518430450233, "grad_norm": 14.476412587216323, "learning_rate": 5.966001103260105e-06, "loss": 0.08181724548339844, "step": 11095 }, { "epoch": 0.0959784178260456, "grad_norm": 0.5691132919724332, "learning_rate": 5.965970510992399e-06, "loss": 0.4029083251953125, "step": 11100 }, { "epoch": 0.09602165134758886, "grad_norm": 6.969025835617112, "learning_rate": 5.965939905045899e-06, "loss": 0.07400360107421874, "step": 11105 }, { "epoch": 0.09606488486913213, "grad_norm": 14.283911486244486, "learning_rate": 5.965909285420747e-06, "loss": 0.46168212890625, "step": 11110 }, { "epoch": 0.09610811839067539, "grad_norm": 6.122472483664171, "learning_rate": 5.965878652117083e-06, "loss": 0.29747314453125, "step": 11115 }, { "epoch": 0.09615135191221866, "grad_norm": 5.315216228179876, "learning_rate": 5.965848005135049e-06, "loss": 0.056634521484375, "step": 11120 }, { "epoch": 0.09619458543376193, "grad_norm": 16.252205663044247, "learning_rate": 5.9658173444747865e-06, "loss": 0.4088165283203125, "step": 11125 }, { "epoch": 0.09623781895530518, "grad_norm": 9.6761521958295, "learning_rate": 5.965786670136436e-06, "loss": 0.110498046875, "step": 11130 }, { "epoch": 0.09628105247684846, "grad_norm": 24.973494138348695, "learning_rate": 5.965755982120139e-06, "loss": 0.3236083984375, "step": 11135 }, { "epoch": 0.09632428599839171, "grad_norm": 0.9163245763054789, "learning_rate": 5.965725280426038e-06, "loss": 0.07764701843261719, "step": 11140 }, { "epoch": 0.09636751951993498, "grad_norm": 8.31240659501708, "learning_rate": 5.965694565054274e-06, "loss": 0.10275726318359375, "step": 11145 }, { "epoch": 0.09641075304147824, "grad_norm": 10.158700087783274, "learning_rate": 5.965663836004989e-06, "loss": 0.21644134521484376, "step": 11150 }, { "epoch": 0.09645398656302151, "grad_norm": 5.214776278912615, "learning_rate": 5.965633093278324e-06, "loss": 0.339447021484375, "step": 11155 }, { "epoch": 0.09649722008456477, "grad_norm": 20.37899060243129, "learning_rate": 5.96560233687442e-06, "loss": 0.114215087890625, "step": 11160 }, { "epoch": 0.09654045360610804, "grad_norm": 50.78445699764798, "learning_rate": 5.965571566793423e-06, "loss": 0.1928955078125, "step": 11165 }, { "epoch": 0.0965836871276513, "grad_norm": 9.639242029709798, "learning_rate": 5.96554078303547e-06, "loss": 0.1246307373046875, "step": 11170 }, { "epoch": 0.09662692064919456, "grad_norm": 12.56267411365289, "learning_rate": 5.965509985600706e-06, "loss": 0.10624847412109376, "step": 11175 }, { "epoch": 0.09667015417073782, "grad_norm": 2.3871792793586883, "learning_rate": 5.96547917448927e-06, "loss": 0.062945556640625, "step": 11180 }, { "epoch": 0.09671338769228109, "grad_norm": 4.184982214132474, "learning_rate": 5.965448349701308e-06, "loss": 0.554241943359375, "step": 11185 }, { "epoch": 0.09675662121382435, "grad_norm": 4.959647968038856, "learning_rate": 5.965417511236959e-06, "loss": 0.1071044921875, "step": 11190 }, { "epoch": 0.09679985473536762, "grad_norm": 16.290214695165517, "learning_rate": 5.9653866590963674e-06, "loss": 0.457354736328125, "step": 11195 }, { "epoch": 0.09684308825691088, "grad_norm": 74.46927028692775, "learning_rate": 5.965355793279674e-06, "loss": 0.44307861328125, "step": 11200 }, { "epoch": 0.09688632177845415, "grad_norm": 3.392167966348929, "learning_rate": 5.965324913787022e-06, "loss": 0.13047447204589843, "step": 11205 }, { "epoch": 0.0969295552999974, "grad_norm": 31.41912185076217, "learning_rate": 5.965294020618554e-06, "loss": 0.4222076416015625, "step": 11210 }, { "epoch": 0.09697278882154067, "grad_norm": 9.2048456590762, "learning_rate": 5.9652631137744115e-06, "loss": 0.09909515380859375, "step": 11215 }, { "epoch": 0.09701602234308393, "grad_norm": 41.670796528735586, "learning_rate": 5.965232193254737e-06, "loss": 0.23180694580078126, "step": 11220 }, { "epoch": 0.0970592558646272, "grad_norm": 3.27275427185236, "learning_rate": 5.965201259059675e-06, "loss": 0.229095458984375, "step": 11225 }, { "epoch": 0.09710248938617046, "grad_norm": 5.396139480014384, "learning_rate": 5.965170311189366e-06, "loss": 0.1114654541015625, "step": 11230 }, { "epoch": 0.09714572290771373, "grad_norm": 11.233904686052798, "learning_rate": 5.965139349643954e-06, "loss": 0.07423553466796876, "step": 11235 }, { "epoch": 0.09718895642925698, "grad_norm": 30.741625230980027, "learning_rate": 5.965108374423579e-06, "loss": 0.17598876953125, "step": 11240 }, { "epoch": 0.09723218995080025, "grad_norm": 0.7712586309736568, "learning_rate": 5.965077385528389e-06, "loss": 0.32967529296875, "step": 11245 }, { "epoch": 0.09727542347234351, "grad_norm": 1.4020050685158312, "learning_rate": 5.965046382958522e-06, "loss": 0.253424072265625, "step": 11250 }, { "epoch": 0.09731865699388678, "grad_norm": 1.4878613531832356, "learning_rate": 5.9650153667141255e-06, "loss": 0.2963531494140625, "step": 11255 }, { "epoch": 0.09736189051543004, "grad_norm": 19.46391358808716, "learning_rate": 5.964984336795338e-06, "loss": 0.27333221435546873, "step": 11260 }, { "epoch": 0.09740512403697331, "grad_norm": 12.871955234348793, "learning_rate": 5.964953293202306e-06, "loss": 0.07375640869140625, "step": 11265 }, { "epoch": 0.09744835755851657, "grad_norm": 13.69838145903411, "learning_rate": 5.964922235935171e-06, "loss": 0.12761688232421875, "step": 11270 }, { "epoch": 0.09749159108005984, "grad_norm": 21.08231282159423, "learning_rate": 5.964891164994076e-06, "loss": 0.15586166381835936, "step": 11275 }, { "epoch": 0.09753482460160309, "grad_norm": 24.656234744876976, "learning_rate": 5.964860080379166e-06, "loss": 0.214501953125, "step": 11280 }, { "epoch": 0.09757805812314636, "grad_norm": 4.936137447211636, "learning_rate": 5.964828982090582e-06, "loss": 0.10721435546875, "step": 11285 }, { "epoch": 0.09762129164468962, "grad_norm": 28.301307584331344, "learning_rate": 5.9647978701284705e-06, "loss": 0.45546875, "step": 11290 }, { "epoch": 0.09766452516623289, "grad_norm": 38.27651878032326, "learning_rate": 5.964766744492972e-06, "loss": 0.3871337890625, "step": 11295 }, { "epoch": 0.09770775868777615, "grad_norm": 9.832346583555532, "learning_rate": 5.964735605184231e-06, "loss": 0.12260055541992188, "step": 11300 }, { "epoch": 0.09775099220931942, "grad_norm": 0.8740099454980151, "learning_rate": 5.964704452202391e-06, "loss": 0.522760009765625, "step": 11305 }, { "epoch": 0.09779422573086269, "grad_norm": 43.94663870358126, "learning_rate": 5.964673285547597e-06, "loss": 0.38824462890625, "step": 11310 }, { "epoch": 0.09783745925240594, "grad_norm": 9.46035777735044, "learning_rate": 5.964642105219992e-06, "loss": 0.4113250732421875, "step": 11315 }, { "epoch": 0.09788069277394922, "grad_norm": 34.31436477622707, "learning_rate": 5.964610911219719e-06, "loss": 0.518487548828125, "step": 11320 }, { "epoch": 0.09792392629549247, "grad_norm": 10.72095041833851, "learning_rate": 5.9645797035469234e-06, "loss": 0.2099395751953125, "step": 11325 }, { "epoch": 0.09796715981703574, "grad_norm": 8.34882642226623, "learning_rate": 5.964548482201747e-06, "loss": 0.2945098876953125, "step": 11330 }, { "epoch": 0.098010393338579, "grad_norm": 7.18979275319046, "learning_rate": 5.9645172471843345e-06, "loss": 0.2141021728515625, "step": 11335 }, { "epoch": 0.09805362686012227, "grad_norm": 8.244947278198522, "learning_rate": 5.964485998494831e-06, "loss": 0.167913818359375, "step": 11340 }, { "epoch": 0.09809686038166553, "grad_norm": 18.453706202960202, "learning_rate": 5.964454736133381e-06, "loss": 0.48656463623046875, "step": 11345 }, { "epoch": 0.0981400939032088, "grad_norm": 17.806523395524568, "learning_rate": 5.964423460100127e-06, "loss": 0.195294189453125, "step": 11350 }, { "epoch": 0.09818332742475205, "grad_norm": 8.387358352246638, "learning_rate": 5.964392170395214e-06, "loss": 0.21206512451171874, "step": 11355 }, { "epoch": 0.09822656094629532, "grad_norm": 22.22498089071214, "learning_rate": 5.964360867018786e-06, "loss": 0.34420166015625, "step": 11360 }, { "epoch": 0.09826979446783858, "grad_norm": 4.960428992591674, "learning_rate": 5.964329549970987e-06, "loss": 0.17017822265625, "step": 11365 }, { "epoch": 0.09831302798938185, "grad_norm": 9.238831116963397, "learning_rate": 5.964298219251963e-06, "loss": 0.219183349609375, "step": 11370 }, { "epoch": 0.09835626151092511, "grad_norm": 2.633742063538309, "learning_rate": 5.964266874861857e-06, "loss": 0.11542205810546875, "step": 11375 }, { "epoch": 0.09839949503246838, "grad_norm": 447.6339852007769, "learning_rate": 5.9642355168008155e-06, "loss": 0.4739105224609375, "step": 11380 }, { "epoch": 0.09844272855401164, "grad_norm": 5.640248366076667, "learning_rate": 5.96420414506898e-06, "loss": 0.5085693359375, "step": 11385 }, { "epoch": 0.0984859620755549, "grad_norm": 6.170318486215109, "learning_rate": 5.964172759666498e-06, "loss": 0.14119873046875, "step": 11390 }, { "epoch": 0.09852919559709816, "grad_norm": 28.17107682086224, "learning_rate": 5.964141360593512e-06, "loss": 0.45689697265625, "step": 11395 }, { "epoch": 0.09857242911864143, "grad_norm": 13.600134868871894, "learning_rate": 5.964109947850169e-06, "loss": 0.2526100158691406, "step": 11400 }, { "epoch": 0.09861566264018469, "grad_norm": 6.230089181118798, "learning_rate": 5.964078521436612e-06, "loss": 0.31651153564453127, "step": 11405 }, { "epoch": 0.09865889616172796, "grad_norm": 13.19223877544901, "learning_rate": 5.964047081352987e-06, "loss": 0.3903076171875, "step": 11410 }, { "epoch": 0.09870212968327122, "grad_norm": 29.938598138995523, "learning_rate": 5.96401562759944e-06, "loss": 0.2411041259765625, "step": 11415 }, { "epoch": 0.09874536320481449, "grad_norm": 51.43461178308336, "learning_rate": 5.963984160176113e-06, "loss": 0.2597381591796875, "step": 11420 }, { "epoch": 0.09878859672635774, "grad_norm": 0.6354162718441687, "learning_rate": 5.963952679083154e-06, "loss": 0.1461456298828125, "step": 11425 }, { "epoch": 0.09883183024790101, "grad_norm": 40.956612742644644, "learning_rate": 5.963921184320707e-06, "loss": 0.3413330078125, "step": 11430 }, { "epoch": 0.09887506376944427, "grad_norm": 6.731686269419159, "learning_rate": 5.9638896758889176e-06, "loss": 0.19240951538085938, "step": 11435 }, { "epoch": 0.09891829729098754, "grad_norm": 16.556068163705582, "learning_rate": 5.96385815378793e-06, "loss": 0.17764892578125, "step": 11440 }, { "epoch": 0.0989615308125308, "grad_norm": 14.916319041167007, "learning_rate": 5.963826618017891e-06, "loss": 0.327984619140625, "step": 11445 }, { "epoch": 0.09900476433407407, "grad_norm": 2.4986846299917405, "learning_rate": 5.963795068578946e-06, "loss": 0.154840087890625, "step": 11450 }, { "epoch": 0.09904799785561733, "grad_norm": 6.997906285328299, "learning_rate": 5.96376350547124e-06, "loss": 0.20682621002197266, "step": 11455 }, { "epoch": 0.0990912313771606, "grad_norm": 6.031324785937273, "learning_rate": 5.9637319286949185e-06, "loss": 0.15311698913574218, "step": 11460 }, { "epoch": 0.09913446489870385, "grad_norm": 26.606564216405367, "learning_rate": 5.963700338250127e-06, "loss": 0.385247802734375, "step": 11465 }, { "epoch": 0.09917769842024712, "grad_norm": 4.0171633408301, "learning_rate": 5.9636687341370114e-06, "loss": 0.19349822998046876, "step": 11470 }, { "epoch": 0.09922093194179038, "grad_norm": 50.48041002386682, "learning_rate": 5.963637116355717e-06, "loss": 0.5343170166015625, "step": 11475 }, { "epoch": 0.09926416546333365, "grad_norm": 29.12556895405444, "learning_rate": 5.96360548490639e-06, "loss": 0.222705078125, "step": 11480 }, { "epoch": 0.09930739898487691, "grad_norm": 2.6027265506063295, "learning_rate": 5.963573839789178e-06, "loss": 0.3203216552734375, "step": 11485 }, { "epoch": 0.09935063250642018, "grad_norm": 16.58017714287846, "learning_rate": 5.9635421810042235e-06, "loss": 0.355908203125, "step": 11490 }, { "epoch": 0.09939386602796345, "grad_norm": 27.578400058147697, "learning_rate": 5.963510508551675e-06, "loss": 0.2780754089355469, "step": 11495 }, { "epoch": 0.0994370995495067, "grad_norm": 5.74769411242234, "learning_rate": 5.963478822431679e-06, "loss": 0.22577285766601562, "step": 11500 }, { "epoch": 0.09948033307104998, "grad_norm": 11.22076856671369, "learning_rate": 5.963447122644379e-06, "loss": 0.061647796630859376, "step": 11505 }, { "epoch": 0.09952356659259323, "grad_norm": 6.877039803383222, "learning_rate": 5.963415409189923e-06, "loss": 0.14226951599121093, "step": 11510 }, { "epoch": 0.0995668001141365, "grad_norm": 6.508111388451497, "learning_rate": 5.9633836820684575e-06, "loss": 0.085333251953125, "step": 11515 }, { "epoch": 0.09961003363567976, "grad_norm": 5.647468283362691, "learning_rate": 5.963351941280129e-06, "loss": 0.1194976806640625, "step": 11520 }, { "epoch": 0.09965326715722303, "grad_norm": 10.714478110389798, "learning_rate": 5.9633201868250816e-06, "loss": 0.100177001953125, "step": 11525 }, { "epoch": 0.09969650067876629, "grad_norm": 7.717793071978543, "learning_rate": 5.963288418703464e-06, "loss": 0.30455322265625, "step": 11530 }, { "epoch": 0.09973973420030956, "grad_norm": 1.765557216470102, "learning_rate": 5.9632566369154215e-06, "loss": 0.131201171875, "step": 11535 }, { "epoch": 0.09978296772185281, "grad_norm": 18.6428730752494, "learning_rate": 5.963224841461102e-06, "loss": 0.2486328125, "step": 11540 }, { "epoch": 0.09982620124339608, "grad_norm": 9.710149279014562, "learning_rate": 5.9631930323406505e-06, "loss": 0.19013671875, "step": 11545 }, { "epoch": 0.09986943476493934, "grad_norm": 3.211320167420063, "learning_rate": 5.9631612095542155e-06, "loss": 0.39586944580078126, "step": 11550 }, { "epoch": 0.09991266828648261, "grad_norm": 3.911151582444777, "learning_rate": 5.963129373101942e-06, "loss": 0.074945068359375, "step": 11555 }, { "epoch": 0.09995590180802587, "grad_norm": 2.2543190740731474, "learning_rate": 5.963097522983979e-06, "loss": 0.05845947265625, "step": 11560 }, { "epoch": 0.09999913532956914, "grad_norm": 9.83108162370555, "learning_rate": 5.963065659200471e-06, "loss": 0.20525054931640624, "step": 11565 }, { "epoch": 0.1000423688511124, "grad_norm": 16.760766760258026, "learning_rate": 5.963033781751566e-06, "loss": 0.202923583984375, "step": 11570 }, { "epoch": 0.10008560237265567, "grad_norm": 32.865535576247815, "learning_rate": 5.963001890637411e-06, "loss": 0.150592041015625, "step": 11575 }, { "epoch": 0.10012883589419892, "grad_norm": 1.6594371398304077, "learning_rate": 5.962969985858154e-06, "loss": 0.18271408081054688, "step": 11580 }, { "epoch": 0.1001720694157422, "grad_norm": 6.986121578481402, "learning_rate": 5.962938067413941e-06, "loss": 0.11644287109375, "step": 11585 }, { "epoch": 0.10021530293728545, "grad_norm": 8.465924109768578, "learning_rate": 5.962906135304918e-06, "loss": 0.40005836486816404, "step": 11590 }, { "epoch": 0.10025853645882872, "grad_norm": 2.4312392884938046, "learning_rate": 5.962874189531235e-06, "loss": 0.226409912109375, "step": 11595 }, { "epoch": 0.10030176998037198, "grad_norm": 10.623793599014498, "learning_rate": 5.962842230093037e-06, "loss": 0.18336029052734376, "step": 11600 }, { "epoch": 0.10034500350191525, "grad_norm": 1.3599961389284991, "learning_rate": 5.9628102569904736e-06, "loss": 0.24530181884765626, "step": 11605 }, { "epoch": 0.1003882370234585, "grad_norm": 36.99854286525842, "learning_rate": 5.962778270223691e-06, "loss": 0.3929107666015625, "step": 11610 }, { "epoch": 0.10043147054500177, "grad_norm": 3.774808672257255, "learning_rate": 5.962746269792837e-06, "loss": 0.07096405029296875, "step": 11615 }, { "epoch": 0.10047470406654503, "grad_norm": 1.650418220207846, "learning_rate": 5.962714255698058e-06, "loss": 0.406475830078125, "step": 11620 }, { "epoch": 0.1005179375880883, "grad_norm": 12.688173123193945, "learning_rate": 5.962682227939503e-06, "loss": 0.110980224609375, "step": 11625 }, { "epoch": 0.10056117110963156, "grad_norm": 30.843642758582373, "learning_rate": 5.9626501865173195e-06, "loss": 0.3956298828125, "step": 11630 }, { "epoch": 0.10060440463117483, "grad_norm": 27.83335931064527, "learning_rate": 5.962618131431655e-06, "loss": 0.139202880859375, "step": 11635 }, { "epoch": 0.10064763815271809, "grad_norm": 5.1889605841998065, "learning_rate": 5.962586062682658e-06, "loss": 0.26844940185546873, "step": 11640 }, { "epoch": 0.10069087167426136, "grad_norm": 0.7248540713682983, "learning_rate": 5.962553980270475e-06, "loss": 0.137066650390625, "step": 11645 }, { "epoch": 0.10073410519580461, "grad_norm": 19.44805175594182, "learning_rate": 5.962521884195256e-06, "loss": 0.309515380859375, "step": 11650 }, { "epoch": 0.10077733871734788, "grad_norm": 1.1694489388201315, "learning_rate": 5.962489774457147e-06, "loss": 0.5263885498046875, "step": 11655 }, { "epoch": 0.10082057223889114, "grad_norm": 36.6272559611016, "learning_rate": 5.962457651056297e-06, "loss": 0.140673828125, "step": 11660 }, { "epoch": 0.10086380576043441, "grad_norm": 3.1015341370923903, "learning_rate": 5.9624255139928535e-06, "loss": 0.48671875, "step": 11665 }, { "epoch": 0.10090703928197767, "grad_norm": 9.00063167788232, "learning_rate": 5.962393363266967e-06, "loss": 0.06314697265625, "step": 11670 }, { "epoch": 0.10095027280352094, "grad_norm": 35.7817573531049, "learning_rate": 5.962361198878781e-06, "loss": 0.24818572998046876, "step": 11675 }, { "epoch": 0.10099350632506421, "grad_norm": 22.26534552548358, "learning_rate": 5.96232902082845e-06, "loss": 0.26058197021484375, "step": 11680 }, { "epoch": 0.10103673984660747, "grad_norm": 2.946322483763616, "learning_rate": 5.962296829116118e-06, "loss": 0.23836898803710938, "step": 11685 }, { "epoch": 0.10107997336815074, "grad_norm": 5.077245868675985, "learning_rate": 5.962264623741935e-06, "loss": 0.1366912841796875, "step": 11690 }, { "epoch": 0.10112320688969399, "grad_norm": 0.09004250762199657, "learning_rate": 5.962232404706049e-06, "loss": 0.1607666015625, "step": 11695 }, { "epoch": 0.10116644041123726, "grad_norm": 6.69466389375152, "learning_rate": 5.96220017200861e-06, "loss": 0.20267410278320314, "step": 11700 }, { "epoch": 0.10120967393278052, "grad_norm": 8.814666801521572, "learning_rate": 5.962167925649765e-06, "loss": 0.152728271484375, "step": 11705 }, { "epoch": 0.10125290745432379, "grad_norm": 3.6405115765548097, "learning_rate": 5.9621356656296624e-06, "loss": 0.22076187133789063, "step": 11710 }, { "epoch": 0.10129614097586705, "grad_norm": 6.120857152865463, "learning_rate": 5.962103391948453e-06, "loss": 0.15755615234375, "step": 11715 }, { "epoch": 0.10133937449741032, "grad_norm": 9.01349669195407, "learning_rate": 5.962071104606284e-06, "loss": 0.39583892822265626, "step": 11720 }, { "epoch": 0.10138260801895357, "grad_norm": 10.943696854767362, "learning_rate": 5.962038803603304e-06, "loss": 0.072772216796875, "step": 11725 }, { "epoch": 0.10142584154049684, "grad_norm": 18.932222501256096, "learning_rate": 5.962006488939663e-06, "loss": 0.60211181640625, "step": 11730 }, { "epoch": 0.1014690750620401, "grad_norm": 23.324841717039174, "learning_rate": 5.961974160615511e-06, "loss": 0.219091796875, "step": 11735 }, { "epoch": 0.10151230858358337, "grad_norm": 9.651473192863492, "learning_rate": 5.961941818630995e-06, "loss": 0.15439071655273437, "step": 11740 }, { "epoch": 0.10155554210512663, "grad_norm": 7.795204543914089, "learning_rate": 5.961909462986265e-06, "loss": 0.130181884765625, "step": 11745 }, { "epoch": 0.1015987756266699, "grad_norm": 24.44352098757401, "learning_rate": 5.961877093681471e-06, "loss": 0.191790771484375, "step": 11750 }, { "epoch": 0.10164200914821316, "grad_norm": 24.312274118332805, "learning_rate": 5.961844710716761e-06, "loss": 0.1782958984375, "step": 11755 }, { "epoch": 0.10168524266975643, "grad_norm": 32.53836324152453, "learning_rate": 5.961812314092285e-06, "loss": 0.202874755859375, "step": 11760 }, { "epoch": 0.10172847619129968, "grad_norm": 35.67485736263889, "learning_rate": 5.961779903808192e-06, "loss": 0.201483154296875, "step": 11765 }, { "epoch": 0.10177170971284295, "grad_norm": 3.6992471434811303, "learning_rate": 5.961747479864632e-06, "loss": 0.11949462890625, "step": 11770 }, { "epoch": 0.10181494323438621, "grad_norm": 18.64646026049998, "learning_rate": 5.9617150422617545e-06, "loss": 0.160845947265625, "step": 11775 }, { "epoch": 0.10185817675592948, "grad_norm": 23.60589093764736, "learning_rate": 5.961682590999709e-06, "loss": 0.2531005859375, "step": 11780 }, { "epoch": 0.10190141027747274, "grad_norm": 4.70084856524608, "learning_rate": 5.961650126078644e-06, "loss": 0.1257863998413086, "step": 11785 }, { "epoch": 0.10194464379901601, "grad_norm": 59.19169039921359, "learning_rate": 5.961617647498712e-06, "loss": 0.3987457275390625, "step": 11790 }, { "epoch": 0.10198787732055926, "grad_norm": 12.121379659990552, "learning_rate": 5.9615851552600606e-06, "loss": 0.13670654296875, "step": 11795 }, { "epoch": 0.10203111084210253, "grad_norm": 11.183582643014798, "learning_rate": 5.9615526493628396e-06, "loss": 0.16344451904296875, "step": 11800 }, { "epoch": 0.10207434436364579, "grad_norm": 2.800980876453373, "learning_rate": 5.9615201298072e-06, "loss": 0.26824951171875, "step": 11805 }, { "epoch": 0.10211757788518906, "grad_norm": 35.20799837754747, "learning_rate": 5.961487596593291e-06, "loss": 0.28357696533203125, "step": 11810 }, { "epoch": 0.10216081140673232, "grad_norm": 6.529259035006286, "learning_rate": 5.9614550497212624e-06, "loss": 0.280517578125, "step": 11815 }, { "epoch": 0.10220404492827559, "grad_norm": 23.697494767303073, "learning_rate": 5.961422489191266e-06, "loss": 0.171405029296875, "step": 11820 }, { "epoch": 0.10224727844981885, "grad_norm": 15.306023387514125, "learning_rate": 5.961389915003449e-06, "loss": 0.16708221435546874, "step": 11825 }, { "epoch": 0.10229051197136212, "grad_norm": 17.041585321877857, "learning_rate": 5.961357327157965e-06, "loss": 0.11093215942382813, "step": 11830 }, { "epoch": 0.10233374549290537, "grad_norm": 9.5820216463447, "learning_rate": 5.961324725654962e-06, "loss": 0.6031723022460938, "step": 11835 }, { "epoch": 0.10237697901444864, "grad_norm": 58.13788334889465, "learning_rate": 5.9612921104945905e-06, "loss": 0.18541259765625, "step": 11840 }, { "epoch": 0.1024202125359919, "grad_norm": 0.4028053981577167, "learning_rate": 5.961259481677003e-06, "loss": 0.12896957397460937, "step": 11845 }, { "epoch": 0.10246344605753517, "grad_norm": 20.72937578600682, "learning_rate": 5.961226839202348e-06, "loss": 0.310650634765625, "step": 11850 }, { "epoch": 0.10250667957907843, "grad_norm": 6.664439104800788, "learning_rate": 5.961194183070775e-06, "loss": 0.18602142333984376, "step": 11855 }, { "epoch": 0.1025499131006217, "grad_norm": 37.41292771082435, "learning_rate": 5.961161513282437e-06, "loss": 0.5351669311523437, "step": 11860 }, { "epoch": 0.10259314662216497, "grad_norm": 9.860174381361556, "learning_rate": 5.961128829837483e-06, "loss": 0.16126480102539062, "step": 11865 }, { "epoch": 0.10263638014370823, "grad_norm": 9.110328066974127, "learning_rate": 5.961096132736067e-06, "loss": 0.14526748657226562, "step": 11870 }, { "epoch": 0.1026796136652515, "grad_norm": 7.973950535869868, "learning_rate": 5.961063421978335e-06, "loss": 0.1557159423828125, "step": 11875 }, { "epoch": 0.10272284718679475, "grad_norm": 0.503471457783695, "learning_rate": 5.961030697564441e-06, "loss": 0.17359619140625, "step": 11880 }, { "epoch": 0.10276608070833802, "grad_norm": 13.301017881352925, "learning_rate": 5.960997959494534e-06, "loss": 0.267620849609375, "step": 11885 }, { "epoch": 0.10280931422988128, "grad_norm": 36.00957855447813, "learning_rate": 5.960965207768767e-06, "loss": 0.27962646484375, "step": 11890 }, { "epoch": 0.10285254775142455, "grad_norm": 14.97624415335922, "learning_rate": 5.96093244238729e-06, "loss": 0.2462158203125, "step": 11895 }, { "epoch": 0.1028957812729678, "grad_norm": 0.8727577124573204, "learning_rate": 5.960899663350254e-06, "loss": 0.253350830078125, "step": 11900 }, { "epoch": 0.10293901479451108, "grad_norm": 18.212615763473597, "learning_rate": 5.960866870657811e-06, "loss": 0.48667449951171876, "step": 11905 }, { "epoch": 0.10298224831605433, "grad_norm": 30.90080974080515, "learning_rate": 5.960834064310111e-06, "loss": 0.174053955078125, "step": 11910 }, { "epoch": 0.1030254818375976, "grad_norm": 1.4235719719727162, "learning_rate": 5.9608012443073065e-06, "loss": 0.1605010986328125, "step": 11915 }, { "epoch": 0.10306871535914086, "grad_norm": 1.619071364279314, "learning_rate": 5.960768410649547e-06, "loss": 0.262548828125, "step": 11920 }, { "epoch": 0.10311194888068413, "grad_norm": 82.87874116730684, "learning_rate": 5.960735563336987e-06, "loss": 0.435772705078125, "step": 11925 }, { "epoch": 0.10315518240222739, "grad_norm": 8.488219276706921, "learning_rate": 5.960702702369775e-06, "loss": 0.2079833984375, "step": 11930 }, { "epoch": 0.10319841592377066, "grad_norm": 1.5669815090475796, "learning_rate": 5.960669827748064e-06, "loss": 0.2022064208984375, "step": 11935 }, { "epoch": 0.10324164944531392, "grad_norm": 5.941731132865248, "learning_rate": 5.960636939472005e-06, "loss": 0.2036346435546875, "step": 11940 }, { "epoch": 0.10328488296685719, "grad_norm": 1.3957341170792443, "learning_rate": 5.960604037541751e-06, "loss": 0.07937164306640625, "step": 11945 }, { "epoch": 0.10332811648840044, "grad_norm": 19.11011598855508, "learning_rate": 5.960571121957452e-06, "loss": 0.220263671875, "step": 11950 }, { "epoch": 0.10337135000994371, "grad_norm": 3.454016177238377, "learning_rate": 5.96053819271926e-06, "loss": 0.06893768310546874, "step": 11955 }, { "epoch": 0.10341458353148697, "grad_norm": 20.93127886627543, "learning_rate": 5.960505249827329e-06, "loss": 0.3408447265625, "step": 11960 }, { "epoch": 0.10345781705303024, "grad_norm": 9.177934511649198, "learning_rate": 5.960472293281808e-06, "loss": 0.1470123291015625, "step": 11965 }, { "epoch": 0.1035010505745735, "grad_norm": 23.593874862109985, "learning_rate": 5.960439323082852e-06, "loss": 0.138134765625, "step": 11970 }, { "epoch": 0.10354428409611677, "grad_norm": 37.837175150460915, "learning_rate": 5.960406339230611e-06, "loss": 0.21967010498046874, "step": 11975 }, { "epoch": 0.10358751761766002, "grad_norm": 0.8941325068027005, "learning_rate": 5.960373341725236e-06, "loss": 0.682196044921875, "step": 11980 }, { "epoch": 0.1036307511392033, "grad_norm": 3.7650703801845866, "learning_rate": 5.960340330566882e-06, "loss": 0.07244873046875, "step": 11985 }, { "epoch": 0.10367398466074655, "grad_norm": 2.3848627214522975, "learning_rate": 5.960307305755699e-06, "loss": 0.2591064453125, "step": 11990 }, { "epoch": 0.10371721818228982, "grad_norm": 32.751146128087605, "learning_rate": 5.960274267291841e-06, "loss": 0.710107421875, "step": 11995 }, { "epoch": 0.10376045170383308, "grad_norm": 21.634478284257067, "learning_rate": 5.96024121517546e-06, "loss": 0.2479400634765625, "step": 12000 }, { "epoch": 0.10380368522537635, "grad_norm": 5.953707013181535, "learning_rate": 5.960208149406707e-06, "loss": 0.5508621215820313, "step": 12005 }, { "epoch": 0.1038469187469196, "grad_norm": 2.6921828613207723, "learning_rate": 5.9601750699857365e-06, "loss": 0.1415863037109375, "step": 12010 }, { "epoch": 0.10389015226846288, "grad_norm": 4.262682496092229, "learning_rate": 5.960141976912701e-06, "loss": 0.10245513916015625, "step": 12015 }, { "epoch": 0.10393338579000613, "grad_norm": 0.4752715518472241, "learning_rate": 5.960108870187751e-06, "loss": 0.13344573974609375, "step": 12020 }, { "epoch": 0.1039766193115494, "grad_norm": 20.40452510011408, "learning_rate": 5.96007574981104e-06, "loss": 0.0983154296875, "step": 12025 }, { "epoch": 0.10401985283309266, "grad_norm": 21.69925870041268, "learning_rate": 5.960042615782722e-06, "loss": 0.13265380859375, "step": 12030 }, { "epoch": 0.10406308635463593, "grad_norm": 10.505496579406984, "learning_rate": 5.960009468102949e-06, "loss": 0.055791473388671874, "step": 12035 }, { "epoch": 0.10410631987617919, "grad_norm": 1.2120700610659398, "learning_rate": 5.959976306771873e-06, "loss": 0.5840423583984375, "step": 12040 }, { "epoch": 0.10414955339772246, "grad_norm": 30.043172468822522, "learning_rate": 5.959943131789649e-06, "loss": 0.41475982666015626, "step": 12045 }, { "epoch": 0.10419278691926573, "grad_norm": 35.990571663111865, "learning_rate": 5.959909943156429e-06, "loss": 0.170721435546875, "step": 12050 }, { "epoch": 0.10423602044080899, "grad_norm": 0.4992814655198375, "learning_rate": 5.9598767408723645e-06, "loss": 0.28007659912109373, "step": 12055 }, { "epoch": 0.10427925396235226, "grad_norm": 36.41704063161946, "learning_rate": 5.95984352493761e-06, "loss": 0.3255279541015625, "step": 12060 }, { "epoch": 0.10432248748389551, "grad_norm": 8.48226596130869, "learning_rate": 5.959810295352321e-06, "loss": 0.5492156982421875, "step": 12065 }, { "epoch": 0.10436572100543878, "grad_norm": 52.56244616267764, "learning_rate": 5.959777052116646e-06, "loss": 0.6015678405761719, "step": 12070 }, { "epoch": 0.10440895452698204, "grad_norm": 3.067494226207724, "learning_rate": 5.9597437952307415e-06, "loss": 0.240966796875, "step": 12075 }, { "epoch": 0.10445218804852531, "grad_norm": 15.066147034660668, "learning_rate": 5.959710524694761e-06, "loss": 0.167388916015625, "step": 12080 }, { "epoch": 0.10449542157006857, "grad_norm": 14.561716748054355, "learning_rate": 5.959677240508856e-06, "loss": 0.4870597839355469, "step": 12085 }, { "epoch": 0.10453865509161184, "grad_norm": 3.2698546507098953, "learning_rate": 5.959643942673182e-06, "loss": 0.218017578125, "step": 12090 }, { "epoch": 0.1045818886131551, "grad_norm": 12.19101787332391, "learning_rate": 5.95961063118789e-06, "loss": 0.14471435546875, "step": 12095 }, { "epoch": 0.10462512213469836, "grad_norm": 0.7723888910248614, "learning_rate": 5.959577306053138e-06, "loss": 0.12786865234375, "step": 12100 }, { "epoch": 0.10466835565624162, "grad_norm": 2.473249905508043, "learning_rate": 5.959543967269075e-06, "loss": 0.15948715209960937, "step": 12105 }, { "epoch": 0.10471158917778489, "grad_norm": 11.849522080665047, "learning_rate": 5.959510614835857e-06, "loss": 0.09007759094238281, "step": 12110 }, { "epoch": 0.10475482269932815, "grad_norm": 0.8781729230585562, "learning_rate": 5.959477248753637e-06, "loss": 0.1706329345703125, "step": 12115 }, { "epoch": 0.10479805622087142, "grad_norm": 24.828494889456667, "learning_rate": 5.959443869022571e-06, "loss": 0.192535400390625, "step": 12120 }, { "epoch": 0.10484128974241468, "grad_norm": 18.94239813312701, "learning_rate": 5.95941047564281e-06, "loss": 0.21183929443359376, "step": 12125 }, { "epoch": 0.10488452326395795, "grad_norm": 27.06060932850381, "learning_rate": 5.95937706861451e-06, "loss": 0.355206298828125, "step": 12130 }, { "epoch": 0.1049277567855012, "grad_norm": 6.752373881425471, "learning_rate": 5.959343647937824e-06, "loss": 0.04214019775390625, "step": 12135 }, { "epoch": 0.10497099030704447, "grad_norm": 24.587518650556312, "learning_rate": 5.9593102136129075e-06, "loss": 0.06853179931640625, "step": 12140 }, { "epoch": 0.10501422382858773, "grad_norm": 13.762748490364713, "learning_rate": 5.959276765639913e-06, "loss": 0.1755218505859375, "step": 12145 }, { "epoch": 0.105057457350131, "grad_norm": 14.93963276212016, "learning_rate": 5.9592433040189956e-06, "loss": 0.28036956787109374, "step": 12150 }, { "epoch": 0.10510069087167426, "grad_norm": 35.48816712635642, "learning_rate": 5.95920982875031e-06, "loss": 0.4624656677246094, "step": 12155 }, { "epoch": 0.10514392439321753, "grad_norm": 10.59375877846719, "learning_rate": 5.95917633983401e-06, "loss": 0.215380859375, "step": 12160 }, { "epoch": 0.10518715791476078, "grad_norm": 4.3855620219812, "learning_rate": 5.95914283727025e-06, "loss": 0.1049041748046875, "step": 12165 }, { "epoch": 0.10523039143630405, "grad_norm": 29.917914910431282, "learning_rate": 5.959109321059184e-06, "loss": 0.265399169921875, "step": 12170 }, { "epoch": 0.10527362495784731, "grad_norm": 23.001774705294594, "learning_rate": 5.959075791200969e-06, "loss": 0.2013641357421875, "step": 12175 }, { "epoch": 0.10531685847939058, "grad_norm": 24.16745038210624, "learning_rate": 5.959042247695757e-06, "loss": 0.4964012145996094, "step": 12180 }, { "epoch": 0.10536009200093384, "grad_norm": 4.051811837383696, "learning_rate": 5.959008690543704e-06, "loss": 0.0985260009765625, "step": 12185 }, { "epoch": 0.10540332552247711, "grad_norm": 5.3599138152472, "learning_rate": 5.958975119744963e-06, "loss": 0.3102745056152344, "step": 12190 }, { "epoch": 0.10544655904402037, "grad_norm": 8.277449210870289, "learning_rate": 5.9589415352996915e-06, "loss": 0.1022735595703125, "step": 12195 }, { "epoch": 0.10548979256556364, "grad_norm": 44.85459372892811, "learning_rate": 5.9589079372080424e-06, "loss": 0.49610595703125, "step": 12200 }, { "epoch": 0.1055330260871069, "grad_norm": 14.308739621201003, "learning_rate": 5.958874325470172e-06, "loss": 0.154315185546875, "step": 12205 }, { "epoch": 0.10557625960865016, "grad_norm": 4.190644558069299, "learning_rate": 5.958840700086234e-06, "loss": 0.2715972900390625, "step": 12210 }, { "epoch": 0.10561949313019342, "grad_norm": 0.8828122476536281, "learning_rate": 5.958807061056385e-06, "loss": 0.16395111083984376, "step": 12215 }, { "epoch": 0.10566272665173669, "grad_norm": 11.810342218519935, "learning_rate": 5.958773408380779e-06, "loss": 0.328179931640625, "step": 12220 }, { "epoch": 0.10570596017327995, "grad_norm": 16.00879163708655, "learning_rate": 5.958739742059572e-06, "loss": 0.42861328125, "step": 12225 }, { "epoch": 0.10574919369482322, "grad_norm": 6.573204508044176, "learning_rate": 5.958706062092917e-06, "loss": 0.1602386474609375, "step": 12230 }, { "epoch": 0.10579242721636649, "grad_norm": 15.044751468474809, "learning_rate": 5.958672368480972e-06, "loss": 0.1012603759765625, "step": 12235 }, { "epoch": 0.10583566073790975, "grad_norm": 36.3934077789277, "learning_rate": 5.9586386612238915e-06, "loss": 0.22796630859375, "step": 12240 }, { "epoch": 0.10587889425945302, "grad_norm": 36.49242414005973, "learning_rate": 5.958604940321831e-06, "loss": 0.2229095458984375, "step": 12245 }, { "epoch": 0.10592212778099627, "grad_norm": 9.626079131878504, "learning_rate": 5.958571205774946e-06, "loss": 0.21823959350585936, "step": 12250 }, { "epoch": 0.10596536130253954, "grad_norm": 0.11411544684355648, "learning_rate": 5.958537457583393e-06, "loss": 0.09373550415039063, "step": 12255 }, { "epoch": 0.1060085948240828, "grad_norm": 28.20137108651056, "learning_rate": 5.958503695747325e-06, "loss": 0.3178466796875, "step": 12260 }, { "epoch": 0.10605182834562607, "grad_norm": 27.87750633428315, "learning_rate": 5.9584699202669e-06, "loss": 0.41124839782714845, "step": 12265 }, { "epoch": 0.10609506186716933, "grad_norm": 3.0005554828768743, "learning_rate": 5.958436131142273e-06, "loss": 0.11150436401367188, "step": 12270 }, { "epoch": 0.1061382953887126, "grad_norm": 0.7371447957346965, "learning_rate": 5.9584023283736e-06, "loss": 0.14984664916992188, "step": 12275 }, { "epoch": 0.10618152891025585, "grad_norm": 5.956270694093625, "learning_rate": 5.958368511961037e-06, "loss": 0.28236236572265627, "step": 12280 }, { "epoch": 0.10622476243179912, "grad_norm": 2.3147256021217353, "learning_rate": 5.9583346819047406e-06, "loss": 0.15153579711914061, "step": 12285 }, { "epoch": 0.10626799595334238, "grad_norm": 30.11179813985326, "learning_rate": 5.958300838204864e-06, "loss": 0.41706695556640627, "step": 12290 }, { "epoch": 0.10631122947488565, "grad_norm": 0.4313864330899434, "learning_rate": 5.9582669808615675e-06, "loss": 0.1411346435546875, "step": 12295 }, { "epoch": 0.10635446299642891, "grad_norm": 1.0609615584035768, "learning_rate": 5.958233109875004e-06, "loss": 0.21763687133789061, "step": 12300 }, { "epoch": 0.10639769651797218, "grad_norm": 3.920586313495099, "learning_rate": 5.958199225245331e-06, "loss": 0.48209228515625, "step": 12305 }, { "epoch": 0.10644093003951544, "grad_norm": 3.1171578793629258, "learning_rate": 5.9581653269727045e-06, "loss": 0.4880340576171875, "step": 12310 }, { "epoch": 0.1064841635610587, "grad_norm": 9.126546486334457, "learning_rate": 5.958131415057281e-06, "loss": 0.0609405517578125, "step": 12315 }, { "epoch": 0.10652739708260196, "grad_norm": 9.937274317174825, "learning_rate": 5.958097489499217e-06, "loss": 0.223760986328125, "step": 12320 }, { "epoch": 0.10657063060414523, "grad_norm": 6.394570377122387, "learning_rate": 5.958063550298668e-06, "loss": 0.0718231201171875, "step": 12325 }, { "epoch": 0.10661386412568849, "grad_norm": 1.2085728596143102, "learning_rate": 5.958029597455792e-06, "loss": 0.1781494140625, "step": 12330 }, { "epoch": 0.10665709764723176, "grad_norm": 6.523385759032611, "learning_rate": 5.957995630970745e-06, "loss": 0.0662384033203125, "step": 12335 }, { "epoch": 0.10670033116877502, "grad_norm": 2.7030185141520193, "learning_rate": 5.957961650843682e-06, "loss": 0.38602294921875, "step": 12340 }, { "epoch": 0.10674356469031829, "grad_norm": 1.3360826085828774, "learning_rate": 5.9579276570747625e-06, "loss": 0.236474609375, "step": 12345 }, { "epoch": 0.10678679821186154, "grad_norm": 24.53701458841381, "learning_rate": 5.957893649664142e-06, "loss": 0.1592376708984375, "step": 12350 }, { "epoch": 0.10683003173340481, "grad_norm": 8.552910755504078, "learning_rate": 5.957859628611978e-06, "loss": 0.11676025390625, "step": 12355 }, { "epoch": 0.10687326525494807, "grad_norm": 6.299685430696489, "learning_rate": 5.957825593918425e-06, "loss": 0.146917724609375, "step": 12360 }, { "epoch": 0.10691649877649134, "grad_norm": 17.996226495054124, "learning_rate": 5.957791545583643e-06, "loss": 0.092889404296875, "step": 12365 }, { "epoch": 0.1069597322980346, "grad_norm": 15.679079448887125, "learning_rate": 5.957757483607788e-06, "loss": 0.072808837890625, "step": 12370 }, { "epoch": 0.10700296581957787, "grad_norm": 31.721682269329662, "learning_rate": 5.957723407991016e-06, "loss": 0.1587890625, "step": 12375 }, { "epoch": 0.10704619934112113, "grad_norm": 33.7753845655353, "learning_rate": 5.957689318733486e-06, "loss": 0.2508056640625, "step": 12380 }, { "epoch": 0.1070894328626644, "grad_norm": 72.31117831014895, "learning_rate": 5.957655215835353e-06, "loss": 0.295135498046875, "step": 12385 }, { "epoch": 0.10713266638420765, "grad_norm": 16.504647355771368, "learning_rate": 5.957621099296776e-06, "loss": 0.0836944580078125, "step": 12390 }, { "epoch": 0.10717589990575092, "grad_norm": 20.346117217353413, "learning_rate": 5.957586969117912e-06, "loss": 0.11571807861328125, "step": 12395 }, { "epoch": 0.10721913342729418, "grad_norm": 3.196159736406, "learning_rate": 5.957552825298918e-06, "loss": 0.12113494873046875, "step": 12400 }, { "epoch": 0.10726236694883745, "grad_norm": 42.74099302135066, "learning_rate": 5.957518667839951e-06, "loss": 0.3410614013671875, "step": 12405 }, { "epoch": 0.10730560047038071, "grad_norm": 16.07511230129486, "learning_rate": 5.9574844967411706e-06, "loss": 0.077789306640625, "step": 12410 }, { "epoch": 0.10734883399192398, "grad_norm": 1.1583250078570326, "learning_rate": 5.957450312002733e-06, "loss": 0.04654998779296875, "step": 12415 }, { "epoch": 0.10739206751346725, "grad_norm": 31.266923400183146, "learning_rate": 5.9574161136247955e-06, "loss": 0.17808990478515624, "step": 12420 }, { "epoch": 0.1074353010350105, "grad_norm": 11.482250227107055, "learning_rate": 5.957381901607515e-06, "loss": 0.20477294921875, "step": 12425 }, { "epoch": 0.10747853455655378, "grad_norm": 13.952767729892717, "learning_rate": 5.957347675951053e-06, "loss": 0.08818206787109376, "step": 12430 }, { "epoch": 0.10752176807809703, "grad_norm": 2.336001895196664, "learning_rate": 5.9573134366555626e-06, "loss": 0.23535194396972656, "step": 12435 }, { "epoch": 0.1075650015996403, "grad_norm": 8.634428940366302, "learning_rate": 5.957279183721205e-06, "loss": 0.15908203125, "step": 12440 }, { "epoch": 0.10760823512118356, "grad_norm": 2.078556174059676, "learning_rate": 5.957244917148136e-06, "loss": 0.28960342407226564, "step": 12445 }, { "epoch": 0.10765146864272683, "grad_norm": 12.096028590542831, "learning_rate": 5.9572106369365156e-06, "loss": 0.1212432861328125, "step": 12450 }, { "epoch": 0.10769470216427009, "grad_norm": 51.33479915902762, "learning_rate": 5.957176343086501e-06, "loss": 0.472064208984375, "step": 12455 }, { "epoch": 0.10773793568581336, "grad_norm": 20.39294906284195, "learning_rate": 5.95714203559825e-06, "loss": 0.4736961364746094, "step": 12460 }, { "epoch": 0.10778116920735661, "grad_norm": 59.84375507576566, "learning_rate": 5.957107714471923e-06, "loss": 0.2668975830078125, "step": 12465 }, { "epoch": 0.10782440272889988, "grad_norm": 2.5451675979797668, "learning_rate": 5.957073379707675e-06, "loss": 0.08891487121582031, "step": 12470 }, { "epoch": 0.10786763625044314, "grad_norm": 6.48659838969534, "learning_rate": 5.957039031305666e-06, "loss": 0.282940673828125, "step": 12475 }, { "epoch": 0.10791086977198641, "grad_norm": 20.38357722331134, "learning_rate": 5.957004669266054e-06, "loss": 0.15087890625, "step": 12480 }, { "epoch": 0.10795410329352967, "grad_norm": 7.179524377043161, "learning_rate": 5.9569702935889975e-06, "loss": 0.07814178466796876, "step": 12485 }, { "epoch": 0.10799733681507294, "grad_norm": 4.706046388873745, "learning_rate": 5.956935904274657e-06, "loss": 0.29364013671875, "step": 12490 }, { "epoch": 0.1080405703366162, "grad_norm": 0.08786924052385708, "learning_rate": 5.956901501323187e-06, "loss": 0.114227294921875, "step": 12495 }, { "epoch": 0.10808380385815947, "grad_norm": 1.4450857135649273, "learning_rate": 5.95686708473475e-06, "loss": 0.10728759765625, "step": 12500 }, { "epoch": 0.10812703737970272, "grad_norm": 0.6928847642280505, "learning_rate": 5.956832654509503e-06, "loss": 0.10894775390625, "step": 12505 }, { "epoch": 0.108170270901246, "grad_norm": 27.289021600538877, "learning_rate": 5.956798210647605e-06, "loss": 0.28443145751953125, "step": 12510 }, { "epoch": 0.10821350442278925, "grad_norm": 9.423562793497144, "learning_rate": 5.9567637531492154e-06, "loss": 0.17379150390625, "step": 12515 }, { "epoch": 0.10825673794433252, "grad_norm": 8.538362927638415, "learning_rate": 5.956729282014492e-06, "loss": 0.17109375, "step": 12520 }, { "epoch": 0.10829997146587578, "grad_norm": 0.3103194473092233, "learning_rate": 5.956694797243595e-06, "loss": 0.13835525512695312, "step": 12525 }, { "epoch": 0.10834320498741905, "grad_norm": 5.266395274416729, "learning_rate": 5.956660298836683e-06, "loss": 0.1847991943359375, "step": 12530 }, { "epoch": 0.1083864385089623, "grad_norm": 46.65997208744191, "learning_rate": 5.956625786793915e-06, "loss": 0.2995170593261719, "step": 12535 }, { "epoch": 0.10842967203050558, "grad_norm": 3.1885977639755367, "learning_rate": 5.956591261115449e-06, "loss": 0.30315093994140624, "step": 12540 }, { "epoch": 0.10847290555204883, "grad_norm": 0.7378064894475093, "learning_rate": 5.9565567218014455e-06, "loss": 0.24802703857421876, "step": 12545 }, { "epoch": 0.1085161390735921, "grad_norm": 22.45409069148834, "learning_rate": 5.956522168852065e-06, "loss": 0.21468887329101563, "step": 12550 }, { "epoch": 0.10855937259513536, "grad_norm": 1.052770267266094, "learning_rate": 5.9564876022674635e-06, "loss": 0.077783203125, "step": 12555 }, { "epoch": 0.10860260611667863, "grad_norm": 53.918374713335055, "learning_rate": 5.956453022047804e-06, "loss": 0.3211334228515625, "step": 12560 }, { "epoch": 0.10864583963822189, "grad_norm": 5.970496781643749, "learning_rate": 5.956418428193244e-06, "loss": 0.4175506591796875, "step": 12565 }, { "epoch": 0.10868907315976516, "grad_norm": 1.335286267926237, "learning_rate": 5.956383820703943e-06, "loss": 0.10736198425292968, "step": 12570 }, { "epoch": 0.10873230668130841, "grad_norm": 2.0404533508397598, "learning_rate": 5.956349199580062e-06, "loss": 0.17579193115234376, "step": 12575 }, { "epoch": 0.10877554020285168, "grad_norm": 16.117703705334815, "learning_rate": 5.956314564821759e-06, "loss": 0.1603271484375, "step": 12580 }, { "epoch": 0.10881877372439494, "grad_norm": 5.870443483672114, "learning_rate": 5.956279916429195e-06, "loss": 0.230963134765625, "step": 12585 }, { "epoch": 0.10886200724593821, "grad_norm": 8.56297868469695, "learning_rate": 5.9562452544025286e-06, "loss": 0.18236083984375, "step": 12590 }, { "epoch": 0.10890524076748147, "grad_norm": 17.978527180315396, "learning_rate": 5.95621057874192e-06, "loss": 0.53734130859375, "step": 12595 }, { "epoch": 0.10894847428902474, "grad_norm": 53.00567562450656, "learning_rate": 5.956175889447531e-06, "loss": 0.32432861328125, "step": 12600 }, { "epoch": 0.10899170781056801, "grad_norm": 15.34224689442147, "learning_rate": 5.956141186519518e-06, "loss": 0.256964111328125, "step": 12605 }, { "epoch": 0.10903494133211127, "grad_norm": 30.58082486451185, "learning_rate": 5.956106469958044e-06, "loss": 0.1087646484375, "step": 12610 }, { "epoch": 0.10907817485365454, "grad_norm": 23.686175980635237, "learning_rate": 5.956071739763268e-06, "loss": 0.59501953125, "step": 12615 }, { "epoch": 0.10912140837519779, "grad_norm": 12.67598260499715, "learning_rate": 5.956036995935351e-06, "loss": 0.48013916015625, "step": 12620 }, { "epoch": 0.10916464189674106, "grad_norm": 34.099720383380046, "learning_rate": 5.956002238474452e-06, "loss": 0.265283203125, "step": 12625 }, { "epoch": 0.10920787541828432, "grad_norm": 19.376068607820905, "learning_rate": 5.955967467380732e-06, "loss": 0.1585784912109375, "step": 12630 }, { "epoch": 0.10925110893982759, "grad_norm": 55.62838823250057, "learning_rate": 5.955932682654351e-06, "loss": 0.213922119140625, "step": 12635 }, { "epoch": 0.10929434246137085, "grad_norm": 40.17435256713296, "learning_rate": 5.95589788429547e-06, "loss": 0.1549224853515625, "step": 12640 }, { "epoch": 0.10933757598291412, "grad_norm": 15.48210166978832, "learning_rate": 5.955863072304249e-06, "loss": 0.1380706787109375, "step": 12645 }, { "epoch": 0.10938080950445737, "grad_norm": 12.476592418087575, "learning_rate": 5.9558282466808485e-06, "loss": 0.2277923583984375, "step": 12650 }, { "epoch": 0.10942404302600064, "grad_norm": 49.94069099662398, "learning_rate": 5.95579340742543e-06, "loss": 0.30487060546875, "step": 12655 }, { "epoch": 0.1094672765475439, "grad_norm": 23.309299654085205, "learning_rate": 5.955758554538153e-06, "loss": 0.5010498046875, "step": 12660 }, { "epoch": 0.10951051006908717, "grad_norm": 1.001101954814316, "learning_rate": 5.9557236880191785e-06, "loss": 0.09618682861328125, "step": 12665 }, { "epoch": 0.10955374359063043, "grad_norm": 10.877623447495749, "learning_rate": 5.955688807868668e-06, "loss": 0.08385009765625, "step": 12670 }, { "epoch": 0.1095969771121737, "grad_norm": 2.3444019343282356, "learning_rate": 5.955653914086782e-06, "loss": 0.082427978515625, "step": 12675 }, { "epoch": 0.10964021063371696, "grad_norm": 30.624796745510032, "learning_rate": 5.955619006673681e-06, "loss": 0.43690338134765627, "step": 12680 }, { "epoch": 0.10968344415526023, "grad_norm": 3.709695123438368, "learning_rate": 5.955584085629526e-06, "loss": 0.37744903564453125, "step": 12685 }, { "epoch": 0.10972667767680348, "grad_norm": 17.089967471078364, "learning_rate": 5.955549150954479e-06, "loss": 0.3346588134765625, "step": 12690 }, { "epoch": 0.10976991119834675, "grad_norm": 5.174322950565259, "learning_rate": 5.9555142026487e-06, "loss": 0.2467529296875, "step": 12695 }, { "epoch": 0.10981314471989001, "grad_norm": 8.830534933543657, "learning_rate": 5.9554792407123505e-06, "loss": 0.08062286376953125, "step": 12700 }, { "epoch": 0.10985637824143328, "grad_norm": 26.893801403781012, "learning_rate": 5.955444265145592e-06, "loss": 0.164227294921875, "step": 12705 }, { "epoch": 0.10989961176297654, "grad_norm": 13.738680478165781, "learning_rate": 5.955409275948586e-06, "loss": 0.68111572265625, "step": 12710 }, { "epoch": 0.10994284528451981, "grad_norm": 8.767334511549956, "learning_rate": 5.955374273121493e-06, "loss": 0.09996185302734376, "step": 12715 }, { "epoch": 0.10998607880606306, "grad_norm": 22.111734158027698, "learning_rate": 5.955339256664476e-06, "loss": 0.22621574401855468, "step": 12720 }, { "epoch": 0.11002931232760634, "grad_norm": 17.882124353338636, "learning_rate": 5.9553042265776945e-06, "loss": 0.422393798828125, "step": 12725 }, { "epoch": 0.11007254584914959, "grad_norm": 15.720525343813446, "learning_rate": 5.955269182861311e-06, "loss": 0.3568267822265625, "step": 12730 }, { "epoch": 0.11011577937069286, "grad_norm": 27.163420637431567, "learning_rate": 5.9552341255154875e-06, "loss": 0.7912689208984375, "step": 12735 }, { "epoch": 0.11015901289223612, "grad_norm": 0.18705285938453725, "learning_rate": 5.955199054540386e-06, "loss": 0.03962554931640625, "step": 12740 }, { "epoch": 0.11020224641377939, "grad_norm": 5.969066903984986, "learning_rate": 5.9551639699361665e-06, "loss": 0.13333740234375, "step": 12745 }, { "epoch": 0.11024547993532265, "grad_norm": 4.977989317542804, "learning_rate": 5.955128871702993e-06, "loss": 0.20752983093261718, "step": 12750 }, { "epoch": 0.11028871345686592, "grad_norm": 15.886228031407356, "learning_rate": 5.955093759841026e-06, "loss": 0.720733642578125, "step": 12755 }, { "epoch": 0.11033194697840917, "grad_norm": 40.10741426734803, "learning_rate": 5.9550586343504265e-06, "loss": 0.23146896362304686, "step": 12760 }, { "epoch": 0.11037518049995244, "grad_norm": 52.27637953870251, "learning_rate": 5.9550234952313585e-06, "loss": 0.293731689453125, "step": 12765 }, { "epoch": 0.1104184140214957, "grad_norm": 20.476200744222442, "learning_rate": 5.954988342483984e-06, "loss": 0.279083251953125, "step": 12770 }, { "epoch": 0.11046164754303897, "grad_norm": 29.26137247187255, "learning_rate": 5.954953176108463e-06, "loss": 0.19505996704101564, "step": 12775 }, { "epoch": 0.11050488106458223, "grad_norm": 1.6781801747611185, "learning_rate": 5.95491799610496e-06, "loss": 0.30159759521484375, "step": 12780 }, { "epoch": 0.1105481145861255, "grad_norm": 17.699323540146572, "learning_rate": 5.954882802473636e-06, "loss": 0.23468017578125, "step": 12785 }, { "epoch": 0.11059134810766877, "grad_norm": 15.334231044657379, "learning_rate": 5.9548475952146535e-06, "loss": 0.2248046875, "step": 12790 }, { "epoch": 0.11063458162921203, "grad_norm": 51.89925909236781, "learning_rate": 5.954812374328176e-06, "loss": 0.6193328857421875, "step": 12795 }, { "epoch": 0.1106778151507553, "grad_norm": 22.846369587142696, "learning_rate": 5.954777139814363e-06, "loss": 0.36855087280273435, "step": 12800 }, { "epoch": 0.11072104867229855, "grad_norm": 0.9623682658067356, "learning_rate": 5.95474189167338e-06, "loss": 0.14196395874023438, "step": 12805 }, { "epoch": 0.11076428219384182, "grad_norm": 2.1836780494082144, "learning_rate": 5.954706629905389e-06, "loss": 0.0643096923828125, "step": 12810 }, { "epoch": 0.11080751571538508, "grad_norm": 13.996384661729834, "learning_rate": 5.954671354510551e-06, "loss": 0.5015289306640625, "step": 12815 }, { "epoch": 0.11085074923692835, "grad_norm": 12.742310639179955, "learning_rate": 5.954636065489031e-06, "loss": 0.2370758056640625, "step": 12820 }, { "epoch": 0.11089398275847161, "grad_norm": 4.480968396765759, "learning_rate": 5.954600762840989e-06, "loss": 0.165960693359375, "step": 12825 }, { "epoch": 0.11093721628001488, "grad_norm": 0.3466477294430131, "learning_rate": 5.9545654465665904e-06, "loss": 0.118267822265625, "step": 12830 }, { "epoch": 0.11098044980155813, "grad_norm": 1.766762844309071, "learning_rate": 5.9545301166659965e-06, "loss": 0.15927734375, "step": 12835 }, { "epoch": 0.1110236833231014, "grad_norm": 16.744277948340468, "learning_rate": 5.9544947731393715e-06, "loss": 0.262847900390625, "step": 12840 }, { "epoch": 0.11106691684464466, "grad_norm": 29.759975904138326, "learning_rate": 5.954459415986877e-06, "loss": 0.269940185546875, "step": 12845 }, { "epoch": 0.11111015036618793, "grad_norm": 32.71798075480026, "learning_rate": 5.9544240452086776e-06, "loss": 0.153082275390625, "step": 12850 }, { "epoch": 0.11115338388773119, "grad_norm": 13.676415568032331, "learning_rate": 5.954388660804935e-06, "loss": 0.092047119140625, "step": 12855 }, { "epoch": 0.11119661740927446, "grad_norm": 2.034282913677631, "learning_rate": 5.954353262775813e-06, "loss": 0.1228546142578125, "step": 12860 }, { "epoch": 0.11123985093081772, "grad_norm": 14.864706215220183, "learning_rate": 5.954317851121475e-06, "loss": 0.21545333862304689, "step": 12865 }, { "epoch": 0.11128308445236099, "grad_norm": 24.990393084372457, "learning_rate": 5.954282425842084e-06, "loss": 0.2750885009765625, "step": 12870 }, { "epoch": 0.11132631797390424, "grad_norm": 10.091687928112272, "learning_rate": 5.954246986937803e-06, "loss": 0.1601593017578125, "step": 12875 }, { "epoch": 0.11136955149544751, "grad_norm": 43.45282246279171, "learning_rate": 5.954211534408796e-06, "loss": 0.17961273193359376, "step": 12880 }, { "epoch": 0.11141278501699077, "grad_norm": 6.40559060508729, "learning_rate": 5.954176068255228e-06, "loss": 0.275341796875, "step": 12885 }, { "epoch": 0.11145601853853404, "grad_norm": 24.316235667351638, "learning_rate": 5.954140588477259e-06, "loss": 0.39622802734375, "step": 12890 }, { "epoch": 0.1114992520600773, "grad_norm": 3.920977040390832, "learning_rate": 5.954105095075055e-06, "loss": 0.2016357421875, "step": 12895 }, { "epoch": 0.11154248558162057, "grad_norm": 3.4327067960228823, "learning_rate": 5.9540695880487795e-06, "loss": 0.1202484130859375, "step": 12900 }, { "epoch": 0.11158571910316382, "grad_norm": 22.633794922218925, "learning_rate": 5.954034067398597e-06, "loss": 0.33217849731445315, "step": 12905 }, { "epoch": 0.1116289526247071, "grad_norm": 0.044876444870319, "learning_rate": 5.9539985331246694e-06, "loss": 0.3057838439941406, "step": 12910 }, { "epoch": 0.11167218614625035, "grad_norm": 3.7449405601402956, "learning_rate": 5.9539629852271615e-06, "loss": 0.30372467041015627, "step": 12915 }, { "epoch": 0.11171541966779362, "grad_norm": 13.696605522561608, "learning_rate": 5.953927423706238e-06, "loss": 0.3780029296875, "step": 12920 }, { "epoch": 0.11175865318933688, "grad_norm": 1.0064721165782835, "learning_rate": 5.953891848562062e-06, "loss": 0.25426902770996096, "step": 12925 }, { "epoch": 0.11180188671088015, "grad_norm": 5.0942145766208204, "learning_rate": 5.953856259794798e-06, "loss": 0.04747772216796875, "step": 12930 }, { "epoch": 0.1118451202324234, "grad_norm": 46.19033370040082, "learning_rate": 5.953820657404609e-06, "loss": 0.5302947998046875, "step": 12935 }, { "epoch": 0.11188835375396668, "grad_norm": 11.78249435246842, "learning_rate": 5.953785041391661e-06, "loss": 0.1372955322265625, "step": 12940 }, { "epoch": 0.11193158727550993, "grad_norm": 3.311445109198975, "learning_rate": 5.9537494117561176e-06, "loss": 0.2192138671875, "step": 12945 }, { "epoch": 0.1119748207970532, "grad_norm": 19.48549309410215, "learning_rate": 5.953713768498142e-06, "loss": 0.35552978515625, "step": 12950 }, { "epoch": 0.11201805431859646, "grad_norm": 3.5624229324000694, "learning_rate": 5.9536781116179005e-06, "loss": 0.09289016723632812, "step": 12955 }, { "epoch": 0.11206128784013973, "grad_norm": 12.812068539478478, "learning_rate": 5.953642441115556e-06, "loss": 0.09473648071289062, "step": 12960 }, { "epoch": 0.11210452136168299, "grad_norm": 37.90175639386, "learning_rate": 5.953606756991273e-06, "loss": 0.27135009765625, "step": 12965 }, { "epoch": 0.11214775488322626, "grad_norm": 0.24273297730175786, "learning_rate": 5.953571059245218e-06, "loss": 0.050922393798828125, "step": 12970 }, { "epoch": 0.11219098840476953, "grad_norm": 12.497402204335213, "learning_rate": 5.9535353478775535e-06, "loss": 0.086322021484375, "step": 12975 }, { "epoch": 0.11223422192631279, "grad_norm": 27.764730710876055, "learning_rate": 5.953499622888445e-06, "loss": 0.2442047119140625, "step": 12980 }, { "epoch": 0.11227745544785606, "grad_norm": 44.18545003741528, "learning_rate": 5.953463884278057e-06, "loss": 0.220220947265625, "step": 12985 }, { "epoch": 0.11232068896939931, "grad_norm": 27.611129140992325, "learning_rate": 5.953428132046555e-06, "loss": 0.4059318542480469, "step": 12990 }, { "epoch": 0.11236392249094258, "grad_norm": 6.045093285545886, "learning_rate": 5.953392366194104e-06, "loss": 0.290118408203125, "step": 12995 }, { "epoch": 0.11240715601248584, "grad_norm": 6.760442356205357, "learning_rate": 5.953356586720866e-06, "loss": 0.18145751953125, "step": 13000 }, { "epoch": 0.11245038953402911, "grad_norm": 12.511903948739192, "learning_rate": 5.95332079362701e-06, "loss": 0.264361572265625, "step": 13005 }, { "epoch": 0.11249362305557237, "grad_norm": 27.216801757382534, "learning_rate": 5.953284986912699e-06, "loss": 0.360552978515625, "step": 13010 }, { "epoch": 0.11253685657711564, "grad_norm": 1.5328996139717566, "learning_rate": 5.9532491665781e-06, "loss": 0.32605743408203125, "step": 13015 }, { "epoch": 0.1125800900986589, "grad_norm": 7.912911842165785, "learning_rate": 5.9532133326233744e-06, "loss": 0.08908920288085938, "step": 13020 }, { "epoch": 0.11262332362020216, "grad_norm": 6.527245859035964, "learning_rate": 5.953177485048692e-06, "loss": 0.24391555786132812, "step": 13025 }, { "epoch": 0.11266655714174542, "grad_norm": 29.59196440490035, "learning_rate": 5.953141623854215e-06, "loss": 0.170025634765625, "step": 13030 }, { "epoch": 0.11270979066328869, "grad_norm": 6.31455110952459, "learning_rate": 5.95310574904011e-06, "loss": 0.14867095947265624, "step": 13035 }, { "epoch": 0.11275302418483195, "grad_norm": 4.1941868117037995, "learning_rate": 5.953069860606543e-06, "loss": 0.0916290283203125, "step": 13040 }, { "epoch": 0.11279625770637522, "grad_norm": 23.32977623050154, "learning_rate": 5.953033958553677e-06, "loss": 0.13050155639648436, "step": 13045 }, { "epoch": 0.11283949122791848, "grad_norm": 2.82259738083482, "learning_rate": 5.952998042881681e-06, "loss": 0.03600006103515625, "step": 13050 }, { "epoch": 0.11288272474946175, "grad_norm": 10.093261978660511, "learning_rate": 5.952962113590718e-06, "loss": 0.225238037109375, "step": 13055 }, { "epoch": 0.112925958271005, "grad_norm": 1.2548791681649716, "learning_rate": 5.952926170680954e-06, "loss": 0.2664459228515625, "step": 13060 }, { "epoch": 0.11296919179254827, "grad_norm": 0.9943338843382233, "learning_rate": 5.952890214152556e-06, "loss": 0.0475067138671875, "step": 13065 }, { "epoch": 0.11301242531409153, "grad_norm": 10.720345067818391, "learning_rate": 5.95285424400569e-06, "loss": 0.23472900390625, "step": 13070 }, { "epoch": 0.1130556588356348, "grad_norm": 14.383465492916935, "learning_rate": 5.952818260240521e-06, "loss": 0.2162933349609375, "step": 13075 }, { "epoch": 0.11309889235717806, "grad_norm": 2.6613890529131923, "learning_rate": 5.9527822628572145e-06, "loss": 0.02064361572265625, "step": 13080 }, { "epoch": 0.11314212587872133, "grad_norm": 22.596600363862418, "learning_rate": 5.952746251855938e-06, "loss": 0.326739501953125, "step": 13085 }, { "epoch": 0.11318535940026458, "grad_norm": 1.2710077268234055, "learning_rate": 5.9527102272368555e-06, "loss": 0.07425689697265625, "step": 13090 }, { "epoch": 0.11322859292180786, "grad_norm": 0.23201983392223952, "learning_rate": 5.952674189000136e-06, "loss": 0.22129364013671876, "step": 13095 }, { "epoch": 0.11327182644335111, "grad_norm": 5.780660633096228, "learning_rate": 5.952638137145942e-06, "loss": 0.24470291137695313, "step": 13100 }, { "epoch": 0.11331505996489438, "grad_norm": 30.255044958911046, "learning_rate": 5.952602071674442e-06, "loss": 0.238568115234375, "step": 13105 }, { "epoch": 0.11335829348643764, "grad_norm": 5.848887302252407, "learning_rate": 5.952565992585804e-06, "loss": 0.15269775390625, "step": 13110 }, { "epoch": 0.11340152700798091, "grad_norm": 25.345377779285414, "learning_rate": 5.952529899880191e-06, "loss": 0.078436279296875, "step": 13115 }, { "epoch": 0.11344476052952417, "grad_norm": 32.80904740692052, "learning_rate": 5.952493793557771e-06, "loss": 0.210040283203125, "step": 13120 }, { "epoch": 0.11348799405106744, "grad_norm": 18.27826762453483, "learning_rate": 5.952457673618711e-06, "loss": 0.4144561767578125, "step": 13125 }, { "epoch": 0.1135312275726107, "grad_norm": 18.950378752527797, "learning_rate": 5.952421540063177e-06, "loss": 0.22125091552734374, "step": 13130 }, { "epoch": 0.11357446109415396, "grad_norm": 31.13113085245855, "learning_rate": 5.9523853928913365e-06, "loss": 0.40699615478515627, "step": 13135 }, { "epoch": 0.11361769461569722, "grad_norm": 0.14954078799188278, "learning_rate": 5.9523492321033546e-06, "loss": 0.17119140625, "step": 13140 }, { "epoch": 0.11366092813724049, "grad_norm": 13.260425592100422, "learning_rate": 5.952313057699399e-06, "loss": 0.11300048828125, "step": 13145 }, { "epoch": 0.11370416165878375, "grad_norm": 6.200713329179805, "learning_rate": 5.952276869679636e-06, "loss": 0.41906585693359377, "step": 13150 }, { "epoch": 0.11374739518032702, "grad_norm": 20.615863047162772, "learning_rate": 5.952240668044234e-06, "loss": 0.5018600463867188, "step": 13155 }, { "epoch": 0.11379062870187029, "grad_norm": 9.635565822904216, "learning_rate": 5.952204452793358e-06, "loss": 0.11567611694335937, "step": 13160 }, { "epoch": 0.11383386222341355, "grad_norm": 22.05208128920924, "learning_rate": 5.952168223927177e-06, "loss": 0.5306877136230469, "step": 13165 }, { "epoch": 0.11387709574495682, "grad_norm": 0.7585443419931235, "learning_rate": 5.952131981445856e-06, "loss": 0.5132888793945313, "step": 13170 }, { "epoch": 0.11392032926650007, "grad_norm": 1.0722643181379086, "learning_rate": 5.9520957253495635e-06, "loss": 0.16079025268554686, "step": 13175 }, { "epoch": 0.11396356278804334, "grad_norm": 33.81598290370876, "learning_rate": 5.952059455638466e-06, "loss": 0.451275634765625, "step": 13180 }, { "epoch": 0.1140067963095866, "grad_norm": 25.92116384071318, "learning_rate": 5.952023172312731e-06, "loss": 0.14677734375, "step": 13185 }, { "epoch": 0.11405002983112987, "grad_norm": 18.112895237976016, "learning_rate": 5.9519868753725275e-06, "loss": 0.14207916259765624, "step": 13190 }, { "epoch": 0.11409326335267313, "grad_norm": 3.3900082247105896, "learning_rate": 5.95195056481802e-06, "loss": 0.0764373779296875, "step": 13195 }, { "epoch": 0.1141364968742164, "grad_norm": 1.4500098361067297, "learning_rate": 5.9519142406493775e-06, "loss": 0.144830322265625, "step": 13200 }, { "epoch": 0.11417973039575965, "grad_norm": 6.4737510634839115, "learning_rate": 5.9518779028667665e-06, "loss": 0.12152099609375, "step": 13205 }, { "epoch": 0.11422296391730292, "grad_norm": 23.060380116270753, "learning_rate": 5.951841551470357e-06, "loss": 0.14736328125, "step": 13210 }, { "epoch": 0.11426619743884618, "grad_norm": 2.4828072109754022, "learning_rate": 5.9518051864603144e-06, "loss": 0.43280029296875, "step": 13215 }, { "epoch": 0.11430943096038945, "grad_norm": 2.3239494672489993, "learning_rate": 5.951768807836808e-06, "loss": 0.30771484375, "step": 13220 }, { "epoch": 0.11435266448193271, "grad_norm": 12.650303806808838, "learning_rate": 5.951732415600004e-06, "loss": 0.27330322265625, "step": 13225 }, { "epoch": 0.11439589800347598, "grad_norm": 34.04212441775144, "learning_rate": 5.95169600975007e-06, "loss": 0.2455352783203125, "step": 13230 }, { "epoch": 0.11443913152501924, "grad_norm": 17.61570596030048, "learning_rate": 5.951659590287176e-06, "loss": 0.12211990356445312, "step": 13235 }, { "epoch": 0.1144823650465625, "grad_norm": 38.04164242004995, "learning_rate": 5.951623157211489e-06, "loss": 0.32288818359375, "step": 13240 }, { "epoch": 0.11452559856810576, "grad_norm": 3.965211371924213, "learning_rate": 5.951586710523176e-06, "loss": 0.057689666748046875, "step": 13245 }, { "epoch": 0.11456883208964903, "grad_norm": 1.7081870976947051, "learning_rate": 5.951550250222405e-06, "loss": 0.13840789794921876, "step": 13250 }, { "epoch": 0.11461206561119229, "grad_norm": 1.1424957890472929, "learning_rate": 5.951513776309347e-06, "loss": 0.16192626953125, "step": 13255 }, { "epoch": 0.11465529913273556, "grad_norm": 60.6265810809121, "learning_rate": 5.951477288784167e-06, "loss": 0.329803466796875, "step": 13260 }, { "epoch": 0.11469853265427882, "grad_norm": 9.034787375619835, "learning_rate": 5.951440787647035e-06, "loss": 0.084234619140625, "step": 13265 }, { "epoch": 0.11474176617582209, "grad_norm": 8.890598063830044, "learning_rate": 5.95140427289812e-06, "loss": 0.427374267578125, "step": 13270 }, { "epoch": 0.11478499969736534, "grad_norm": 14.679932649377523, "learning_rate": 5.9513677445375874e-06, "loss": 0.25331573486328124, "step": 13275 }, { "epoch": 0.11482823321890862, "grad_norm": 25.36348748645272, "learning_rate": 5.951331202565608e-06, "loss": 0.390069580078125, "step": 13280 }, { "epoch": 0.11487146674045187, "grad_norm": 23.65873079585341, "learning_rate": 5.95129464698235e-06, "loss": 0.089276123046875, "step": 13285 }, { "epoch": 0.11491470026199514, "grad_norm": 1.9099432003992973, "learning_rate": 5.951258077787982e-06, "loss": 0.16594619750976564, "step": 13290 }, { "epoch": 0.1149579337835384, "grad_norm": 1.3670667622422181, "learning_rate": 5.951221494982673e-06, "loss": 0.1529205322265625, "step": 13295 }, { "epoch": 0.11500116730508167, "grad_norm": 23.348887554814365, "learning_rate": 5.951184898566591e-06, "loss": 0.2866851806640625, "step": 13300 }, { "epoch": 0.11504440082662493, "grad_norm": 23.156000215969563, "learning_rate": 5.951148288539905e-06, "loss": 0.4106536865234375, "step": 13305 }, { "epoch": 0.1150876343481682, "grad_norm": 5.2703355395925175, "learning_rate": 5.951111664902783e-06, "loss": 0.484307861328125, "step": 13310 }, { "epoch": 0.11513086786971145, "grad_norm": 37.72704758444498, "learning_rate": 5.951075027655395e-06, "loss": 0.1615081787109375, "step": 13315 }, { "epoch": 0.11517410139125472, "grad_norm": 34.96714032105794, "learning_rate": 5.951038376797911e-06, "loss": 0.3218505859375, "step": 13320 }, { "epoch": 0.11521733491279798, "grad_norm": 34.51811401269841, "learning_rate": 5.9510017123304976e-06, "loss": 0.812158203125, "step": 13325 }, { "epoch": 0.11526056843434125, "grad_norm": 8.101129558112467, "learning_rate": 5.950965034253326e-06, "loss": 0.4771484375, "step": 13330 }, { "epoch": 0.11530380195588451, "grad_norm": 68.69360152516997, "learning_rate": 5.950928342566563e-06, "loss": 0.37457275390625, "step": 13335 }, { "epoch": 0.11534703547742778, "grad_norm": 12.153654215816339, "learning_rate": 5.95089163727038e-06, "loss": 0.3000244140625, "step": 13340 }, { "epoch": 0.11539026899897105, "grad_norm": 17.90314381418786, "learning_rate": 5.950854918364946e-06, "loss": 0.5798828125, "step": 13345 }, { "epoch": 0.1154335025205143, "grad_norm": 43.26473710968844, "learning_rate": 5.95081818585043e-06, "loss": 0.47142333984375, "step": 13350 }, { "epoch": 0.11547673604205758, "grad_norm": 12.813537655155478, "learning_rate": 5.950781439727001e-06, "loss": 0.3190399169921875, "step": 13355 }, { "epoch": 0.11551996956360083, "grad_norm": 12.980772707202837, "learning_rate": 5.950744679994828e-06, "loss": 0.19301605224609375, "step": 13360 }, { "epoch": 0.1155632030851441, "grad_norm": 8.370792603465182, "learning_rate": 5.9507079066540815e-06, "loss": 0.0705596923828125, "step": 13365 }, { "epoch": 0.11560643660668736, "grad_norm": 17.75758948702746, "learning_rate": 5.950671119704931e-06, "loss": 0.36999359130859377, "step": 13370 }, { "epoch": 0.11564967012823063, "grad_norm": 9.46179890431785, "learning_rate": 5.9506343191475454e-06, "loss": 0.509527587890625, "step": 13375 }, { "epoch": 0.11569290364977389, "grad_norm": 4.0928550891456625, "learning_rate": 5.950597504982096e-06, "loss": 0.187103271484375, "step": 13380 }, { "epoch": 0.11573613717131716, "grad_norm": 10.746195986024556, "learning_rate": 5.950560677208752e-06, "loss": 0.11446189880371094, "step": 13385 }, { "epoch": 0.11577937069286041, "grad_norm": 11.819796992403763, "learning_rate": 5.950523835827682e-06, "loss": 0.153131103515625, "step": 13390 }, { "epoch": 0.11582260421440368, "grad_norm": 2.066051057296743, "learning_rate": 5.950486980839057e-06, "loss": 0.1314788818359375, "step": 13395 }, { "epoch": 0.11586583773594694, "grad_norm": 25.85283940289774, "learning_rate": 5.950450112243046e-06, "loss": 0.15613861083984376, "step": 13400 }, { "epoch": 0.11590907125749021, "grad_norm": 8.299113568241706, "learning_rate": 5.950413230039821e-06, "loss": 0.3007293701171875, "step": 13405 }, { "epoch": 0.11595230477903347, "grad_norm": 128.19845261176147, "learning_rate": 5.95037633422955e-06, "loss": 0.2497406005859375, "step": 13410 }, { "epoch": 0.11599553830057674, "grad_norm": 25.00122727517647, "learning_rate": 5.950339424812405e-06, "loss": 0.1418609619140625, "step": 13415 }, { "epoch": 0.11603877182212, "grad_norm": 9.244621258677848, "learning_rate": 5.950302501788555e-06, "loss": 0.078594970703125, "step": 13420 }, { "epoch": 0.11608200534366327, "grad_norm": 23.255400932200615, "learning_rate": 5.95026556515817e-06, "loss": 0.1132720947265625, "step": 13425 }, { "epoch": 0.11612523886520652, "grad_norm": 8.02338768663515, "learning_rate": 5.95022861492142e-06, "loss": 0.308233642578125, "step": 13430 }, { "epoch": 0.1161684723867498, "grad_norm": 14.991222836933915, "learning_rate": 5.950191651078478e-06, "loss": 0.168505859375, "step": 13435 }, { "epoch": 0.11621170590829305, "grad_norm": 20.378370028054324, "learning_rate": 5.950154673629513e-06, "loss": 0.1874176025390625, "step": 13440 }, { "epoch": 0.11625493942983632, "grad_norm": 18.09655933946649, "learning_rate": 5.9501176825746945e-06, "loss": 0.154779052734375, "step": 13445 }, { "epoch": 0.11629817295137958, "grad_norm": 15.766242591382222, "learning_rate": 5.9500806779141934e-06, "loss": 0.24238967895507812, "step": 13450 }, { "epoch": 0.11634140647292285, "grad_norm": 1.2608222541043714, "learning_rate": 5.9500436596481825e-06, "loss": 0.241510009765625, "step": 13455 }, { "epoch": 0.1163846399944661, "grad_norm": 7.37590767209123, "learning_rate": 5.950006627776829e-06, "loss": 0.219110107421875, "step": 13460 }, { "epoch": 0.11642787351600938, "grad_norm": 16.15850681435082, "learning_rate": 5.949969582300307e-06, "loss": 0.23169403076171874, "step": 13465 }, { "epoch": 0.11647110703755263, "grad_norm": 12.56484942272245, "learning_rate": 5.949932523218786e-06, "loss": 0.09032745361328125, "step": 13470 }, { "epoch": 0.1165143405590959, "grad_norm": 1.6699268166981707, "learning_rate": 5.949895450532435e-06, "loss": 0.10404815673828124, "step": 13475 }, { "epoch": 0.11655757408063916, "grad_norm": 3.4073612771361588, "learning_rate": 5.9498583642414295e-06, "loss": 0.080938720703125, "step": 13480 }, { "epoch": 0.11660080760218243, "grad_norm": 43.91224897876333, "learning_rate": 5.949821264345936e-06, "loss": 0.3005401611328125, "step": 13485 }, { "epoch": 0.11664404112372569, "grad_norm": 2.4578992212647774, "learning_rate": 5.949784150846128e-06, "loss": 0.57674560546875, "step": 13490 }, { "epoch": 0.11668727464526896, "grad_norm": 10.78978094737179, "learning_rate": 5.949747023742176e-06, "loss": 0.31781005859375, "step": 13495 }, { "epoch": 0.11673050816681221, "grad_norm": 28.408191414219598, "learning_rate": 5.949709883034252e-06, "loss": 0.182061767578125, "step": 13500 }, { "epoch": 0.11677374168835548, "grad_norm": 0.5044550669022092, "learning_rate": 5.949672728722526e-06, "loss": 0.30053863525390623, "step": 13505 }, { "epoch": 0.11681697520989874, "grad_norm": 43.036058244344915, "learning_rate": 5.94963556080717e-06, "loss": 0.2886322021484375, "step": 13510 }, { "epoch": 0.11686020873144201, "grad_norm": 50.84204816703431, "learning_rate": 5.949598379288356e-06, "loss": 0.5115150451660156, "step": 13515 }, { "epoch": 0.11690344225298527, "grad_norm": 21.678503853366966, "learning_rate": 5.949561184166254e-06, "loss": 0.12724456787109376, "step": 13520 }, { "epoch": 0.11694667577452854, "grad_norm": 27.06631877417214, "learning_rate": 5.949523975441037e-06, "loss": 0.25278167724609374, "step": 13525 }, { "epoch": 0.11698990929607181, "grad_norm": 19.622010506497478, "learning_rate": 5.9494867531128765e-06, "loss": 0.3244762420654297, "step": 13530 }, { "epoch": 0.11703314281761507, "grad_norm": 13.415140822181002, "learning_rate": 5.949449517181943e-06, "loss": 0.13793859481811524, "step": 13535 }, { "epoch": 0.11707637633915834, "grad_norm": 17.871631070823238, "learning_rate": 5.9494122676484095e-06, "loss": 0.3647552490234375, "step": 13540 }, { "epoch": 0.11711960986070159, "grad_norm": 24.708506869574684, "learning_rate": 5.949375004512447e-06, "loss": 0.16846923828125, "step": 13545 }, { "epoch": 0.11716284338224486, "grad_norm": 34.02261216651143, "learning_rate": 5.949337727774228e-06, "loss": 0.252703857421875, "step": 13550 }, { "epoch": 0.11720607690378812, "grad_norm": 0.8196414180604751, "learning_rate": 5.949300437433924e-06, "loss": 0.0651947021484375, "step": 13555 }, { "epoch": 0.11724931042533139, "grad_norm": 19.354596320242567, "learning_rate": 5.949263133491706e-06, "loss": 0.61287841796875, "step": 13560 }, { "epoch": 0.11729254394687465, "grad_norm": 2.9881438221250383, "learning_rate": 5.949225815947748e-06, "loss": 0.295343017578125, "step": 13565 }, { "epoch": 0.11733577746841792, "grad_norm": 1.5235343058685624, "learning_rate": 5.94918848480222e-06, "loss": 0.07382888793945312, "step": 13570 }, { "epoch": 0.11737901098996117, "grad_norm": 10.614219480289117, "learning_rate": 5.949151140055296e-06, "loss": 0.29315185546875, "step": 13575 }, { "epoch": 0.11742224451150444, "grad_norm": 4.417950195371763, "learning_rate": 5.949113781707147e-06, "loss": 0.206976318359375, "step": 13580 }, { "epoch": 0.1174654780330477, "grad_norm": 26.006551243472764, "learning_rate": 5.949076409757947e-06, "loss": 0.49956550598144533, "step": 13585 }, { "epoch": 0.11750871155459097, "grad_norm": 0.4685326031678949, "learning_rate": 5.949039024207867e-06, "loss": 0.07391128540039063, "step": 13590 }, { "epoch": 0.11755194507613423, "grad_norm": 0.5773808243903257, "learning_rate": 5.949001625057079e-06, "loss": 0.25977783203125, "step": 13595 }, { "epoch": 0.1175951785976775, "grad_norm": 0.6390091463925853, "learning_rate": 5.948964212305756e-06, "loss": 0.1332843780517578, "step": 13600 }, { "epoch": 0.11763841211922076, "grad_norm": 2.4119825730972484, "learning_rate": 5.948926785954071e-06, "loss": 0.1530517578125, "step": 13605 }, { "epoch": 0.11768164564076403, "grad_norm": 14.533662485773736, "learning_rate": 5.9488893460021955e-06, "loss": 0.0958282470703125, "step": 13610 }, { "epoch": 0.11772487916230728, "grad_norm": 33.263169952602325, "learning_rate": 5.948851892450304e-06, "loss": 0.196099853515625, "step": 13615 }, { "epoch": 0.11776811268385055, "grad_norm": 0.8364355114670619, "learning_rate": 5.948814425298567e-06, "loss": 0.2344635009765625, "step": 13620 }, { "epoch": 0.11781134620539381, "grad_norm": 6.092294870168676, "learning_rate": 5.9487769445471595e-06, "loss": 0.3187976837158203, "step": 13625 }, { "epoch": 0.11785457972693708, "grad_norm": 6.198622122356674, "learning_rate": 5.9487394501962525e-06, "loss": 0.2656768798828125, "step": 13630 }, { "epoch": 0.11789781324848034, "grad_norm": 4.034893335415473, "learning_rate": 5.948701942246021e-06, "loss": 0.156646728515625, "step": 13635 }, { "epoch": 0.11794104677002361, "grad_norm": 14.77443975407244, "learning_rate": 5.948664420696635e-06, "loss": 0.1833770751953125, "step": 13640 }, { "epoch": 0.11798428029156686, "grad_norm": 18.73526096950516, "learning_rate": 5.94862688554827e-06, "loss": 0.30411376953125, "step": 13645 }, { "epoch": 0.11802751381311014, "grad_norm": 5.979221185424452, "learning_rate": 5.948589336801099e-06, "loss": 0.45250473022460935, "step": 13650 }, { "epoch": 0.11807074733465339, "grad_norm": 24.03896559237069, "learning_rate": 5.948551774455294e-06, "loss": 0.286236572265625, "step": 13655 }, { "epoch": 0.11811398085619666, "grad_norm": 17.134483505062683, "learning_rate": 5.9485141985110285e-06, "loss": 0.1561767578125, "step": 13660 }, { "epoch": 0.11815721437773992, "grad_norm": 3.021760715698225, "learning_rate": 5.948476608968476e-06, "loss": 0.08900222778320313, "step": 13665 }, { "epoch": 0.11820044789928319, "grad_norm": 0.7224295062000115, "learning_rate": 5.948439005827809e-06, "loss": 0.04128341674804688, "step": 13670 }, { "epoch": 0.11824368142082645, "grad_norm": 42.81612698030383, "learning_rate": 5.948401389089204e-06, "loss": 0.20619964599609375, "step": 13675 }, { "epoch": 0.11828691494236972, "grad_norm": 4.333093077196887, "learning_rate": 5.9483637587528316e-06, "loss": 0.08975830078125, "step": 13680 }, { "epoch": 0.11833014846391297, "grad_norm": 54.3226340558362, "learning_rate": 5.948326114818866e-06, "loss": 0.5503662109375, "step": 13685 }, { "epoch": 0.11837338198545624, "grad_norm": 31.572950946451797, "learning_rate": 5.948288457287479e-06, "loss": 0.316851806640625, "step": 13690 }, { "epoch": 0.1184166155069995, "grad_norm": 13.647870192661562, "learning_rate": 5.948250786158849e-06, "loss": 0.227532958984375, "step": 13695 }, { "epoch": 0.11845984902854277, "grad_norm": 32.534328311551924, "learning_rate": 5.948213101433146e-06, "loss": 0.209228515625, "step": 13700 }, { "epoch": 0.11850308255008603, "grad_norm": 5.79812160396483, "learning_rate": 5.948175403110544e-06, "loss": 0.02569427490234375, "step": 13705 }, { "epoch": 0.1185463160716293, "grad_norm": 14.415694456087168, "learning_rate": 5.948137691191217e-06, "loss": 0.1663299560546875, "step": 13710 }, { "epoch": 0.11858954959317257, "grad_norm": 3.6888600794957855, "learning_rate": 5.948099965675341e-06, "loss": 0.21384124755859374, "step": 13715 }, { "epoch": 0.11863278311471583, "grad_norm": 9.935897589632269, "learning_rate": 5.948062226563088e-06, "loss": 0.1625335693359375, "step": 13720 }, { "epoch": 0.1186760166362591, "grad_norm": 4.248459018010456, "learning_rate": 5.9480244738546315e-06, "loss": 0.17657394409179689, "step": 13725 }, { "epoch": 0.11871925015780235, "grad_norm": 33.57655902280627, "learning_rate": 5.947986707550148e-06, "loss": 0.31884765625, "step": 13730 }, { "epoch": 0.11876248367934562, "grad_norm": 3.2563050806020213, "learning_rate": 5.9479489276498085e-06, "loss": 0.149017333984375, "step": 13735 }, { "epoch": 0.11880571720088888, "grad_norm": 6.135157343192234, "learning_rate": 5.947911134153791e-06, "loss": 0.5144332885742188, "step": 13740 }, { "epoch": 0.11884895072243215, "grad_norm": 13.677224094128656, "learning_rate": 5.947873327062267e-06, "loss": 0.37874755859375, "step": 13745 }, { "epoch": 0.11889218424397541, "grad_norm": 2.475153506219695, "learning_rate": 5.947835506375412e-06, "loss": 0.1259521484375, "step": 13750 }, { "epoch": 0.11893541776551868, "grad_norm": 32.71727962575067, "learning_rate": 5.9477976720934e-06, "loss": 0.192919921875, "step": 13755 }, { "epoch": 0.11897865128706193, "grad_norm": 38.971154528130064, "learning_rate": 5.947759824216404e-06, "loss": 0.2991546630859375, "step": 13760 }, { "epoch": 0.1190218848086052, "grad_norm": 16.5333439722211, "learning_rate": 5.947721962744603e-06, "loss": 0.1004638671875, "step": 13765 }, { "epoch": 0.11906511833014846, "grad_norm": 0.8750285115742303, "learning_rate": 5.947684087678167e-06, "loss": 0.2514251708984375, "step": 13770 }, { "epoch": 0.11910835185169173, "grad_norm": 14.820688816161748, "learning_rate": 5.947646199017273e-06, "loss": 0.3454315185546875, "step": 13775 }, { "epoch": 0.11915158537323499, "grad_norm": 7.598727932841654, "learning_rate": 5.947608296762095e-06, "loss": 0.28279571533203124, "step": 13780 }, { "epoch": 0.11919481889477826, "grad_norm": 6.922588595906437, "learning_rate": 5.947570380912809e-06, "loss": 0.05346488952636719, "step": 13785 }, { "epoch": 0.11923805241632152, "grad_norm": 6.09169959842249, "learning_rate": 5.947532451469587e-06, "loss": 0.07196197509765626, "step": 13790 }, { "epoch": 0.11928128593786479, "grad_norm": 26.027437294512424, "learning_rate": 5.947494508432606e-06, "loss": 0.5828323364257812, "step": 13795 }, { "epoch": 0.11932451945940804, "grad_norm": 14.624170114282718, "learning_rate": 5.947456551802042e-06, "loss": 0.3032264709472656, "step": 13800 }, { "epoch": 0.11936775298095131, "grad_norm": 9.32711998993455, "learning_rate": 5.947418581578068e-06, "loss": 0.1793914794921875, "step": 13805 }, { "epoch": 0.11941098650249457, "grad_norm": 0.505901138069074, "learning_rate": 5.947380597760861e-06, "loss": 0.29556808471679685, "step": 13810 }, { "epoch": 0.11945422002403784, "grad_norm": 8.972097296701921, "learning_rate": 5.947342600350594e-06, "loss": 0.1718414306640625, "step": 13815 }, { "epoch": 0.1194974535455811, "grad_norm": 4.94322760586074, "learning_rate": 5.947304589347443e-06, "loss": 0.133123779296875, "step": 13820 }, { "epoch": 0.11954068706712437, "grad_norm": 0.7444262171600347, "learning_rate": 5.947266564751585e-06, "loss": 0.1864593505859375, "step": 13825 }, { "epoch": 0.11958392058866762, "grad_norm": 21.474215885844057, "learning_rate": 5.947228526563194e-06, "loss": 0.09166259765625, "step": 13830 }, { "epoch": 0.1196271541102109, "grad_norm": 1.0983818381959507, "learning_rate": 5.947190474782444e-06, "loss": 0.568304443359375, "step": 13835 }, { "epoch": 0.11967038763175415, "grad_norm": 7.830113519702243, "learning_rate": 5.9471524094095125e-06, "loss": 0.354473876953125, "step": 13840 }, { "epoch": 0.11971362115329742, "grad_norm": 18.262485602276143, "learning_rate": 5.947114330444575e-06, "loss": 0.482257080078125, "step": 13845 }, { "epoch": 0.11975685467484068, "grad_norm": 15.788433577705089, "learning_rate": 5.947076237887806e-06, "loss": 0.1284271240234375, "step": 13850 }, { "epoch": 0.11980008819638395, "grad_norm": 10.959715845787535, "learning_rate": 5.947038131739383e-06, "loss": 0.1308868408203125, "step": 13855 }, { "epoch": 0.1198433217179272, "grad_norm": 1.487958195989044, "learning_rate": 5.9470000119994795e-06, "loss": 0.21090087890625, "step": 13860 }, { "epoch": 0.11988655523947048, "grad_norm": 4.913285852083083, "learning_rate": 5.946961878668272e-06, "loss": 0.1037994384765625, "step": 13865 }, { "epoch": 0.11992978876101373, "grad_norm": 1.7212382775449908, "learning_rate": 5.9469237317459374e-06, "loss": 0.1819610595703125, "step": 13870 }, { "epoch": 0.119973022282557, "grad_norm": 21.146189964715788, "learning_rate": 5.946885571232651e-06, "loss": 0.457391357421875, "step": 13875 }, { "epoch": 0.12001625580410026, "grad_norm": 1.8054010359644168, "learning_rate": 5.946847397128589e-06, "loss": 0.21758880615234374, "step": 13880 }, { "epoch": 0.12005948932564353, "grad_norm": 30.843777507861628, "learning_rate": 5.946809209433927e-06, "loss": 0.23138885498046874, "step": 13885 }, { "epoch": 0.12010272284718679, "grad_norm": 4.5336327832419565, "learning_rate": 5.946771008148841e-06, "loss": 0.06295166015625, "step": 13890 }, { "epoch": 0.12014595636873006, "grad_norm": 4.519232075079036, "learning_rate": 5.946732793273508e-06, "loss": 0.139404296875, "step": 13895 }, { "epoch": 0.12018918989027333, "grad_norm": 36.719509959112486, "learning_rate": 5.946694564808102e-06, "loss": 0.288482666015625, "step": 13900 }, { "epoch": 0.12023242341181659, "grad_norm": 4.257066357736013, "learning_rate": 5.946656322752803e-06, "loss": 0.6545852661132813, "step": 13905 }, { "epoch": 0.12027565693335986, "grad_norm": 8.01332357532078, "learning_rate": 5.9466180671077845e-06, "loss": 0.2817626953125, "step": 13910 }, { "epoch": 0.12031889045490311, "grad_norm": 11.724294791626079, "learning_rate": 5.946579797873224e-06, "loss": 0.375640869140625, "step": 13915 }, { "epoch": 0.12036212397644638, "grad_norm": 1.7013535879422232, "learning_rate": 5.946541515049299e-06, "loss": 0.28953399658203127, "step": 13920 }, { "epoch": 0.12040535749798964, "grad_norm": 28.395337631529653, "learning_rate": 5.946503218636183e-06, "loss": 0.31351318359375, "step": 13925 }, { "epoch": 0.12044859101953291, "grad_norm": 22.84625654646657, "learning_rate": 5.946464908634055e-06, "loss": 0.596551513671875, "step": 13930 }, { "epoch": 0.12049182454107617, "grad_norm": 6.409056679569571, "learning_rate": 5.946426585043092e-06, "loss": 0.2879669189453125, "step": 13935 }, { "epoch": 0.12053505806261944, "grad_norm": 41.44914911530297, "learning_rate": 5.946388247863469e-06, "loss": 0.424017333984375, "step": 13940 }, { "epoch": 0.1205782915841627, "grad_norm": 5.977753429087851, "learning_rate": 5.946349897095365e-06, "loss": 0.2609130859375, "step": 13945 }, { "epoch": 0.12062152510570597, "grad_norm": 14.85951176126837, "learning_rate": 5.946311532738955e-06, "loss": 0.05631103515625, "step": 13950 }, { "epoch": 0.12066475862724922, "grad_norm": 18.243505495725394, "learning_rate": 5.946273154794416e-06, "loss": 0.103759765625, "step": 13955 }, { "epoch": 0.12070799214879249, "grad_norm": 4.40255115581594, "learning_rate": 5.946234763261926e-06, "loss": 0.18023300170898438, "step": 13960 }, { "epoch": 0.12075122567033575, "grad_norm": 2.56668628227149, "learning_rate": 5.946196358141661e-06, "loss": 0.23841552734375, "step": 13965 }, { "epoch": 0.12079445919187902, "grad_norm": 5.228896143101051, "learning_rate": 5.946157939433799e-06, "loss": 0.11327590942382812, "step": 13970 }, { "epoch": 0.12083769271342228, "grad_norm": 6.492272368594153, "learning_rate": 5.946119507138518e-06, "loss": 0.24498481750488282, "step": 13975 }, { "epoch": 0.12088092623496555, "grad_norm": 0.5858632907264495, "learning_rate": 5.946081061255993e-06, "loss": 0.107135009765625, "step": 13980 }, { "epoch": 0.1209241597565088, "grad_norm": 42.99721019381831, "learning_rate": 5.9460426017864025e-06, "loss": 0.13059816360473633, "step": 13985 }, { "epoch": 0.12096739327805207, "grad_norm": 9.601177400021204, "learning_rate": 5.946004128729924e-06, "loss": 0.0852294921875, "step": 13990 }, { "epoch": 0.12101062679959533, "grad_norm": 22.78620998707532, "learning_rate": 5.945965642086735e-06, "loss": 0.130377197265625, "step": 13995 }, { "epoch": 0.1210538603211386, "grad_norm": 1.7646694313429403, "learning_rate": 5.945927141857013e-06, "loss": 0.12686614990234374, "step": 14000 }, { "epoch": 0.12109709384268186, "grad_norm": 17.832134883259766, "learning_rate": 5.945888628040935e-06, "loss": 0.270196533203125, "step": 14005 }, { "epoch": 0.12114032736422513, "grad_norm": 4.393115090201885, "learning_rate": 5.945850100638679e-06, "loss": 0.1709197998046875, "step": 14010 }, { "epoch": 0.12118356088576838, "grad_norm": 45.93782094888231, "learning_rate": 5.945811559650423e-06, "loss": 0.36376495361328126, "step": 14015 }, { "epoch": 0.12122679440731166, "grad_norm": 89.04475977200924, "learning_rate": 5.945773005076344e-06, "loss": 0.326300048828125, "step": 14020 }, { "epoch": 0.12127002792885491, "grad_norm": 4.914577283271504, "learning_rate": 5.94573443691662e-06, "loss": 0.1940399169921875, "step": 14025 }, { "epoch": 0.12131326145039818, "grad_norm": 2.287573880805592, "learning_rate": 5.945695855171429e-06, "loss": 0.0759185791015625, "step": 14030 }, { "epoch": 0.12135649497194144, "grad_norm": 16.39400681281723, "learning_rate": 5.945657259840949e-06, "loss": 0.17054672241210939, "step": 14035 }, { "epoch": 0.12139972849348471, "grad_norm": 4.868495579834552, "learning_rate": 5.945618650925358e-06, "loss": 0.28471832275390624, "step": 14040 }, { "epoch": 0.12144296201502797, "grad_norm": 6.5747306204959575, "learning_rate": 5.945580028424836e-06, "loss": 0.11967620849609376, "step": 14045 }, { "epoch": 0.12148619553657124, "grad_norm": 22.89474121708083, "learning_rate": 5.945541392339556e-06, "loss": 0.17161407470703124, "step": 14050 }, { "epoch": 0.1215294290581145, "grad_norm": 0.7466861599188523, "learning_rate": 5.945502742669701e-06, "loss": 0.09027481079101562, "step": 14055 }, { "epoch": 0.12157266257965776, "grad_norm": 13.696477961930016, "learning_rate": 5.945464079415448e-06, "loss": 0.10335311889648438, "step": 14060 }, { "epoch": 0.12161589610120102, "grad_norm": 8.52395565052339, "learning_rate": 5.945425402576974e-06, "loss": 0.4436500549316406, "step": 14065 }, { "epoch": 0.12165912962274429, "grad_norm": 7.6662038555214655, "learning_rate": 5.945386712154458e-06, "loss": 0.138739013671875, "step": 14070 }, { "epoch": 0.12170236314428755, "grad_norm": 6.764118045359735, "learning_rate": 5.94534800814808e-06, "loss": 0.0570587158203125, "step": 14075 }, { "epoch": 0.12174559666583082, "grad_norm": 1.7658117242486726, "learning_rate": 5.945309290558016e-06, "loss": 0.206060791015625, "step": 14080 }, { "epoch": 0.12178883018737409, "grad_norm": 5.722249830802554, "learning_rate": 5.945270559384446e-06, "loss": 0.1626708984375, "step": 14085 }, { "epoch": 0.12183206370891735, "grad_norm": 3.384728874852423, "learning_rate": 5.9452318146275505e-06, "loss": 0.124310302734375, "step": 14090 }, { "epoch": 0.12187529723046062, "grad_norm": 15.87947984644811, "learning_rate": 5.945193056287504e-06, "loss": 0.08677978515625, "step": 14095 }, { "epoch": 0.12191853075200387, "grad_norm": 13.31999612953753, "learning_rate": 5.945154284364488e-06, "loss": 0.045554733276367186, "step": 14100 }, { "epoch": 0.12196176427354714, "grad_norm": 4.350399448601812, "learning_rate": 5.945115498858681e-06, "loss": 0.04995651245117187, "step": 14105 }, { "epoch": 0.1220049977950904, "grad_norm": 24.152198977417466, "learning_rate": 5.945076699770262e-06, "loss": 0.1757843017578125, "step": 14110 }, { "epoch": 0.12204823131663367, "grad_norm": 30.987170670906792, "learning_rate": 5.9450378870994075e-06, "loss": 0.21940345764160157, "step": 14115 }, { "epoch": 0.12209146483817693, "grad_norm": 39.98918854874564, "learning_rate": 5.9449990608463e-06, "loss": 0.4396339416503906, "step": 14120 }, { "epoch": 0.1221346983597202, "grad_norm": 34.61751927061697, "learning_rate": 5.944960221011117e-06, "loss": 0.3210426330566406, "step": 14125 }, { "epoch": 0.12217793188126345, "grad_norm": 4.511703125348702, "learning_rate": 5.944921367594037e-06, "loss": 0.0866973876953125, "step": 14130 }, { "epoch": 0.12222116540280673, "grad_norm": 6.924623536803289, "learning_rate": 5.944882500595241e-06, "loss": 0.17210006713867188, "step": 14135 }, { "epoch": 0.12226439892434998, "grad_norm": 5.282313984231836, "learning_rate": 5.944843620014905e-06, "loss": 0.2490570068359375, "step": 14140 }, { "epoch": 0.12230763244589325, "grad_norm": 26.293164300472416, "learning_rate": 5.944804725853212e-06, "loss": 0.3813201904296875, "step": 14145 }, { "epoch": 0.12235086596743651, "grad_norm": 21.448928177702427, "learning_rate": 5.944765818110339e-06, "loss": 0.3851470947265625, "step": 14150 }, { "epoch": 0.12239409948897978, "grad_norm": 2.8354048037804125, "learning_rate": 5.944726896786468e-06, "loss": 0.3708740234375, "step": 14155 }, { "epoch": 0.12243733301052304, "grad_norm": 0.46894644983186423, "learning_rate": 5.9446879618817756e-06, "loss": 0.09022445678710937, "step": 14160 }, { "epoch": 0.1224805665320663, "grad_norm": 16.65241334007879, "learning_rate": 5.9446490133964415e-06, "loss": 0.30999755859375, "step": 14165 }, { "epoch": 0.12252380005360956, "grad_norm": 21.65848181499693, "learning_rate": 5.944610051330648e-06, "loss": 0.20634994506835938, "step": 14170 }, { "epoch": 0.12256703357515283, "grad_norm": 13.452759703986398, "learning_rate": 5.944571075684572e-06, "loss": 0.09903564453125, "step": 14175 }, { "epoch": 0.12261026709669609, "grad_norm": 2.966725504059389, "learning_rate": 5.944532086458395e-06, "loss": 0.08476715087890625, "step": 14180 }, { "epoch": 0.12265350061823936, "grad_norm": 2.1712966022629874, "learning_rate": 5.944493083652297e-06, "loss": 0.8437990188598633, "step": 14185 }, { "epoch": 0.12269673413978262, "grad_norm": 10.643388335488833, "learning_rate": 5.944454067266455e-06, "loss": 0.10001068115234375, "step": 14190 }, { "epoch": 0.12273996766132589, "grad_norm": 3.5321097622729822, "learning_rate": 5.944415037301053e-06, "loss": 0.540966796875, "step": 14195 }, { "epoch": 0.12278320118286914, "grad_norm": 1.688962710808847, "learning_rate": 5.944375993756268e-06, "loss": 0.2068866729736328, "step": 14200 }, { "epoch": 0.12282643470441242, "grad_norm": 11.485655958511698, "learning_rate": 5.9443369366322814e-06, "loss": 0.28828125, "step": 14205 }, { "epoch": 0.12286966822595567, "grad_norm": 11.74497222482383, "learning_rate": 5.944297865929272e-06, "loss": 0.163525390625, "step": 14210 }, { "epoch": 0.12291290174749894, "grad_norm": 4.3218977811639885, "learning_rate": 5.944258781647422e-06, "loss": 0.221624755859375, "step": 14215 }, { "epoch": 0.1229561352690422, "grad_norm": 1.3011222900605488, "learning_rate": 5.944219683786911e-06, "loss": 0.015833282470703126, "step": 14220 }, { "epoch": 0.12299936879058547, "grad_norm": 7.775517633502911, "learning_rate": 5.944180572347919e-06, "loss": 0.13116188049316407, "step": 14225 }, { "epoch": 0.12304260231212873, "grad_norm": 5.000744884249007, "learning_rate": 5.944141447330625e-06, "loss": 0.1968994140625, "step": 14230 }, { "epoch": 0.123085835833672, "grad_norm": 12.829238680305117, "learning_rate": 5.944102308735212e-06, "loss": 0.1867431640625, "step": 14235 }, { "epoch": 0.12312906935521525, "grad_norm": 8.210735955261901, "learning_rate": 5.944063156561858e-06, "loss": 0.089190673828125, "step": 14240 }, { "epoch": 0.12317230287675852, "grad_norm": 29.772436599701507, "learning_rate": 5.944023990810747e-06, "loss": 0.18637542724609374, "step": 14245 }, { "epoch": 0.12321553639830178, "grad_norm": 11.696387555896818, "learning_rate": 5.943984811482056e-06, "loss": 0.31256103515625, "step": 14250 }, { "epoch": 0.12325876991984505, "grad_norm": 0.1306761216576402, "learning_rate": 5.943945618575968e-06, "loss": 0.11428794860839844, "step": 14255 }, { "epoch": 0.12330200344138831, "grad_norm": 29.689230602454945, "learning_rate": 5.943906412092663e-06, "loss": 0.4296539306640625, "step": 14260 }, { "epoch": 0.12334523696293158, "grad_norm": 8.25858867908835, "learning_rate": 5.943867192032321e-06, "loss": 0.20177230834960938, "step": 14265 }, { "epoch": 0.12338847048447485, "grad_norm": 4.3195821617981975, "learning_rate": 5.943827958395123e-06, "loss": 0.07075958251953125, "step": 14270 }, { "epoch": 0.1234317040060181, "grad_norm": 10.419381002720124, "learning_rate": 5.9437887111812515e-06, "loss": 0.0815521240234375, "step": 14275 }, { "epoch": 0.12347493752756138, "grad_norm": 3.1034310786983945, "learning_rate": 5.943749450390886e-06, "loss": 0.2497100830078125, "step": 14280 }, { "epoch": 0.12351817104910463, "grad_norm": 6.043356443412548, "learning_rate": 5.943710176024209e-06, "loss": 0.431103515625, "step": 14285 }, { "epoch": 0.1235614045706479, "grad_norm": 14.846422438879197, "learning_rate": 5.9436708880814005e-06, "loss": 0.2263916015625, "step": 14290 }, { "epoch": 0.12360463809219116, "grad_norm": 29.515311922202326, "learning_rate": 5.943631586562642e-06, "loss": 0.2375457763671875, "step": 14295 }, { "epoch": 0.12364787161373443, "grad_norm": 3.643733898983005, "learning_rate": 5.943592271468114e-06, "loss": 0.2208740234375, "step": 14300 }, { "epoch": 0.12369110513527769, "grad_norm": 1.271275644976522, "learning_rate": 5.943552942797999e-06, "loss": 0.1202484130859375, "step": 14305 }, { "epoch": 0.12373433865682096, "grad_norm": 20.149386602835786, "learning_rate": 5.943513600552479e-06, "loss": 0.1759765625, "step": 14310 }, { "epoch": 0.12377757217836421, "grad_norm": 15.427288877764397, "learning_rate": 5.943474244731733e-06, "loss": 0.247943115234375, "step": 14315 }, { "epoch": 0.12382080569990749, "grad_norm": 13.236735250155636, "learning_rate": 5.943434875335944e-06, "loss": 0.2180450439453125, "step": 14320 }, { "epoch": 0.12386403922145074, "grad_norm": 34.064460563329185, "learning_rate": 5.943395492365294e-06, "loss": 0.16140613555908204, "step": 14325 }, { "epoch": 0.12390727274299401, "grad_norm": 17.520125422233537, "learning_rate": 5.9433560958199645e-06, "loss": 0.38096160888671876, "step": 14330 }, { "epoch": 0.12395050626453727, "grad_norm": 21.11130028412074, "learning_rate": 5.943316685700136e-06, "loss": 0.126458740234375, "step": 14335 }, { "epoch": 0.12399373978608054, "grad_norm": 2.753541619714424, "learning_rate": 5.943277262005991e-06, "loss": 0.0809967041015625, "step": 14340 }, { "epoch": 0.1240369733076238, "grad_norm": 1.816143615032317, "learning_rate": 5.943237824737711e-06, "loss": 0.1245391845703125, "step": 14345 }, { "epoch": 0.12408020682916707, "grad_norm": 1.0046148561397505, "learning_rate": 5.943198373895479e-06, "loss": 0.40610198974609374, "step": 14350 }, { "epoch": 0.12412344035071032, "grad_norm": 6.2965237759622, "learning_rate": 5.943158909479477e-06, "loss": 0.13099212646484376, "step": 14355 }, { "epoch": 0.1241666738722536, "grad_norm": 15.39707474483752, "learning_rate": 5.943119431489885e-06, "loss": 0.10918426513671875, "step": 14360 }, { "epoch": 0.12420990739379685, "grad_norm": 3.3392740409377577, "learning_rate": 5.943079939926885e-06, "loss": 0.18876953125, "step": 14365 }, { "epoch": 0.12425314091534012, "grad_norm": 33.95323001891189, "learning_rate": 5.9430404347906625e-06, "loss": 0.46908111572265626, "step": 14370 }, { "epoch": 0.12429637443688338, "grad_norm": 5.856507579211151, "learning_rate": 5.943000916081396e-06, "loss": 0.028191375732421874, "step": 14375 }, { "epoch": 0.12433960795842665, "grad_norm": 8.254955364787575, "learning_rate": 5.94296138379927e-06, "loss": 0.241326904296875, "step": 14380 }, { "epoch": 0.1243828414799699, "grad_norm": 20.81218076786984, "learning_rate": 5.942921837944467e-06, "loss": 0.1828582763671875, "step": 14385 }, { "epoch": 0.12442607500151318, "grad_norm": 0.5562274647378451, "learning_rate": 5.942882278517168e-06, "loss": 0.2639739990234375, "step": 14390 }, { "epoch": 0.12446930852305643, "grad_norm": 13.714585396659524, "learning_rate": 5.942842705517554e-06, "loss": 0.180950927734375, "step": 14395 }, { "epoch": 0.1245125420445997, "grad_norm": 0.7163156808339507, "learning_rate": 5.942803118945812e-06, "loss": 0.22217483520507814, "step": 14400 }, { "epoch": 0.12455577556614296, "grad_norm": 4.438726941821817, "learning_rate": 5.942763518802121e-06, "loss": 0.329962158203125, "step": 14405 }, { "epoch": 0.12459900908768623, "grad_norm": 0.8918994147715151, "learning_rate": 5.942723905086665e-06, "loss": 0.15106964111328125, "step": 14410 }, { "epoch": 0.12464224260922949, "grad_norm": 4.232742232877931, "learning_rate": 5.942684277799626e-06, "loss": 0.15064697265625, "step": 14415 }, { "epoch": 0.12468547613077276, "grad_norm": 1.9832336636554564, "learning_rate": 5.942644636941187e-06, "loss": 0.2016357421875, "step": 14420 }, { "epoch": 0.12472870965231601, "grad_norm": 1.2864918307021984, "learning_rate": 5.942604982511531e-06, "loss": 0.1395294189453125, "step": 14425 }, { "epoch": 0.12477194317385928, "grad_norm": 41.313693468762594, "learning_rate": 5.942565314510842e-06, "loss": 0.3254058837890625, "step": 14430 }, { "epoch": 0.12481517669540254, "grad_norm": 0.6343026231982956, "learning_rate": 5.942525632939301e-06, "loss": 0.18028106689453124, "step": 14435 }, { "epoch": 0.12485841021694581, "grad_norm": 14.56879526104336, "learning_rate": 5.9424859377970914e-06, "loss": 0.128277587890625, "step": 14440 }, { "epoch": 0.12490164373848907, "grad_norm": 5.0355956683382805, "learning_rate": 5.9424462290843975e-06, "loss": 0.29796142578125, "step": 14445 }, { "epoch": 0.12494487726003234, "grad_norm": 30.046545241632494, "learning_rate": 5.9424065068014005e-06, "loss": 0.696868896484375, "step": 14450 }, { "epoch": 0.12498811078157561, "grad_norm": 1.9420318368382494, "learning_rate": 5.942366770948286e-06, "loss": 0.103851318359375, "step": 14455 }, { "epoch": 0.12503134430311888, "grad_norm": 3.8193174476238676, "learning_rate": 5.9423270215252345e-06, "loss": 0.186785888671875, "step": 14460 }, { "epoch": 0.12507457782466214, "grad_norm": 6.332401855624656, "learning_rate": 5.942287258532431e-06, "loss": 0.23977279663085938, "step": 14465 }, { "epoch": 0.1251178113462054, "grad_norm": 39.70347408771592, "learning_rate": 5.94224748197006e-06, "loss": 0.758319091796875, "step": 14470 }, { "epoch": 0.12516104486774865, "grad_norm": 10.954179735808623, "learning_rate": 5.942207691838302e-06, "loss": 0.29161376953125, "step": 14475 }, { "epoch": 0.12520427838929193, "grad_norm": 33.14283396326635, "learning_rate": 5.942167888137343e-06, "loss": 0.550439453125, "step": 14480 }, { "epoch": 0.1252475119108352, "grad_norm": 0.6044096577402993, "learning_rate": 5.942128070867366e-06, "loss": 0.15700302124023438, "step": 14485 }, { "epoch": 0.12529074543237845, "grad_norm": 29.608661764293274, "learning_rate": 5.942088240028553e-06, "loss": 0.2116485595703125, "step": 14490 }, { "epoch": 0.1253339789539217, "grad_norm": 23.65958613339211, "learning_rate": 5.942048395621091e-06, "loss": 0.5627792358398438, "step": 14495 }, { "epoch": 0.125377212475465, "grad_norm": 42.675499346574924, "learning_rate": 5.94200853764516e-06, "loss": 0.5800552368164062, "step": 14500 }, { "epoch": 0.12542044599700825, "grad_norm": 4.220429630135452, "learning_rate": 5.941968666100946e-06, "loss": 0.1303497314453125, "step": 14505 }, { "epoch": 0.1254636795185515, "grad_norm": 25.702971804157524, "learning_rate": 5.941928780988632e-06, "loss": 0.11903419494628906, "step": 14510 }, { "epoch": 0.12550691304009476, "grad_norm": 19.677237111272817, "learning_rate": 5.941888882308403e-06, "loss": 0.168817138671875, "step": 14515 }, { "epoch": 0.12555014656163804, "grad_norm": 2.0723227847141197, "learning_rate": 5.9418489700604426e-06, "loss": 0.36098175048828124, "step": 14520 }, { "epoch": 0.1255933800831813, "grad_norm": 21.092150201049055, "learning_rate": 5.941809044244935e-06, "loss": 0.27558555603027346, "step": 14525 }, { "epoch": 0.12563661360472456, "grad_norm": 27.151648654120113, "learning_rate": 5.941769104862063e-06, "loss": 0.34094085693359377, "step": 14530 }, { "epoch": 0.1256798471262678, "grad_norm": 3.2477024321102563, "learning_rate": 5.941729151912013e-06, "loss": 0.464703369140625, "step": 14535 }, { "epoch": 0.1257230806478111, "grad_norm": 0.85159506073136, "learning_rate": 5.9416891853949675e-06, "loss": 0.17237548828125, "step": 14540 }, { "epoch": 0.12576631416935435, "grad_norm": 49.1201630109314, "learning_rate": 5.941649205311111e-06, "loss": 0.337689208984375, "step": 14545 }, { "epoch": 0.1258095476908976, "grad_norm": 0.8260037454996431, "learning_rate": 5.941609211660629e-06, "loss": 0.14111061096191407, "step": 14550 }, { "epoch": 0.12585278121244087, "grad_norm": 61.01327134708362, "learning_rate": 5.941569204443704e-06, "loss": 0.28720703125, "step": 14555 }, { "epoch": 0.12589601473398415, "grad_norm": 70.60954339261139, "learning_rate": 5.941529183660523e-06, "loss": 0.2680530548095703, "step": 14560 }, { "epoch": 0.1259392482555274, "grad_norm": 9.28644581574521, "learning_rate": 5.9414891493112695e-06, "loss": 0.085516357421875, "step": 14565 }, { "epoch": 0.12598248177707067, "grad_norm": 0.9693976839387545, "learning_rate": 5.941449101396127e-06, "loss": 0.15120925903320312, "step": 14570 }, { "epoch": 0.12602571529861392, "grad_norm": 12.47008188279637, "learning_rate": 5.941409039915282e-06, "loss": 0.1227783203125, "step": 14575 }, { "epoch": 0.1260689488201572, "grad_norm": 1.9246570749611254, "learning_rate": 5.941368964868918e-06, "loss": 0.55291748046875, "step": 14580 }, { "epoch": 0.12611218234170046, "grad_norm": 7.621468248456648, "learning_rate": 5.94132887625722e-06, "loss": 0.059600830078125, "step": 14585 }, { "epoch": 0.12615541586324372, "grad_norm": 15.913100924460794, "learning_rate": 5.941288774080374e-06, "loss": 0.24041748046875, "step": 14590 }, { "epoch": 0.12619864938478698, "grad_norm": 54.85298183213921, "learning_rate": 5.941248658338563e-06, "loss": 0.41272697448730467, "step": 14595 }, { "epoch": 0.12624188290633026, "grad_norm": 23.52875625196378, "learning_rate": 5.941208529031974e-06, "loss": 0.150457763671875, "step": 14600 }, { "epoch": 0.12628511642787352, "grad_norm": 0.5572826351883255, "learning_rate": 5.94116838616079e-06, "loss": 0.09504985809326172, "step": 14605 }, { "epoch": 0.12632834994941677, "grad_norm": 54.80219600482582, "learning_rate": 5.941128229725198e-06, "loss": 0.547332763671875, "step": 14610 }, { "epoch": 0.12637158347096003, "grad_norm": 23.371456856340984, "learning_rate": 5.941088059725383e-06, "loss": 0.249908447265625, "step": 14615 }, { "epoch": 0.12641481699250331, "grad_norm": 0.44674391248650325, "learning_rate": 5.941047876161528e-06, "loss": 0.13267440795898439, "step": 14620 }, { "epoch": 0.12645805051404657, "grad_norm": 13.666461546305326, "learning_rate": 5.941007679033822e-06, "loss": 0.31432571411132815, "step": 14625 }, { "epoch": 0.12650128403558983, "grad_norm": 1.3950414668225721, "learning_rate": 5.940967468342448e-06, "loss": 0.18843994140625, "step": 14630 }, { "epoch": 0.12654451755713308, "grad_norm": 25.20466570919541, "learning_rate": 5.940927244087591e-06, "loss": 0.17603759765625, "step": 14635 }, { "epoch": 0.12658775107867637, "grad_norm": 9.131994629498793, "learning_rate": 5.9408870062694385e-06, "loss": 0.15153045654296876, "step": 14640 }, { "epoch": 0.12663098460021963, "grad_norm": 0.2500165556578089, "learning_rate": 5.940846754888174e-06, "loss": 0.18265838623046876, "step": 14645 }, { "epoch": 0.12667421812176288, "grad_norm": 14.21455492430344, "learning_rate": 5.940806489943985e-06, "loss": 0.2299163818359375, "step": 14650 }, { "epoch": 0.12671745164330617, "grad_norm": 2.33519610079509, "learning_rate": 5.940766211437057e-06, "loss": 0.037164306640625, "step": 14655 }, { "epoch": 0.12676068516484942, "grad_norm": 19.145816112334245, "learning_rate": 5.940725919367573e-06, "loss": 0.15041961669921874, "step": 14660 }, { "epoch": 0.12680391868639268, "grad_norm": 22.74153153072243, "learning_rate": 5.940685613735722e-06, "loss": 0.246539306640625, "step": 14665 }, { "epoch": 0.12684715220793594, "grad_norm": 1.6158968136185476, "learning_rate": 5.940645294541689e-06, "loss": 0.20788230895996093, "step": 14670 }, { "epoch": 0.12689038572947922, "grad_norm": 21.036226599077594, "learning_rate": 5.9406049617856595e-06, "loss": 0.17583770751953126, "step": 14675 }, { "epoch": 0.12693361925102248, "grad_norm": 0.4134413919843054, "learning_rate": 5.94056461546782e-06, "loss": 0.2562347412109375, "step": 14680 }, { "epoch": 0.12697685277256573, "grad_norm": 46.84681599887378, "learning_rate": 5.9405242555883556e-06, "loss": 0.2282867431640625, "step": 14685 }, { "epoch": 0.127020086294109, "grad_norm": 11.431561804037612, "learning_rate": 5.940483882147453e-06, "loss": 0.3000679016113281, "step": 14690 }, { "epoch": 0.12706331981565228, "grad_norm": 17.496922559829628, "learning_rate": 5.9404434951453e-06, "loss": 0.2761512756347656, "step": 14695 }, { "epoch": 0.12710655333719553, "grad_norm": 18.611998393179697, "learning_rate": 5.9404030945820805e-06, "loss": 0.10152587890625, "step": 14700 }, { "epoch": 0.1271497868587388, "grad_norm": 8.50667576646865, "learning_rate": 5.940362680457982e-06, "loss": 0.24488372802734376, "step": 14705 }, { "epoch": 0.12719302038028205, "grad_norm": 2.9203808937950875, "learning_rate": 5.94032225277319e-06, "loss": 0.0431549072265625, "step": 14710 }, { "epoch": 0.12723625390182533, "grad_norm": 13.129073104120945, "learning_rate": 5.940281811527892e-06, "loss": 0.1070098876953125, "step": 14715 }, { "epoch": 0.1272794874233686, "grad_norm": 35.38266885628262, "learning_rate": 5.940241356722275e-06, "loss": 0.4693634033203125, "step": 14720 }, { "epoch": 0.12732272094491184, "grad_norm": 4.0023114185021775, "learning_rate": 5.940200888356524e-06, "loss": 0.08883628845214844, "step": 14725 }, { "epoch": 0.1273659544664551, "grad_norm": 16.2990481972515, "learning_rate": 5.940160406430826e-06, "loss": 0.2516265869140625, "step": 14730 }, { "epoch": 0.12740918798799838, "grad_norm": 0.40383109664983224, "learning_rate": 5.940119910945368e-06, "loss": 0.09950103759765624, "step": 14735 }, { "epoch": 0.12745242150954164, "grad_norm": 0.362470804466614, "learning_rate": 5.940079401900337e-06, "loss": 0.34006805419921876, "step": 14740 }, { "epoch": 0.1274956550310849, "grad_norm": 6.001341302340899, "learning_rate": 5.940038879295919e-06, "loss": 0.101568603515625, "step": 14745 }, { "epoch": 0.12753888855262815, "grad_norm": 3.178945478759964, "learning_rate": 5.939998343132303e-06, "loss": 0.2732666015625, "step": 14750 }, { "epoch": 0.12758212207417144, "grad_norm": 8.898561011385189, "learning_rate": 5.939957793409673e-06, "loss": 0.27951202392578123, "step": 14755 }, { "epoch": 0.1276253555957147, "grad_norm": 7.00568814733897, "learning_rate": 5.939917230128218e-06, "loss": 0.1552490234375, "step": 14760 }, { "epoch": 0.12766858911725795, "grad_norm": 19.259246801643076, "learning_rate": 5.939876653288124e-06, "loss": 0.361688232421875, "step": 14765 }, { "epoch": 0.1277118226388012, "grad_norm": 6.889745686484791, "learning_rate": 5.939836062889579e-06, "loss": 0.3530998229980469, "step": 14770 }, { "epoch": 0.1277550561603445, "grad_norm": 33.953654028297066, "learning_rate": 5.93979545893277e-06, "loss": 0.169354248046875, "step": 14775 }, { "epoch": 0.12779828968188775, "grad_norm": 15.610378312038257, "learning_rate": 5.939754841417884e-06, "loss": 0.1358856201171875, "step": 14780 }, { "epoch": 0.127841523203431, "grad_norm": 2.972347658392523, "learning_rate": 5.939714210345108e-06, "loss": 0.1329986572265625, "step": 14785 }, { "epoch": 0.12788475672497426, "grad_norm": 29.854168985608492, "learning_rate": 5.9396735657146305e-06, "loss": 0.26641387939453126, "step": 14790 }, { "epoch": 0.12792799024651755, "grad_norm": 1.1372693927812765, "learning_rate": 5.9396329075266375e-06, "loss": 0.09199085235595703, "step": 14795 }, { "epoch": 0.1279712237680608, "grad_norm": 36.67232909022184, "learning_rate": 5.939592235781318e-06, "loss": 0.432611083984375, "step": 14800 }, { "epoch": 0.12801445728960406, "grad_norm": 4.1654959316363085, "learning_rate": 5.939551550478859e-06, "loss": 0.20751628875732422, "step": 14805 }, { "epoch": 0.12805769081114732, "grad_norm": 22.147139955363652, "learning_rate": 5.939510851619447e-06, "loss": 0.1642852783203125, "step": 14810 }, { "epoch": 0.1281009243326906, "grad_norm": 4.495696872550088, "learning_rate": 5.939470139203272e-06, "loss": 0.076910400390625, "step": 14815 }, { "epoch": 0.12814415785423386, "grad_norm": 19.02393276496489, "learning_rate": 5.939429413230519e-06, "loss": 0.129901123046875, "step": 14820 }, { "epoch": 0.12818739137577712, "grad_norm": 1.7416011162636469, "learning_rate": 5.939388673701378e-06, "loss": 0.1289337158203125, "step": 14825 }, { "epoch": 0.1282306248973204, "grad_norm": 9.822327243137677, "learning_rate": 5.939347920616036e-06, "loss": 0.30933837890625, "step": 14830 }, { "epoch": 0.12827385841886366, "grad_norm": 9.083017204327287, "learning_rate": 5.939307153974682e-06, "loss": 0.4836090087890625, "step": 14835 }, { "epoch": 0.1283170919404069, "grad_norm": 13.550695783388427, "learning_rate": 5.9392663737775025e-06, "loss": 0.19510459899902344, "step": 14840 }, { "epoch": 0.12836032546195017, "grad_norm": 2.2290238197863723, "learning_rate": 5.939225580024686e-06, "loss": 0.2306610107421875, "step": 14845 }, { "epoch": 0.12840355898349345, "grad_norm": 19.193761925392476, "learning_rate": 5.9391847727164205e-06, "loss": 0.19164886474609374, "step": 14850 }, { "epoch": 0.1284467925050367, "grad_norm": 32.10423203017397, "learning_rate": 5.939143951852896e-06, "loss": 0.3881561279296875, "step": 14855 }, { "epoch": 0.12849002602657997, "grad_norm": 0.49938760463588705, "learning_rate": 5.939103117434297e-06, "loss": 0.22271728515625, "step": 14860 }, { "epoch": 0.12853325954812322, "grad_norm": 3.660707652538978, "learning_rate": 5.939062269460816e-06, "loss": 0.1581329345703125, "step": 14865 }, { "epoch": 0.1285764930696665, "grad_norm": 11.743578083902154, "learning_rate": 5.939021407932639e-06, "loss": 0.07793655395507812, "step": 14870 }, { "epoch": 0.12861972659120977, "grad_norm": 11.40097337686374, "learning_rate": 5.938980532849955e-06, "loss": 0.16099853515625, "step": 14875 }, { "epoch": 0.12866296011275302, "grad_norm": 9.48329604469777, "learning_rate": 5.938939644212952e-06, "loss": 0.3118743896484375, "step": 14880 }, { "epoch": 0.12870619363429628, "grad_norm": 59.6846329899196, "learning_rate": 5.9388987420218195e-06, "loss": 0.1582183837890625, "step": 14885 }, { "epoch": 0.12874942715583956, "grad_norm": 4.268502374571657, "learning_rate": 5.938857826276746e-06, "loss": 0.21883468627929686, "step": 14890 }, { "epoch": 0.12879266067738282, "grad_norm": 0.508537353146423, "learning_rate": 5.93881689697792e-06, "loss": 0.1600860595703125, "step": 14895 }, { "epoch": 0.12883589419892608, "grad_norm": 0.9516034163715622, "learning_rate": 5.938775954125529e-06, "loss": 0.17303237915039063, "step": 14900 }, { "epoch": 0.12887912772046933, "grad_norm": 20.752389607645945, "learning_rate": 5.938734997719763e-06, "loss": 0.352178955078125, "step": 14905 }, { "epoch": 0.12892236124201262, "grad_norm": 18.49706956116733, "learning_rate": 5.938694027760813e-06, "loss": 0.051035308837890626, "step": 14910 }, { "epoch": 0.12896559476355587, "grad_norm": 44.59642222517665, "learning_rate": 5.938653044248863e-06, "loss": 0.5751373291015625, "step": 14915 }, { "epoch": 0.12900882828509913, "grad_norm": 36.750458724650855, "learning_rate": 5.938612047184106e-06, "loss": 0.2181884765625, "step": 14920 }, { "epoch": 0.1290520618066424, "grad_norm": 9.521953053562221, "learning_rate": 5.938571036566731e-06, "loss": 0.1797637939453125, "step": 14925 }, { "epoch": 0.12909529532818567, "grad_norm": 11.27920600735301, "learning_rate": 5.938530012396925e-06, "loss": 0.39505615234375, "step": 14930 }, { "epoch": 0.12913852884972893, "grad_norm": 23.824932062318226, "learning_rate": 5.938488974674878e-06, "loss": 0.29955368041992186, "step": 14935 }, { "epoch": 0.12918176237127219, "grad_norm": 18.458108548473618, "learning_rate": 5.93844792340078e-06, "loss": 0.28008270263671875, "step": 14940 }, { "epoch": 0.12922499589281544, "grad_norm": 35.48771089245462, "learning_rate": 5.938406858574819e-06, "loss": 0.3861419677734375, "step": 14945 }, { "epoch": 0.12926822941435873, "grad_norm": 4.740401615587166, "learning_rate": 5.9383657801971864e-06, "loss": 0.0633392333984375, "step": 14950 }, { "epoch": 0.12931146293590198, "grad_norm": 8.630893778022195, "learning_rate": 5.938324688268069e-06, "loss": 0.373040771484375, "step": 14955 }, { "epoch": 0.12935469645744524, "grad_norm": 16.57815140074755, "learning_rate": 5.93828358278766e-06, "loss": 0.15899658203125, "step": 14960 }, { "epoch": 0.1293979299789885, "grad_norm": 28.522924700947943, "learning_rate": 5.938242463756145e-06, "loss": 0.25382080078125, "step": 14965 }, { "epoch": 0.12944116350053178, "grad_norm": 41.634463477245895, "learning_rate": 5.9382013311737155e-06, "loss": 0.2377105712890625, "step": 14970 }, { "epoch": 0.12948439702207504, "grad_norm": 8.792056912212463, "learning_rate": 5.938160185040562e-06, "loss": 0.2664947509765625, "step": 14975 }, { "epoch": 0.1295276305436183, "grad_norm": 28.67456145363823, "learning_rate": 5.938119025356873e-06, "loss": 0.35765533447265624, "step": 14980 }, { "epoch": 0.12957086406516155, "grad_norm": 3.604994875535681, "learning_rate": 5.938077852122837e-06, "loss": 0.31644287109375, "step": 14985 }, { "epoch": 0.12961409758670484, "grad_norm": 8.131180700689058, "learning_rate": 5.938036665338647e-06, "loss": 0.206829833984375, "step": 14990 }, { "epoch": 0.1296573311082481, "grad_norm": 0.0716086918703926, "learning_rate": 5.9379954650044915e-06, "loss": 0.19271621704101563, "step": 14995 }, { "epoch": 0.12970056462979135, "grad_norm": 32.02451943115926, "learning_rate": 5.937954251120561e-06, "loss": 0.3239013671875, "step": 15000 }, { "epoch": 0.1297437981513346, "grad_norm": 5.49727172607535, "learning_rate": 5.937913023687044e-06, "loss": 0.2987030029296875, "step": 15005 }, { "epoch": 0.1297870316728779, "grad_norm": 11.672411192351593, "learning_rate": 5.937871782704133e-06, "loss": 0.15284576416015624, "step": 15010 }, { "epoch": 0.12983026519442115, "grad_norm": 68.78436567959058, "learning_rate": 5.937830528172016e-06, "loss": 0.681463623046875, "step": 15015 }, { "epoch": 0.1298734987159644, "grad_norm": 31.71393894281273, "learning_rate": 5.937789260090885e-06, "loss": 0.2838008880615234, "step": 15020 }, { "epoch": 0.1299167322375077, "grad_norm": 37.26310286166677, "learning_rate": 5.9377479784609275e-06, "loss": 0.17973823547363282, "step": 15025 }, { "epoch": 0.12995996575905094, "grad_norm": 2.516787648052432, "learning_rate": 5.937706683282338e-06, "loss": 0.19639434814453124, "step": 15030 }, { "epoch": 0.1300031992805942, "grad_norm": 6.442524102423849, "learning_rate": 5.9376653745553045e-06, "loss": 0.227093505859375, "step": 15035 }, { "epoch": 0.13004643280213746, "grad_norm": 19.509080968515708, "learning_rate": 5.937624052280017e-06, "loss": 0.1394439697265625, "step": 15040 }, { "epoch": 0.13008966632368074, "grad_norm": 0.19119882989369516, "learning_rate": 5.937582716456668e-06, "loss": 0.19120216369628906, "step": 15045 }, { "epoch": 0.130132899845224, "grad_norm": 12.323833459126641, "learning_rate": 5.937541367085446e-06, "loss": 0.20343017578125, "step": 15050 }, { "epoch": 0.13017613336676725, "grad_norm": 3.1496467324290336, "learning_rate": 5.937500004166544e-06, "loss": 0.20940837860107422, "step": 15055 }, { "epoch": 0.1302193668883105, "grad_norm": 19.864901630531136, "learning_rate": 5.937458627700151e-06, "loss": 0.142822265625, "step": 15060 }, { "epoch": 0.1302626004098538, "grad_norm": 8.175174069294785, "learning_rate": 5.937417237686458e-06, "loss": 0.30141448974609375, "step": 15065 }, { "epoch": 0.13030583393139705, "grad_norm": 33.862014654201815, "learning_rate": 5.937375834125657e-06, "loss": 0.26781234741210935, "step": 15070 }, { "epoch": 0.1303490674529403, "grad_norm": 9.588187547272032, "learning_rate": 5.937334417017938e-06, "loss": 0.2890869140625, "step": 15075 }, { "epoch": 0.13039230097448357, "grad_norm": 0.9296761257318884, "learning_rate": 5.937292986363491e-06, "loss": 0.07383499145507813, "step": 15080 }, { "epoch": 0.13043553449602685, "grad_norm": 43.053825362415225, "learning_rate": 5.93725154216251e-06, "loss": 0.25117645263671873, "step": 15085 }, { "epoch": 0.1304787680175701, "grad_norm": 12.737585021101207, "learning_rate": 5.937210084415184e-06, "loss": 0.15650634765625, "step": 15090 }, { "epoch": 0.13052200153911336, "grad_norm": 4.488774936163862, "learning_rate": 5.937168613121704e-06, "loss": 0.08349456787109374, "step": 15095 }, { "epoch": 0.13056523506065662, "grad_norm": 38.46137137521181, "learning_rate": 5.9371271282822615e-06, "loss": 0.386328125, "step": 15100 }, { "epoch": 0.1306084685821999, "grad_norm": 11.52856182455365, "learning_rate": 5.937085629897049e-06, "loss": 0.2709053039550781, "step": 15105 }, { "epoch": 0.13065170210374316, "grad_norm": 39.52710222869654, "learning_rate": 5.937044117966257e-06, "loss": 0.62762451171875, "step": 15110 }, { "epoch": 0.13069493562528642, "grad_norm": 31.8890257329816, "learning_rate": 5.937002592490077e-06, "loss": 0.361993408203125, "step": 15115 }, { "epoch": 0.13073816914682967, "grad_norm": 49.46446676179151, "learning_rate": 5.936961053468701e-06, "loss": 0.418359375, "step": 15120 }, { "epoch": 0.13078140266837296, "grad_norm": 10.320194992279205, "learning_rate": 5.9369195009023195e-06, "loss": 0.3207275390625, "step": 15125 }, { "epoch": 0.13082463618991622, "grad_norm": 5.736516292283234, "learning_rate": 5.936877934791125e-06, "loss": 0.10708465576171874, "step": 15130 }, { "epoch": 0.13086786971145947, "grad_norm": 2.3163668606180585, "learning_rate": 5.93683635513531e-06, "loss": 0.028679656982421874, "step": 15135 }, { "epoch": 0.13091110323300273, "grad_norm": 11.644058884416106, "learning_rate": 5.936794761935064e-06, "loss": 0.5105178833007813, "step": 15140 }, { "epoch": 0.130954336754546, "grad_norm": 5.232861144350562, "learning_rate": 5.93675315519058e-06, "loss": 0.32423038482666017, "step": 15145 }, { "epoch": 0.13099757027608927, "grad_norm": 40.702313079006174, "learning_rate": 5.93671153490205e-06, "loss": 0.2764610290527344, "step": 15150 }, { "epoch": 0.13104080379763253, "grad_norm": 5.514199051162708, "learning_rate": 5.936669901069666e-06, "loss": 0.0660888671875, "step": 15155 }, { "epoch": 0.13108403731917578, "grad_norm": 2.0548508973597404, "learning_rate": 5.936628253693621e-06, "loss": 0.09753265380859374, "step": 15160 }, { "epoch": 0.13112727084071907, "grad_norm": 30.54686866847536, "learning_rate": 5.936586592774104e-06, "loss": 0.231353759765625, "step": 15165 }, { "epoch": 0.13117050436226232, "grad_norm": 33.17199038153279, "learning_rate": 5.93654491831131e-06, "loss": 0.1747802734375, "step": 15170 }, { "epoch": 0.13121373788380558, "grad_norm": 4.041999750688126, "learning_rate": 5.936503230305431e-06, "loss": 0.13719482421875, "step": 15175 }, { "epoch": 0.13125697140534884, "grad_norm": 34.04024394949842, "learning_rate": 5.936461528756658e-06, "loss": 0.51494140625, "step": 15180 }, { "epoch": 0.13130020492689212, "grad_norm": 6.656457845149924, "learning_rate": 5.936419813665184e-06, "loss": 0.11875, "step": 15185 }, { "epoch": 0.13134343844843538, "grad_norm": 15.802273431243554, "learning_rate": 5.936378085031201e-06, "loss": 0.1290191650390625, "step": 15190 }, { "epoch": 0.13138667196997864, "grad_norm": 11.033824974211745, "learning_rate": 5.936336342854902e-06, "loss": 0.07615203857421875, "step": 15195 }, { "epoch": 0.13142990549152192, "grad_norm": 17.16677903698663, "learning_rate": 5.936294587136479e-06, "loss": 0.20645751953125, "step": 15200 }, { "epoch": 0.13147313901306518, "grad_norm": 5.082290531706724, "learning_rate": 5.936252817876126e-06, "loss": 0.0605255126953125, "step": 15205 }, { "epoch": 0.13151637253460843, "grad_norm": 4.126248310362294, "learning_rate": 5.936211035074033e-06, "loss": 0.4830024719238281, "step": 15210 }, { "epoch": 0.1315596060561517, "grad_norm": 0.10266548381880179, "learning_rate": 5.936169238730394e-06, "loss": 0.05181045532226562, "step": 15215 }, { "epoch": 0.13160283957769497, "grad_norm": 5.6391077710777635, "learning_rate": 5.936127428845403e-06, "loss": 0.18089141845703124, "step": 15220 }, { "epoch": 0.13164607309923823, "grad_norm": 1.3842933853393218, "learning_rate": 5.936085605419251e-06, "loss": 0.01595001220703125, "step": 15225 }, { "epoch": 0.1316893066207815, "grad_norm": 13.567914444894331, "learning_rate": 5.936043768452133e-06, "loss": 0.416168212890625, "step": 15230 }, { "epoch": 0.13173254014232474, "grad_norm": 13.161855574353337, "learning_rate": 5.936001917944239e-06, "loss": 0.13546791076660156, "step": 15235 }, { "epoch": 0.13177577366386803, "grad_norm": 45.4044100656237, "learning_rate": 5.935960053895764e-06, "loss": 0.2619823455810547, "step": 15240 }, { "epoch": 0.13181900718541129, "grad_norm": 6.549388322635006, "learning_rate": 5.9359181763069004e-06, "loss": 0.21012096405029296, "step": 15245 }, { "epoch": 0.13186224070695454, "grad_norm": 4.941944586547778, "learning_rate": 5.935876285177842e-06, "loss": 0.6717132568359375, "step": 15250 }, { "epoch": 0.1319054742284978, "grad_norm": 17.596950519869633, "learning_rate": 5.935834380508781e-06, "loss": 0.212548828125, "step": 15255 }, { "epoch": 0.13194870775004108, "grad_norm": 49.51613181488686, "learning_rate": 5.9357924622999115e-06, "loss": 0.3044281005859375, "step": 15260 }, { "epoch": 0.13199194127158434, "grad_norm": 14.992500041416713, "learning_rate": 5.935750530551427e-06, "loss": 0.19398193359375, "step": 15265 }, { "epoch": 0.1320351747931276, "grad_norm": 18.20473346931192, "learning_rate": 5.93570858526352e-06, "loss": 0.19782638549804688, "step": 15270 }, { "epoch": 0.13207840831467085, "grad_norm": 11.41723393670201, "learning_rate": 5.935666626436385e-06, "loss": 0.229693603515625, "step": 15275 }, { "epoch": 0.13212164183621414, "grad_norm": 5.186328444972898, "learning_rate": 5.935624654070215e-06, "loss": 0.14853057861328126, "step": 15280 }, { "epoch": 0.1321648753577574, "grad_norm": 11.389448436830547, "learning_rate": 5.9355826681652025e-06, "loss": 0.0634185791015625, "step": 15285 }, { "epoch": 0.13220810887930065, "grad_norm": 0.7341542036261167, "learning_rate": 5.935540668721543e-06, "loss": 0.21934967041015624, "step": 15290 }, { "epoch": 0.1322513424008439, "grad_norm": 45.76675934501357, "learning_rate": 5.935498655739429e-06, "loss": 0.16610946655273437, "step": 15295 }, { "epoch": 0.1322945759223872, "grad_norm": 19.542493295236316, "learning_rate": 5.935456629219055e-06, "loss": 0.3954559326171875, "step": 15300 }, { "epoch": 0.13233780944393045, "grad_norm": 34.23953341993522, "learning_rate": 5.935414589160614e-06, "loss": 0.258636474609375, "step": 15305 }, { "epoch": 0.1323810429654737, "grad_norm": 22.13720597262187, "learning_rate": 5.9353725355643e-06, "loss": 0.6404861450195313, "step": 15310 }, { "epoch": 0.13242427648701696, "grad_norm": 41.29272498536659, "learning_rate": 5.935330468430308e-06, "loss": 0.3713348388671875, "step": 15315 }, { "epoch": 0.13246751000856025, "grad_norm": 0.4326387059259602, "learning_rate": 5.935288387758831e-06, "loss": 0.2120513916015625, "step": 15320 }, { "epoch": 0.1325107435301035, "grad_norm": 21.17126988239507, "learning_rate": 5.935246293550063e-06, "loss": 0.399847412109375, "step": 15325 }, { "epoch": 0.13255397705164676, "grad_norm": 2.472163104457559, "learning_rate": 5.935204185804198e-06, "loss": 0.199761962890625, "step": 15330 }, { "epoch": 0.13259721057319002, "grad_norm": 9.324808478777523, "learning_rate": 5.935162064521432e-06, "loss": 0.11522216796875, "step": 15335 }, { "epoch": 0.1326404440947333, "grad_norm": 9.766546674173993, "learning_rate": 5.9351199297019564e-06, "loss": 0.18369903564453124, "step": 15340 }, { "epoch": 0.13268367761627656, "grad_norm": 3.2042361772737826, "learning_rate": 5.935077781345968e-06, "loss": 0.270953369140625, "step": 15345 }, { "epoch": 0.13272691113781981, "grad_norm": 15.975381317318574, "learning_rate": 5.93503561945366e-06, "loss": 0.40148162841796875, "step": 15350 }, { "epoch": 0.13277014465936307, "grad_norm": 34.819953575807034, "learning_rate": 5.934993444025227e-06, "loss": 0.15597381591796874, "step": 15355 }, { "epoch": 0.13281337818090636, "grad_norm": 2.399194794301932, "learning_rate": 5.934951255060863e-06, "loss": 0.10696563720703126, "step": 15360 }, { "epoch": 0.1328566117024496, "grad_norm": 36.77873462924236, "learning_rate": 5.934909052560765e-06, "loss": 0.2050323486328125, "step": 15365 }, { "epoch": 0.13289984522399287, "grad_norm": 26.36970105545054, "learning_rate": 5.934866836525124e-06, "loss": 0.15618972778320311, "step": 15370 }, { "epoch": 0.13294307874553613, "grad_norm": 47.58546921306091, "learning_rate": 5.934824606954138e-06, "loss": 0.20328826904296876, "step": 15375 }, { "epoch": 0.1329863122670794, "grad_norm": 7.150733954642342, "learning_rate": 5.9347823638479986e-06, "loss": 0.4600830078125, "step": 15380 }, { "epoch": 0.13302954578862267, "grad_norm": 4.754441475229637, "learning_rate": 5.934740107206903e-06, "loss": 0.3172271728515625, "step": 15385 }, { "epoch": 0.13307277931016592, "grad_norm": 16.519270309222104, "learning_rate": 5.934697837031045e-06, "loss": 0.1487396240234375, "step": 15390 }, { "epoch": 0.1331160128317092, "grad_norm": 38.87595107444872, "learning_rate": 5.934655553320621e-06, "loss": 0.3196857452392578, "step": 15395 }, { "epoch": 0.13315924635325246, "grad_norm": 19.129983630876012, "learning_rate": 5.9346132560758236e-06, "loss": 0.4632072448730469, "step": 15400 }, { "epoch": 0.13320247987479572, "grad_norm": 0.21834703219443757, "learning_rate": 5.934570945296849e-06, "loss": 0.15872535705566407, "step": 15405 }, { "epoch": 0.13324571339633898, "grad_norm": 1.564027772625229, "learning_rate": 5.934528620983894e-06, "loss": 0.146734619140625, "step": 15410 }, { "epoch": 0.13328894691788226, "grad_norm": 7.7215092244247705, "learning_rate": 5.934486283137151e-06, "loss": 0.2658195495605469, "step": 15415 }, { "epoch": 0.13333218043942552, "grad_norm": 5.844314609778957, "learning_rate": 5.934443931756817e-06, "loss": 0.18275222778320313, "step": 15420 }, { "epoch": 0.13337541396096877, "grad_norm": 7.564838444226029, "learning_rate": 5.934401566843088e-06, "loss": 0.307757568359375, "step": 15425 }, { "epoch": 0.13341864748251203, "grad_norm": 44.69388646348608, "learning_rate": 5.9343591883961565e-06, "loss": 0.3287841796875, "step": 15430 }, { "epoch": 0.13346188100405532, "grad_norm": 16.805163512169965, "learning_rate": 5.934316796416221e-06, "loss": 0.20107421875, "step": 15435 }, { "epoch": 0.13350511452559857, "grad_norm": 27.749568421514944, "learning_rate": 5.934274390903474e-06, "loss": 0.180999755859375, "step": 15440 }, { "epoch": 0.13354834804714183, "grad_norm": 13.53937239492805, "learning_rate": 5.934231971858115e-06, "loss": 0.19996185302734376, "step": 15445 }, { "epoch": 0.13359158156868509, "grad_norm": 44.78678352389129, "learning_rate": 5.934189539280336e-06, "loss": 1.1053466796875, "step": 15450 }, { "epoch": 0.13363481509022837, "grad_norm": 9.885230915184357, "learning_rate": 5.934147093170334e-06, "loss": 0.1235382080078125, "step": 15455 }, { "epoch": 0.13367804861177163, "grad_norm": 26.224916059361448, "learning_rate": 5.934104633528305e-06, "loss": 0.3006500244140625, "step": 15460 }, { "epoch": 0.13372128213331488, "grad_norm": 24.318265075833775, "learning_rate": 5.934062160354445e-06, "loss": 0.40423126220703126, "step": 15465 }, { "epoch": 0.13376451565485814, "grad_norm": 7.672528419658072, "learning_rate": 5.934019673648949e-06, "loss": 0.041790771484375, "step": 15470 }, { "epoch": 0.13380774917640142, "grad_norm": 10.220776182552768, "learning_rate": 5.933977173412014e-06, "loss": 0.20699005126953124, "step": 15475 }, { "epoch": 0.13385098269794468, "grad_norm": 24.099027280268693, "learning_rate": 5.933934659643836e-06, "loss": 0.258648681640625, "step": 15480 }, { "epoch": 0.13389421621948794, "grad_norm": 5.061578065270894, "learning_rate": 5.93389213234461e-06, "loss": 0.1931884765625, "step": 15485 }, { "epoch": 0.1339374497410312, "grad_norm": 6.450684984518139, "learning_rate": 5.933849591514533e-06, "loss": 0.18192291259765625, "step": 15490 }, { "epoch": 0.13398068326257448, "grad_norm": 0.9909678852845774, "learning_rate": 5.9338070371538015e-06, "loss": 0.4262786865234375, "step": 15495 }, { "epoch": 0.13402391678411774, "grad_norm": 6.694081170846877, "learning_rate": 5.9337644692626095e-06, "loss": 0.1013916015625, "step": 15500 }, { "epoch": 0.134067150305661, "grad_norm": 71.59444565799073, "learning_rate": 5.933721887841157e-06, "loss": 0.25119705200195314, "step": 15505 }, { "epoch": 0.13411038382720425, "grad_norm": 0.5332824619763, "learning_rate": 5.933679292889637e-06, "loss": 0.32389678955078127, "step": 15510 }, { "epoch": 0.13415361734874753, "grad_norm": 1.866593665906778, "learning_rate": 5.933636684408248e-06, "loss": 0.0759002685546875, "step": 15515 }, { "epoch": 0.1341968508702908, "grad_norm": 13.980281026055318, "learning_rate": 5.933594062397186e-06, "loss": 0.18091964721679688, "step": 15520 }, { "epoch": 0.13424008439183405, "grad_norm": 31.00615333780484, "learning_rate": 5.933551426856647e-06, "loss": 0.6096641540527343, "step": 15525 }, { "epoch": 0.1342833179133773, "grad_norm": 8.46209130705364, "learning_rate": 5.9335087777868275e-06, "loss": 0.1901580810546875, "step": 15530 }, { "epoch": 0.1343265514349206, "grad_norm": 1.266938237395735, "learning_rate": 5.933466115187925e-06, "loss": 0.056536865234375, "step": 15535 }, { "epoch": 0.13436978495646384, "grad_norm": 19.653735675259636, "learning_rate": 5.933423439060137e-06, "loss": 0.1139129638671875, "step": 15540 }, { "epoch": 0.1344130184780071, "grad_norm": 23.87584588294796, "learning_rate": 5.933380749403658e-06, "loss": 0.322296142578125, "step": 15545 }, { "epoch": 0.13445625199955036, "grad_norm": 28.01233847712946, "learning_rate": 5.933338046218687e-06, "loss": 0.22877979278564453, "step": 15550 }, { "epoch": 0.13449948552109364, "grad_norm": 4.482236252974542, "learning_rate": 5.933295329505419e-06, "loss": 0.17980728149414063, "step": 15555 }, { "epoch": 0.1345427190426369, "grad_norm": 6.463858732820348, "learning_rate": 5.933252599264053e-06, "loss": 0.13226699829101562, "step": 15560 }, { "epoch": 0.13458595256418016, "grad_norm": 4.024807706156505, "learning_rate": 5.933209855494785e-06, "loss": 0.27309112548828124, "step": 15565 }, { "epoch": 0.13462918608572344, "grad_norm": 26.581897653913146, "learning_rate": 5.933167098197813e-06, "loss": 0.131353759765625, "step": 15570 }, { "epoch": 0.1346724196072667, "grad_norm": 14.3654949708258, "learning_rate": 5.933124327373333e-06, "loss": 0.0975830078125, "step": 15575 }, { "epoch": 0.13471565312880995, "grad_norm": 1.4443143307111441, "learning_rate": 5.933081543021542e-06, "loss": 0.11409912109375, "step": 15580 }, { "epoch": 0.1347588866503532, "grad_norm": 9.68238966540108, "learning_rate": 5.933038745142639e-06, "loss": 0.09222412109375, "step": 15585 }, { "epoch": 0.1348021201718965, "grad_norm": 0.3534070869287052, "learning_rate": 5.9329959337368206e-06, "loss": 0.0878662109375, "step": 15590 }, { "epoch": 0.13484535369343975, "grad_norm": 30.27729183722176, "learning_rate": 5.9329531088042835e-06, "loss": 0.435089111328125, "step": 15595 }, { "epoch": 0.134888587214983, "grad_norm": 14.205223716290298, "learning_rate": 5.932910270345227e-06, "loss": 0.17787246704101561, "step": 15600 }, { "epoch": 0.13493182073652626, "grad_norm": 12.038808128933622, "learning_rate": 5.9328674183598464e-06, "loss": 0.22528305053710937, "step": 15605 }, { "epoch": 0.13497505425806955, "grad_norm": 29.651304180040487, "learning_rate": 5.932824552848341e-06, "loss": 0.20831298828125, "step": 15610 }, { "epoch": 0.1350182877796128, "grad_norm": 3.4862828169032185, "learning_rate": 5.932781673810907e-06, "loss": 0.13038330078125, "step": 15615 }, { "epoch": 0.13506152130115606, "grad_norm": 6.666184976443255, "learning_rate": 5.932738781247744e-06, "loss": 0.059466552734375, "step": 15620 }, { "epoch": 0.13510475482269932, "grad_norm": 3.7273253664554944, "learning_rate": 5.932695875159049e-06, "loss": 0.33057689666748047, "step": 15625 }, { "epoch": 0.1351479883442426, "grad_norm": 6.366180567708482, "learning_rate": 5.932652955545019e-06, "loss": 0.1455230712890625, "step": 15630 }, { "epoch": 0.13519122186578586, "grad_norm": 47.055455666538144, "learning_rate": 5.932610022405853e-06, "loss": 0.3109344482421875, "step": 15635 }, { "epoch": 0.13523445538732912, "grad_norm": 3.059848564235419, "learning_rate": 5.932567075741748e-06, "loss": 0.43603515625, "step": 15640 }, { "epoch": 0.13527768890887237, "grad_norm": 15.626767694280492, "learning_rate": 5.932524115552904e-06, "loss": 0.0814239501953125, "step": 15645 }, { "epoch": 0.13532092243041566, "grad_norm": 29.48335669346052, "learning_rate": 5.932481141839517e-06, "loss": 0.172369384765625, "step": 15650 }, { "epoch": 0.13536415595195891, "grad_norm": 21.938407106176015, "learning_rate": 5.932438154601787e-06, "loss": 0.23369140625, "step": 15655 }, { "epoch": 0.13540738947350217, "grad_norm": 5.48813750560348, "learning_rate": 5.93239515383991e-06, "loss": 0.3010498046875, "step": 15660 }, { "epoch": 0.13545062299504543, "grad_norm": 5.980967843263891, "learning_rate": 5.932352139554087e-06, "loss": 0.1413818359375, "step": 15665 }, { "epoch": 0.1354938565165887, "grad_norm": 18.99669689727083, "learning_rate": 5.9323091117445145e-06, "loss": 0.2655494689941406, "step": 15670 }, { "epoch": 0.13553709003813197, "grad_norm": 4.557641907258935, "learning_rate": 5.932266070411392e-06, "loss": 0.06163978576660156, "step": 15675 }, { "epoch": 0.13558032355967523, "grad_norm": 8.37004854738529, "learning_rate": 5.9322230155549166e-06, "loss": 0.21973876953125, "step": 15680 }, { "epoch": 0.13562355708121848, "grad_norm": 37.34484472539925, "learning_rate": 5.932179947175288e-06, "loss": 0.37808837890625, "step": 15685 }, { "epoch": 0.13566679060276177, "grad_norm": 39.451363500351405, "learning_rate": 5.9321368652727045e-06, "loss": 0.25066680908203126, "step": 15690 }, { "epoch": 0.13571002412430502, "grad_norm": 1.1809204566583442, "learning_rate": 5.932093769847365e-06, "loss": 0.3513397216796875, "step": 15695 }, { "epoch": 0.13575325764584828, "grad_norm": 0.6035623894909119, "learning_rate": 5.932050660899468e-06, "loss": 0.1471038818359375, "step": 15700 }, { "epoch": 0.13579649116739154, "grad_norm": 10.926758572643863, "learning_rate": 5.932007538429213e-06, "loss": 0.36240234375, "step": 15705 }, { "epoch": 0.13583972468893482, "grad_norm": 2.41712483950103, "learning_rate": 5.9319644024367975e-06, "loss": 0.28682937622070315, "step": 15710 }, { "epoch": 0.13588295821047808, "grad_norm": 1.451463949308235, "learning_rate": 5.931921252922423e-06, "loss": 0.156048583984375, "step": 15715 }, { "epoch": 0.13592619173202133, "grad_norm": 3.0857090915531984, "learning_rate": 5.931878089886285e-06, "loss": 0.04258575439453125, "step": 15720 }, { "epoch": 0.1359694252535646, "grad_norm": 2.9674497164064184, "learning_rate": 5.931834913328584e-06, "loss": 0.21537017822265625, "step": 15725 }, { "epoch": 0.13601265877510788, "grad_norm": 7.612723820064911, "learning_rate": 5.93179172324952e-06, "loss": 0.2261016845703125, "step": 15730 }, { "epoch": 0.13605589229665113, "grad_norm": 26.738863984261485, "learning_rate": 5.931748519649292e-06, "loss": 0.30914306640625, "step": 15735 }, { "epoch": 0.1360991258181944, "grad_norm": 9.034034138907382, "learning_rate": 5.931705302528099e-06, "loss": 0.275811767578125, "step": 15740 }, { "epoch": 0.13614235933973765, "grad_norm": 27.172806558136678, "learning_rate": 5.93166207188614e-06, "loss": 0.4097728729248047, "step": 15745 }, { "epoch": 0.13618559286128093, "grad_norm": 6.8522997365812515, "learning_rate": 5.931618827723614e-06, "loss": 0.47980194091796874, "step": 15750 }, { "epoch": 0.1362288263828242, "grad_norm": 10.674914536078747, "learning_rate": 5.931575570040721e-06, "loss": 0.6157363891601563, "step": 15755 }, { "epoch": 0.13627205990436744, "grad_norm": 22.393639709139947, "learning_rate": 5.931532298837662e-06, "loss": 0.3499176025390625, "step": 15760 }, { "epoch": 0.13631529342591073, "grad_norm": 7.37490937945627, "learning_rate": 5.931489014114633e-06, "loss": 0.05860595703125, "step": 15765 }, { "epoch": 0.13635852694745398, "grad_norm": 26.349726639208612, "learning_rate": 5.931445715871837e-06, "loss": 0.38985748291015626, "step": 15770 }, { "epoch": 0.13640176046899724, "grad_norm": 0.604530235294314, "learning_rate": 5.9314024041094726e-06, "loss": 0.254718017578125, "step": 15775 }, { "epoch": 0.1364449939905405, "grad_norm": 3.5769299325951858, "learning_rate": 5.931359078827739e-06, "loss": 0.05955810546875, "step": 15780 }, { "epoch": 0.13648822751208378, "grad_norm": 10.965991104005571, "learning_rate": 5.931315740026836e-06, "loss": 0.1360992431640625, "step": 15785 }, { "epoch": 0.13653146103362704, "grad_norm": 6.006617366905231, "learning_rate": 5.931272387706964e-06, "loss": 0.16025009155273437, "step": 15790 }, { "epoch": 0.1365746945551703, "grad_norm": 22.74723144341473, "learning_rate": 5.931229021868323e-06, "loss": 0.1923187255859375, "step": 15795 }, { "epoch": 0.13661792807671355, "grad_norm": 39.01097206357572, "learning_rate": 5.931185642511113e-06, "loss": 0.40423583984375, "step": 15800 }, { "epoch": 0.13666116159825684, "grad_norm": 1.7318346185467786, "learning_rate": 5.931142249635533e-06, "loss": 0.1259765625, "step": 15805 }, { "epoch": 0.1367043951198001, "grad_norm": 61.873920840046296, "learning_rate": 5.931098843241785e-06, "loss": 0.2799896240234375, "step": 15810 }, { "epoch": 0.13674762864134335, "grad_norm": 41.486359670945006, "learning_rate": 5.931055423330066e-06, "loss": 0.386541748046875, "step": 15815 }, { "epoch": 0.1367908621628866, "grad_norm": 5.389614349132367, "learning_rate": 5.9310119899005805e-06, "loss": 0.0724761962890625, "step": 15820 }, { "epoch": 0.1368340956844299, "grad_norm": 3.536927235571073, "learning_rate": 5.930968542953525e-06, "loss": 0.06963653564453125, "step": 15825 }, { "epoch": 0.13687732920597315, "grad_norm": 20.447443062298643, "learning_rate": 5.9309250824891035e-06, "loss": 0.39294281005859377, "step": 15830 }, { "epoch": 0.1369205627275164, "grad_norm": 22.988345587300255, "learning_rate": 5.930881608507514e-06, "loss": 0.23015899658203126, "step": 15835 }, { "epoch": 0.13696379624905966, "grad_norm": 13.480414857399817, "learning_rate": 5.930838121008956e-06, "loss": 0.5200691223144531, "step": 15840 }, { "epoch": 0.13700702977060294, "grad_norm": 140.52498114472826, "learning_rate": 5.9307946199936324e-06, "loss": 0.45081253051757814, "step": 15845 }, { "epoch": 0.1370502632921462, "grad_norm": 19.21325342562093, "learning_rate": 5.930751105461744e-06, "loss": 0.3641845703125, "step": 15850 }, { "epoch": 0.13709349681368946, "grad_norm": 21.174556104500564, "learning_rate": 5.93070757741349e-06, "loss": 0.14376220703125, "step": 15855 }, { "epoch": 0.13713673033523271, "grad_norm": 29.55721823671225, "learning_rate": 5.930664035849071e-06, "loss": 0.16005859375, "step": 15860 }, { "epoch": 0.137179963856776, "grad_norm": 2.917217916907111, "learning_rate": 5.9306204807686885e-06, "loss": 0.1173248291015625, "step": 15865 }, { "epoch": 0.13722319737831926, "grad_norm": 8.842560086239352, "learning_rate": 5.930576912172543e-06, "loss": 0.05250396728515625, "step": 15870 }, { "epoch": 0.1372664308998625, "grad_norm": 49.456311814651464, "learning_rate": 5.930533330060837e-06, "loss": 0.3007080078125, "step": 15875 }, { "epoch": 0.13730966442140577, "grad_norm": 2.542768000995804, "learning_rate": 5.9304897344337694e-06, "loss": 0.15013427734375, "step": 15880 }, { "epoch": 0.13735289794294905, "grad_norm": 2.5343737386869543, "learning_rate": 5.930446125291542e-06, "loss": 0.4632080078125, "step": 15885 }, { "epoch": 0.1373961314644923, "grad_norm": 14.06930178283949, "learning_rate": 5.9304025026343565e-06, "loss": 0.31496429443359375, "step": 15890 }, { "epoch": 0.13743936498603557, "grad_norm": 6.061535593276188, "learning_rate": 5.930358866462413e-06, "loss": 0.458917236328125, "step": 15895 }, { "epoch": 0.13748259850757882, "grad_norm": 7.70183579621195, "learning_rate": 5.9303152167759134e-06, "loss": 0.051873779296875, "step": 15900 }, { "epoch": 0.1375258320291221, "grad_norm": 19.11026631270834, "learning_rate": 5.930271553575059e-06, "loss": 0.12526092529296876, "step": 15905 }, { "epoch": 0.13756906555066536, "grad_norm": 2.226112487993049, "learning_rate": 5.930227876860051e-06, "loss": 0.38644561767578123, "step": 15910 }, { "epoch": 0.13761229907220862, "grad_norm": 10.249009380697546, "learning_rate": 5.9301841866310925e-06, "loss": 0.2422607421875, "step": 15915 }, { "epoch": 0.13765553259375188, "grad_norm": 2.8294278868989893, "learning_rate": 5.930140482888381e-06, "loss": 0.16834564208984376, "step": 15920 }, { "epoch": 0.13769876611529516, "grad_norm": 267.3516201443124, "learning_rate": 5.930096765632122e-06, "loss": 0.3294044494628906, "step": 15925 }, { "epoch": 0.13774199963683842, "grad_norm": 18.95939217508487, "learning_rate": 5.930053034862516e-06, "loss": 0.21285400390625, "step": 15930 }, { "epoch": 0.13778523315838168, "grad_norm": 13.038470424923387, "learning_rate": 5.930009290579762e-06, "loss": 0.090423583984375, "step": 15935 }, { "epoch": 0.13782846667992496, "grad_norm": 0.5770472060549248, "learning_rate": 5.929965532784066e-06, "loss": 0.03912811279296875, "step": 15940 }, { "epoch": 0.13787170020146822, "grad_norm": 5.749127773981404, "learning_rate": 5.929921761475629e-06, "loss": 0.15803604125976561, "step": 15945 }, { "epoch": 0.13791493372301147, "grad_norm": 16.567993701861763, "learning_rate": 5.929877976654649e-06, "loss": 0.434539794921875, "step": 15950 }, { "epoch": 0.13795816724455473, "grad_norm": 28.168863884127706, "learning_rate": 5.929834178321333e-06, "loss": 0.6388336181640625, "step": 15955 }, { "epoch": 0.13800140076609801, "grad_norm": 1.8385092191101366, "learning_rate": 5.929790366475879e-06, "loss": 0.03444671630859375, "step": 15960 }, { "epoch": 0.13804463428764127, "grad_norm": 17.57948369448117, "learning_rate": 5.929746541118491e-06, "loss": 0.13470458984375, "step": 15965 }, { "epoch": 0.13808786780918453, "grad_norm": 1.2685397058852752, "learning_rate": 5.929702702249372e-06, "loss": 0.3978759765625, "step": 15970 }, { "epoch": 0.13813110133072778, "grad_norm": 20.08927233692365, "learning_rate": 5.9296588498687215e-06, "loss": 0.2524147033691406, "step": 15975 }, { "epoch": 0.13817433485227107, "grad_norm": 9.943484166176637, "learning_rate": 5.929614983976743e-06, "loss": 0.3594505310058594, "step": 15980 }, { "epoch": 0.13821756837381433, "grad_norm": 17.444143784483714, "learning_rate": 5.9295711045736405e-06, "loss": 0.28685302734375, "step": 15985 }, { "epoch": 0.13826080189535758, "grad_norm": 33.255345926948095, "learning_rate": 5.9295272116596145e-06, "loss": 0.47296905517578125, "step": 15990 }, { "epoch": 0.13830403541690084, "grad_norm": 12.082653790691447, "learning_rate": 5.9294833052348675e-06, "loss": 0.2573150634765625, "step": 15995 }, { "epoch": 0.13834726893844412, "grad_norm": 2.867313926930228, "learning_rate": 5.929439385299603e-06, "loss": 0.0577728271484375, "step": 16000 }, { "epoch": 0.13839050245998738, "grad_norm": 33.968714135990076, "learning_rate": 5.929395451854022e-06, "loss": 0.1415008544921875, "step": 16005 }, { "epoch": 0.13843373598153064, "grad_norm": 10.223132731540097, "learning_rate": 5.929351504898328e-06, "loss": 0.34122772216796876, "step": 16010 }, { "epoch": 0.1384769695030739, "grad_norm": 2.5693991692618456, "learning_rate": 5.929307544432724e-06, "loss": 0.10935821533203124, "step": 16015 }, { "epoch": 0.13852020302461718, "grad_norm": 4.503715743858449, "learning_rate": 5.929263570457414e-06, "loss": 0.199749755859375, "step": 16020 }, { "epoch": 0.13856343654616043, "grad_norm": 15.682090690356924, "learning_rate": 5.929219582972597e-06, "loss": 0.095672607421875, "step": 16025 }, { "epoch": 0.1386066700677037, "grad_norm": 5.567588619527575, "learning_rate": 5.929175581978479e-06, "loss": 0.1223663330078125, "step": 16030 }, { "epoch": 0.13864990358924695, "grad_norm": 1.8891743707715172, "learning_rate": 5.929131567475262e-06, "loss": 0.0906585693359375, "step": 16035 }, { "epoch": 0.13869313711079023, "grad_norm": 31.42320748034939, "learning_rate": 5.929087539463149e-06, "loss": 0.32440185546875, "step": 16040 }, { "epoch": 0.1387363706323335, "grad_norm": 1.3139066840561868, "learning_rate": 5.929043497942344e-06, "loss": 0.26340179443359374, "step": 16045 }, { "epoch": 0.13877960415387675, "grad_norm": 0.3352707828042887, "learning_rate": 5.928999442913047e-06, "loss": 0.13485107421875, "step": 16050 }, { "epoch": 0.13882283767542, "grad_norm": 23.00240791431845, "learning_rate": 5.928955374375466e-06, "loss": 0.2256561279296875, "step": 16055 }, { "epoch": 0.1388660711969633, "grad_norm": 45.41803419175751, "learning_rate": 5.9289112923298e-06, "loss": 0.3120361328125, "step": 16060 }, { "epoch": 0.13890930471850654, "grad_norm": 1.3984208889413399, "learning_rate": 5.928867196776254e-06, "loss": 0.5205375671386718, "step": 16065 }, { "epoch": 0.1389525382400498, "grad_norm": 26.85033955946842, "learning_rate": 5.928823087715032e-06, "loss": 0.168804931640625, "step": 16070 }, { "epoch": 0.13899577176159306, "grad_norm": 19.63647737449243, "learning_rate": 5.928778965146336e-06, "loss": 0.060573577880859375, "step": 16075 }, { "epoch": 0.13903900528313634, "grad_norm": 2.774718418792585, "learning_rate": 5.9287348290703695e-06, "loss": 0.08153228759765625, "step": 16080 }, { "epoch": 0.1390822388046796, "grad_norm": 10.820705730678387, "learning_rate": 5.928690679487339e-06, "loss": 0.28583335876464844, "step": 16085 }, { "epoch": 0.13912547232622285, "grad_norm": 2.9123407610999505, "learning_rate": 5.928646516397444e-06, "loss": 0.450689697265625, "step": 16090 }, { "epoch": 0.1391687058477661, "grad_norm": 25.274515133906043, "learning_rate": 5.92860233980089e-06, "loss": 0.21689777374267577, "step": 16095 }, { "epoch": 0.1392119393693094, "grad_norm": 15.777489078246619, "learning_rate": 5.928558149697882e-06, "loss": 0.04420013427734375, "step": 16100 }, { "epoch": 0.13925517289085265, "grad_norm": 3.4455291663470367, "learning_rate": 5.928513946088622e-06, "loss": 0.157989501953125, "step": 16105 }, { "epoch": 0.1392984064123959, "grad_norm": 54.583792276727735, "learning_rate": 5.928469728973315e-06, "loss": 0.22408599853515626, "step": 16110 }, { "epoch": 0.13934163993393917, "grad_norm": 1.4576993673215357, "learning_rate": 5.928425498352163e-06, "loss": 0.33008346557617185, "step": 16115 }, { "epoch": 0.13938487345548245, "grad_norm": 19.125552021814922, "learning_rate": 5.9283812542253724e-06, "loss": 0.373394775390625, "step": 16120 }, { "epoch": 0.1394281069770257, "grad_norm": 61.95778809203529, "learning_rate": 5.928336996593145e-06, "loss": 0.44383544921875, "step": 16125 }, { "epoch": 0.13947134049856896, "grad_norm": 35.98635037602293, "learning_rate": 5.928292725455689e-06, "loss": 0.29320068359375, "step": 16130 }, { "epoch": 0.13951457402011225, "grad_norm": 9.980770880534669, "learning_rate": 5.928248440813203e-06, "loss": 0.09216670989990235, "step": 16135 }, { "epoch": 0.1395578075416555, "grad_norm": 26.37696067033681, "learning_rate": 5.928204142665894e-06, "loss": 0.2286865234375, "step": 16140 }, { "epoch": 0.13960104106319876, "grad_norm": 24.38724420141136, "learning_rate": 5.928159831013968e-06, "loss": 0.09842376708984375, "step": 16145 }, { "epoch": 0.13964427458474202, "grad_norm": 7.1145864887395565, "learning_rate": 5.928115505857627e-06, "loss": 0.0645233154296875, "step": 16150 }, { "epoch": 0.1396875081062853, "grad_norm": 6.688380432704362, "learning_rate": 5.928071167197075e-06, "loss": 0.09265594482421875, "step": 16155 }, { "epoch": 0.13973074162782856, "grad_norm": 5.103470246365329, "learning_rate": 5.928026815032519e-06, "loss": 0.11761245727539063, "step": 16160 }, { "epoch": 0.13977397514937182, "grad_norm": 6.23244818722278, "learning_rate": 5.927982449364161e-06, "loss": 0.06690902709960937, "step": 16165 }, { "epoch": 0.13981720867091507, "grad_norm": 18.160380978726565, "learning_rate": 5.927938070192206e-06, "loss": 0.2678009033203125, "step": 16170 }, { "epoch": 0.13986044219245836, "grad_norm": 4.730547155402601, "learning_rate": 5.927893677516861e-06, "loss": 0.17227783203125, "step": 16175 }, { "epoch": 0.1399036757140016, "grad_norm": 18.372439154704644, "learning_rate": 5.927849271338328e-06, "loss": 0.7213926315307617, "step": 16180 }, { "epoch": 0.13994690923554487, "grad_norm": 1.5283786854591095, "learning_rate": 5.927804851656812e-06, "loss": 0.022564697265625, "step": 16185 }, { "epoch": 0.13999014275708813, "grad_norm": 4.75054862132701, "learning_rate": 5.92776041847252e-06, "loss": 0.04644927978515625, "step": 16190 }, { "epoch": 0.1400333762786314, "grad_norm": 24.714281311770616, "learning_rate": 5.9277159717856565e-06, "loss": 0.24861373901367187, "step": 16195 }, { "epoch": 0.14007660980017467, "grad_norm": 9.779516262463558, "learning_rate": 5.9276715115964235e-06, "loss": 0.112921142578125, "step": 16200 }, { "epoch": 0.14011984332171792, "grad_norm": 1.0921109853193152, "learning_rate": 5.92762703790503e-06, "loss": 0.3003692626953125, "step": 16205 }, { "epoch": 0.14016307684326118, "grad_norm": 3.939422238166449, "learning_rate": 5.927582550711678e-06, "loss": 0.11837005615234375, "step": 16210 }, { "epoch": 0.14020631036480447, "grad_norm": 2.931149999512193, "learning_rate": 5.927538050016575e-06, "loss": 0.46580810546875, "step": 16215 }, { "epoch": 0.14024954388634772, "grad_norm": 2.164329357710955, "learning_rate": 5.927493535819924e-06, "loss": 0.13133544921875, "step": 16220 }, { "epoch": 0.14029277740789098, "grad_norm": 3.2528843733838566, "learning_rate": 5.927449008121933e-06, "loss": 0.08573455810546875, "step": 16225 }, { "epoch": 0.14033601092943424, "grad_norm": 8.244124329015705, "learning_rate": 5.927404466922805e-06, "loss": 0.403759765625, "step": 16230 }, { "epoch": 0.14037924445097752, "grad_norm": 11.520669263386461, "learning_rate": 5.927359912222746e-06, "loss": 0.10804901123046876, "step": 16235 }, { "epoch": 0.14042247797252078, "grad_norm": 6.270246055810291, "learning_rate": 5.9273153440219625e-06, "loss": 0.3307586669921875, "step": 16240 }, { "epoch": 0.14046571149406403, "grad_norm": 14.886285705754773, "learning_rate": 5.927270762320659e-06, "loss": 0.07600746154785157, "step": 16245 }, { "epoch": 0.1405089450156073, "grad_norm": 45.29086373071676, "learning_rate": 5.927226167119041e-06, "loss": 0.45510406494140626, "step": 16250 }, { "epoch": 0.14055217853715057, "grad_norm": 2.6149660108850363, "learning_rate": 5.927181558417315e-06, "loss": 0.28014068603515624, "step": 16255 }, { "epoch": 0.14059541205869383, "grad_norm": 1.9009037682215122, "learning_rate": 5.9271369362156865e-06, "loss": 0.07784385681152343, "step": 16260 }, { "epoch": 0.1406386455802371, "grad_norm": 5.7317912436736735, "learning_rate": 5.927092300514361e-06, "loss": 0.2625030517578125, "step": 16265 }, { "epoch": 0.14068187910178034, "grad_norm": 17.255536214152624, "learning_rate": 5.927047651313544e-06, "loss": 0.4707855224609375, "step": 16270 }, { "epoch": 0.14072511262332363, "grad_norm": 22.83957138991938, "learning_rate": 5.927002988613442e-06, "loss": 0.22706375122070313, "step": 16275 }, { "epoch": 0.14076834614486688, "grad_norm": 4.857735682225901, "learning_rate": 5.926958312414261e-06, "loss": 0.31496429443359375, "step": 16280 }, { "epoch": 0.14081157966641014, "grad_norm": 8.101233152778645, "learning_rate": 5.926913622716207e-06, "loss": 0.16481475830078124, "step": 16285 }, { "epoch": 0.1408548131879534, "grad_norm": 9.237359849323925, "learning_rate": 5.926868919519486e-06, "loss": 0.08463287353515625, "step": 16290 }, { "epoch": 0.14089804670949668, "grad_norm": 49.560708219689644, "learning_rate": 5.926824202824304e-06, "loss": 0.4633037567138672, "step": 16295 }, { "epoch": 0.14094128023103994, "grad_norm": 10.909326389677352, "learning_rate": 5.9267794726308666e-06, "loss": 0.0648101806640625, "step": 16300 }, { "epoch": 0.1409845137525832, "grad_norm": 55.895968932757704, "learning_rate": 5.926734728939382e-06, "loss": 0.37186279296875, "step": 16305 }, { "epoch": 0.14102774727412648, "grad_norm": 11.632342764510902, "learning_rate": 5.926689971750055e-06, "loss": 0.12191314697265625, "step": 16310 }, { "epoch": 0.14107098079566974, "grad_norm": 17.344955840994633, "learning_rate": 5.926645201063093e-06, "loss": 0.07750167846679687, "step": 16315 }, { "epoch": 0.141114214317213, "grad_norm": 40.689001750186954, "learning_rate": 5.926600416878701e-06, "loss": 0.3976287841796875, "step": 16320 }, { "epoch": 0.14115744783875625, "grad_norm": 28.484407016572014, "learning_rate": 5.926555619197088e-06, "loss": 0.6412124633789062, "step": 16325 }, { "epoch": 0.14120068136029953, "grad_norm": 11.885413727738017, "learning_rate": 5.926510808018457e-06, "loss": 0.14867210388183594, "step": 16330 }, { "epoch": 0.1412439148818428, "grad_norm": 48.147360459195895, "learning_rate": 5.926465983343018e-06, "loss": 0.216558837890625, "step": 16335 }, { "epoch": 0.14128714840338605, "grad_norm": 8.042148633787018, "learning_rate": 5.926421145170976e-06, "loss": 0.227923583984375, "step": 16340 }, { "epoch": 0.1413303819249293, "grad_norm": 6.433230386069336, "learning_rate": 5.9263762935025396e-06, "loss": 0.608807373046875, "step": 16345 }, { "epoch": 0.1413736154464726, "grad_norm": 51.18809040321457, "learning_rate": 5.926331428337912e-06, "loss": 0.5113998413085937, "step": 16350 }, { "epoch": 0.14141684896801585, "grad_norm": 5.297889012655832, "learning_rate": 5.926286549677304e-06, "loss": 0.05788726806640625, "step": 16355 }, { "epoch": 0.1414600824895591, "grad_norm": 24.64332506628646, "learning_rate": 5.926241657520921e-06, "loss": 0.107257080078125, "step": 16360 }, { "epoch": 0.14150331601110236, "grad_norm": 0.16351500056235488, "learning_rate": 5.926196751868969e-06, "loss": 0.44730148315429685, "step": 16365 }, { "epoch": 0.14154654953264564, "grad_norm": 5.953252650989257, "learning_rate": 5.926151832721657e-06, "loss": 0.39794921875, "step": 16370 }, { "epoch": 0.1415897830541889, "grad_norm": 28.379316911817867, "learning_rate": 5.926106900079192e-06, "loss": 0.2529022216796875, "step": 16375 }, { "epoch": 0.14163301657573216, "grad_norm": 1.0836342011980145, "learning_rate": 5.92606195394178e-06, "loss": 0.1241241455078125, "step": 16380 }, { "epoch": 0.1416762500972754, "grad_norm": 8.064061893717083, "learning_rate": 5.926016994309628e-06, "loss": 0.03536376953125, "step": 16385 }, { "epoch": 0.1417194836188187, "grad_norm": 4.978307331625878, "learning_rate": 5.9259720211829444e-06, "loss": 0.07766494750976563, "step": 16390 }, { "epoch": 0.14176271714036195, "grad_norm": 3.2086932971138213, "learning_rate": 5.925927034561937e-06, "loss": 0.1677734375, "step": 16395 }, { "epoch": 0.1418059506619052, "grad_norm": 15.279037148675675, "learning_rate": 5.925882034446812e-06, "loss": 0.14821319580078124, "step": 16400 }, { "epoch": 0.14184918418344847, "grad_norm": 44.00953421758605, "learning_rate": 5.925837020837778e-06, "loss": 0.277069091796875, "step": 16405 }, { "epoch": 0.14189241770499175, "grad_norm": 6.682771575260814, "learning_rate": 5.925791993735043e-06, "loss": 0.155242919921875, "step": 16410 }, { "epoch": 0.141935651226535, "grad_norm": 9.027546559398628, "learning_rate": 5.925746953138813e-06, "loss": 0.22848052978515626, "step": 16415 }, { "epoch": 0.14197888474807827, "grad_norm": 0.9747324139289683, "learning_rate": 5.9257018990492966e-06, "loss": 0.08507080078125, "step": 16420 }, { "epoch": 0.14202211826962152, "grad_norm": 6.998681356384003, "learning_rate": 5.925656831466701e-06, "loss": 0.20360641479492186, "step": 16425 }, { "epoch": 0.1420653517911648, "grad_norm": 1.4685793151731426, "learning_rate": 5.925611750391235e-06, "loss": 0.160272216796875, "step": 16430 }, { "epoch": 0.14210858531270806, "grad_norm": 4.699248528308727, "learning_rate": 5.925566655823106e-06, "loss": 0.338836669921875, "step": 16435 }, { "epoch": 0.14215181883425132, "grad_norm": 1.7035074522493785, "learning_rate": 5.925521547762522e-06, "loss": 0.14292755126953124, "step": 16440 }, { "epoch": 0.14219505235579458, "grad_norm": 12.049569021072879, "learning_rate": 5.9254764262096916e-06, "loss": 0.1182159423828125, "step": 16445 }, { "epoch": 0.14223828587733786, "grad_norm": 64.76984691904356, "learning_rate": 5.925431291164823e-06, "loss": 0.42981796264648436, "step": 16450 }, { "epoch": 0.14228151939888112, "grad_norm": 19.63635444265763, "learning_rate": 5.925386142628122e-06, "loss": 0.11247940063476562, "step": 16455 }, { "epoch": 0.14232475292042437, "grad_norm": 25.387420862315064, "learning_rate": 5.9253409805998e-06, "loss": 0.29624786376953127, "step": 16460 }, { "epoch": 0.14236798644196763, "grad_norm": 43.36643796903917, "learning_rate": 5.925295805080064e-06, "loss": 0.418145751953125, "step": 16465 }, { "epoch": 0.14241121996351092, "grad_norm": 53.74726891371195, "learning_rate": 5.9252506160691215e-06, "loss": 0.2779361724853516, "step": 16470 }, { "epoch": 0.14245445348505417, "grad_norm": 9.650172816547935, "learning_rate": 5.925205413567181e-06, "loss": 0.1570404052734375, "step": 16475 }, { "epoch": 0.14249768700659743, "grad_norm": 0.4719856628218746, "learning_rate": 5.925160197574453e-06, "loss": 0.0851776123046875, "step": 16480 }, { "epoch": 0.14254092052814069, "grad_norm": 9.929980784784279, "learning_rate": 5.925114968091145e-06, "loss": 0.2656707763671875, "step": 16485 }, { "epoch": 0.14258415404968397, "grad_norm": 3.2977696152379026, "learning_rate": 5.925069725117464e-06, "loss": 0.08206787109375, "step": 16490 }, { "epoch": 0.14262738757122723, "grad_norm": 6.019712050532792, "learning_rate": 5.92502446865362e-06, "loss": 0.08149032592773438, "step": 16495 }, { "epoch": 0.14267062109277048, "grad_norm": 2.9987437467780316, "learning_rate": 5.924979198699822e-06, "loss": 0.17388916015625, "step": 16500 }, { "epoch": 0.14271385461431377, "grad_norm": 13.802550874670333, "learning_rate": 5.9249339152562776e-06, "loss": 0.171343994140625, "step": 16505 }, { "epoch": 0.14275708813585702, "grad_norm": 12.93965007972728, "learning_rate": 5.924888618323197e-06, "loss": 0.0737274169921875, "step": 16510 }, { "epoch": 0.14280032165740028, "grad_norm": 1.7937662062590978, "learning_rate": 5.924843307900789e-06, "loss": 0.244720458984375, "step": 16515 }, { "epoch": 0.14284355517894354, "grad_norm": 6.95679317089471, "learning_rate": 5.924797983989262e-06, "loss": 0.4449207305908203, "step": 16520 }, { "epoch": 0.14288678870048682, "grad_norm": 31.943836099679853, "learning_rate": 5.924752646588826e-06, "loss": 0.410107421875, "step": 16525 }, { "epoch": 0.14293002222203008, "grad_norm": 2.1887878280401005, "learning_rate": 5.924707295699687e-06, "loss": 0.1662872314453125, "step": 16530 }, { "epoch": 0.14297325574357334, "grad_norm": 1.006993492928405, "learning_rate": 5.9246619313220575e-06, "loss": 0.06823806762695313, "step": 16535 }, { "epoch": 0.1430164892651166, "grad_norm": 1.2973630585221883, "learning_rate": 5.924616553456146e-06, "loss": 0.3257293701171875, "step": 16540 }, { "epoch": 0.14305972278665988, "grad_norm": 4.009486150416436, "learning_rate": 5.924571162102161e-06, "loss": 0.10010986328125, "step": 16545 }, { "epoch": 0.14310295630820313, "grad_norm": 13.143705522939443, "learning_rate": 5.924525757260313e-06, "loss": 0.2120849609375, "step": 16550 }, { "epoch": 0.1431461898297464, "grad_norm": 16.793918403566554, "learning_rate": 5.924480338930808e-06, "loss": 0.24268798828125, "step": 16555 }, { "epoch": 0.14318942335128965, "grad_norm": 28.521204556578493, "learning_rate": 5.924434907113862e-06, "loss": 0.3591594696044922, "step": 16560 }, { "epoch": 0.14323265687283293, "grad_norm": 13.280001485053758, "learning_rate": 5.924389461809677e-06, "loss": 0.17744369506835939, "step": 16565 }, { "epoch": 0.1432758903943762, "grad_norm": 16.15916814114217, "learning_rate": 5.9243440030184685e-06, "loss": 0.450091552734375, "step": 16570 }, { "epoch": 0.14331912391591944, "grad_norm": 16.202756794922657, "learning_rate": 5.924298530740443e-06, "loss": 0.0381011962890625, "step": 16575 }, { "epoch": 0.1433623574374627, "grad_norm": 45.269768360252364, "learning_rate": 5.924253044975811e-06, "loss": 0.3175384521484375, "step": 16580 }, { "epoch": 0.14340559095900599, "grad_norm": 13.722592557815569, "learning_rate": 5.924207545724782e-06, "loss": 0.156671142578125, "step": 16585 }, { "epoch": 0.14344882448054924, "grad_norm": 8.403065314825366, "learning_rate": 5.924162032987567e-06, "loss": 0.22517852783203124, "step": 16590 }, { "epoch": 0.1434920580020925, "grad_norm": 1.6881244711426495, "learning_rate": 5.924116506764375e-06, "loss": 0.09156265258789062, "step": 16595 }, { "epoch": 0.14353529152363576, "grad_norm": 13.349474419399659, "learning_rate": 5.9240709670554154e-06, "loss": 0.14130859375, "step": 16600 }, { "epoch": 0.14357852504517904, "grad_norm": 4.064264925424284, "learning_rate": 5.9240254138609e-06, "loss": 0.237982177734375, "step": 16605 }, { "epoch": 0.1436217585667223, "grad_norm": 12.378501885736545, "learning_rate": 5.923979847181036e-06, "loss": 0.072479248046875, "step": 16610 }, { "epoch": 0.14366499208826555, "grad_norm": 31.901114049011, "learning_rate": 5.923934267016037e-06, "loss": 0.4554145812988281, "step": 16615 }, { "epoch": 0.1437082256098088, "grad_norm": 3.8545874734672334, "learning_rate": 5.923888673366111e-06, "loss": 0.1373992919921875, "step": 16620 }, { "epoch": 0.1437514591313521, "grad_norm": 22.90930626907558, "learning_rate": 5.923843066231468e-06, "loss": 0.21756744384765625, "step": 16625 }, { "epoch": 0.14379469265289535, "grad_norm": 7.359670268559375, "learning_rate": 5.923797445612321e-06, "loss": 0.12798194885253905, "step": 16630 }, { "epoch": 0.1438379261744386, "grad_norm": 5.591680028199843, "learning_rate": 5.923751811508877e-06, "loss": 0.061602783203125, "step": 16635 }, { "epoch": 0.14388115969598186, "grad_norm": 14.410684632245097, "learning_rate": 5.9237061639213475e-06, "loss": 0.3792137145996094, "step": 16640 }, { "epoch": 0.14392439321752515, "grad_norm": 36.21403113719913, "learning_rate": 5.923660502849946e-06, "loss": 0.1643768310546875, "step": 16645 }, { "epoch": 0.1439676267390684, "grad_norm": 42.315090416839915, "learning_rate": 5.923614828294878e-06, "loss": 0.453057861328125, "step": 16650 }, { "epoch": 0.14401086026061166, "grad_norm": 17.10430298120411, "learning_rate": 5.923569140256358e-06, "loss": 0.5128440856933594, "step": 16655 }, { "epoch": 0.14405409378215492, "grad_norm": 1.5439698748846504, "learning_rate": 5.923523438734595e-06, "loss": 0.11755218505859374, "step": 16660 }, { "epoch": 0.1440973273036982, "grad_norm": 2.382444311030836, "learning_rate": 5.923477723729801e-06, "loss": 0.4487060546875, "step": 16665 }, { "epoch": 0.14414056082524146, "grad_norm": 19.87386215734945, "learning_rate": 5.9234319952421845e-06, "loss": 0.3898750305175781, "step": 16670 }, { "epoch": 0.14418379434678472, "grad_norm": 5.861559124846552, "learning_rate": 5.923386253271959e-06, "loss": 0.14653701782226564, "step": 16675 }, { "epoch": 0.144227027868328, "grad_norm": 5.541595802246494, "learning_rate": 5.9233404978193345e-06, "loss": 0.4745849609375, "step": 16680 }, { "epoch": 0.14427026138987126, "grad_norm": 2.654100539862431, "learning_rate": 5.923294728884522e-06, "loss": 0.051580810546875, "step": 16685 }, { "epoch": 0.1443134949114145, "grad_norm": 18.412961578271037, "learning_rate": 5.923248946467732e-06, "loss": 0.21466331481933593, "step": 16690 }, { "epoch": 0.14435672843295777, "grad_norm": 0.3032944434331412, "learning_rate": 5.923203150569177e-06, "loss": 0.12707901000976562, "step": 16695 }, { "epoch": 0.14439996195450105, "grad_norm": 1.740344403749772, "learning_rate": 5.923157341189066e-06, "loss": 0.24943389892578124, "step": 16700 }, { "epoch": 0.1444431954760443, "grad_norm": 5.40943402253499, "learning_rate": 5.923111518327613e-06, "loss": 0.17626953125, "step": 16705 }, { "epoch": 0.14448642899758757, "grad_norm": 71.96055958966893, "learning_rate": 5.923065681985028e-06, "loss": 0.446240234375, "step": 16710 }, { "epoch": 0.14452966251913082, "grad_norm": 39.647797743935584, "learning_rate": 5.923019832161521e-06, "loss": 0.27965545654296875, "step": 16715 }, { "epoch": 0.1445728960406741, "grad_norm": 33.270359850980896, "learning_rate": 5.922973968857306e-06, "loss": 0.17944564819335937, "step": 16720 }, { "epoch": 0.14461612956221737, "grad_norm": 4.10018289198918, "learning_rate": 5.922928092072592e-06, "loss": 0.0940155029296875, "step": 16725 }, { "epoch": 0.14465936308376062, "grad_norm": 8.7447876000492, "learning_rate": 5.922882201807593e-06, "loss": 0.11206283569335937, "step": 16730 }, { "epoch": 0.14470259660530388, "grad_norm": 11.569173673570216, "learning_rate": 5.92283629806252e-06, "loss": 0.5197113037109375, "step": 16735 }, { "epoch": 0.14474583012684716, "grad_norm": 1.56966358429192, "learning_rate": 5.922790380837583e-06, "loss": 0.170147705078125, "step": 16740 }, { "epoch": 0.14478906364839042, "grad_norm": 13.524739786494946, "learning_rate": 5.922744450132996e-06, "loss": 0.13305816650390626, "step": 16745 }, { "epoch": 0.14483229716993368, "grad_norm": 1.0990016667171727, "learning_rate": 5.92269850594897e-06, "loss": 0.071307373046875, "step": 16750 }, { "epoch": 0.14487553069147693, "grad_norm": 50.66039729762288, "learning_rate": 5.922652548285716e-06, "loss": 0.39757537841796875, "step": 16755 }, { "epoch": 0.14491876421302022, "grad_norm": 30.04110680123458, "learning_rate": 5.922606577143446e-06, "loss": 0.26331787109375, "step": 16760 }, { "epoch": 0.14496199773456347, "grad_norm": 10.463914354716202, "learning_rate": 5.9225605925223746e-06, "loss": 0.17069854736328124, "step": 16765 }, { "epoch": 0.14500523125610673, "grad_norm": 4.376657844935754, "learning_rate": 5.922514594422711e-06, "loss": 0.3573211669921875, "step": 16770 }, { "epoch": 0.14504846477765, "grad_norm": 20.82395046724013, "learning_rate": 5.922468582844669e-06, "loss": 0.3186737060546875, "step": 16775 }, { "epoch": 0.14509169829919327, "grad_norm": 1.9077885598085254, "learning_rate": 5.92242255778846e-06, "loss": 0.4022064208984375, "step": 16780 }, { "epoch": 0.14513493182073653, "grad_norm": 26.65978709142893, "learning_rate": 5.922376519254295e-06, "loss": 0.25615234375, "step": 16785 }, { "epoch": 0.14517816534227979, "grad_norm": 2.875792208967807, "learning_rate": 5.922330467242389e-06, "loss": 0.170074462890625, "step": 16790 }, { "epoch": 0.14522139886382304, "grad_norm": 3.515822765078381, "learning_rate": 5.922284401752953e-06, "loss": 0.17528533935546875, "step": 16795 }, { "epoch": 0.14526463238536633, "grad_norm": 7.020196762114929, "learning_rate": 5.922238322786199e-06, "loss": 0.1989105224609375, "step": 16800 }, { "epoch": 0.14530786590690958, "grad_norm": 4.1719156712897005, "learning_rate": 5.92219223034234e-06, "loss": 0.12209568023681641, "step": 16805 }, { "epoch": 0.14535109942845284, "grad_norm": 3.5685953268608954, "learning_rate": 5.922146124421589e-06, "loss": 0.1543792724609375, "step": 16810 }, { "epoch": 0.1453943329499961, "grad_norm": 29.292712721765906, "learning_rate": 5.922100005024159e-06, "loss": 0.115386962890625, "step": 16815 }, { "epoch": 0.14543756647153938, "grad_norm": 9.442512126131168, "learning_rate": 5.922053872150261e-06, "loss": 0.2796875, "step": 16820 }, { "epoch": 0.14548079999308264, "grad_norm": 4.881084534881247, "learning_rate": 5.922007725800108e-06, "loss": 0.4415016174316406, "step": 16825 }, { "epoch": 0.1455240335146259, "grad_norm": 1.3544185875584343, "learning_rate": 5.921961565973915e-06, "loss": 0.12544097900390624, "step": 16830 }, { "epoch": 0.14556726703616915, "grad_norm": 8.305896066843266, "learning_rate": 5.9219153926718936e-06, "loss": 0.07363662719726563, "step": 16835 }, { "epoch": 0.14561050055771244, "grad_norm": 9.747427768903417, "learning_rate": 5.921869205894256e-06, "loss": 0.50460205078125, "step": 16840 }, { "epoch": 0.1456537340792557, "grad_norm": 3.6188557204150826, "learning_rate": 5.921823005641215e-06, "loss": 0.20681915283203126, "step": 16845 }, { "epoch": 0.14569696760079895, "grad_norm": 45.80875550383127, "learning_rate": 5.921776791912986e-06, "loss": 0.6342041015625, "step": 16850 }, { "epoch": 0.1457402011223422, "grad_norm": 4.509888308599645, "learning_rate": 5.92173056470978e-06, "loss": 0.12196788787841797, "step": 16855 }, { "epoch": 0.1457834346438855, "grad_norm": 0.4170709611051168, "learning_rate": 5.92168432403181e-06, "loss": 0.1864501953125, "step": 16860 }, { "epoch": 0.14582666816542875, "grad_norm": 24.736768814655978, "learning_rate": 5.9216380698792915e-06, "loss": 0.10371894836425781, "step": 16865 }, { "epoch": 0.145869901686972, "grad_norm": 19.03398639572747, "learning_rate": 5.921591802252435e-06, "loss": 0.26744384765625, "step": 16870 }, { "epoch": 0.1459131352085153, "grad_norm": 5.098511139824401, "learning_rate": 5.9215455211514565e-06, "loss": 0.44227142333984376, "step": 16875 }, { "epoch": 0.14595636873005854, "grad_norm": 51.748311069591985, "learning_rate": 5.921499226576567e-06, "loss": 0.46764984130859377, "step": 16880 }, { "epoch": 0.1459996022516018, "grad_norm": 40.85348832810252, "learning_rate": 5.921452918527982e-06, "loss": 0.379290771484375, "step": 16885 }, { "epoch": 0.14604283577314506, "grad_norm": 8.973679664892067, "learning_rate": 5.921406597005915e-06, "loss": 0.2733795166015625, "step": 16890 }, { "epoch": 0.14608606929468834, "grad_norm": 14.091443490121645, "learning_rate": 5.9213602620105776e-06, "loss": 0.5196624755859375, "step": 16895 }, { "epoch": 0.1461293028162316, "grad_norm": 6.060206633255842, "learning_rate": 5.921313913542185e-06, "loss": 0.1448944091796875, "step": 16900 }, { "epoch": 0.14617253633777486, "grad_norm": 2.491701333368038, "learning_rate": 5.921267551600951e-06, "loss": 0.0379730224609375, "step": 16905 }, { "epoch": 0.1462157698593181, "grad_norm": 0.5171816219877285, "learning_rate": 5.921221176187089e-06, "loss": 0.31669158935546876, "step": 16910 }, { "epoch": 0.1462590033808614, "grad_norm": 30.39962331419116, "learning_rate": 5.921174787300814e-06, "loss": 0.23787345886230468, "step": 16915 }, { "epoch": 0.14630223690240465, "grad_norm": 7.942438185914288, "learning_rate": 5.921128384942338e-06, "loss": 0.1429931640625, "step": 16920 }, { "epoch": 0.1463454704239479, "grad_norm": 7.258887424651392, "learning_rate": 5.921081969111876e-06, "loss": 0.15953369140625, "step": 16925 }, { "epoch": 0.14638870394549117, "grad_norm": 22.263853515718267, "learning_rate": 5.921035539809642e-06, "loss": 0.21367225646972657, "step": 16930 }, { "epoch": 0.14643193746703445, "grad_norm": 4.366078502932118, "learning_rate": 5.920989097035851e-06, "loss": 0.133905029296875, "step": 16935 }, { "epoch": 0.1464751709885777, "grad_norm": 2.366185512651357, "learning_rate": 5.920942640790716e-06, "loss": 0.3185302734375, "step": 16940 }, { "epoch": 0.14651840451012096, "grad_norm": 11.074079066122156, "learning_rate": 5.920896171074452e-06, "loss": 0.1152099609375, "step": 16945 }, { "epoch": 0.14656163803166422, "grad_norm": 16.381809990755922, "learning_rate": 5.9208496878872726e-06, "loss": 0.3563323974609375, "step": 16950 }, { "epoch": 0.1466048715532075, "grad_norm": 9.969004987861036, "learning_rate": 5.920803191229391e-06, "loss": 0.0697021484375, "step": 16955 }, { "epoch": 0.14664810507475076, "grad_norm": 1.586418496500373, "learning_rate": 5.920756681101025e-06, "loss": 0.2564208984375, "step": 16960 }, { "epoch": 0.14669133859629402, "grad_norm": 9.523007224011078, "learning_rate": 5.920710157502387e-06, "loss": 0.32929840087890627, "step": 16965 }, { "epoch": 0.14673457211783728, "grad_norm": 47.13795127816318, "learning_rate": 5.9206636204336915e-06, "loss": 0.5809394836425781, "step": 16970 }, { "epoch": 0.14677780563938056, "grad_norm": 34.30030506358018, "learning_rate": 5.920617069895154e-06, "loss": 0.3154266357421875, "step": 16975 }, { "epoch": 0.14682103916092382, "grad_norm": 45.28661524906409, "learning_rate": 5.920570505886988e-06, "loss": 0.389385986328125, "step": 16980 }, { "epoch": 0.14686427268246707, "grad_norm": 2.0318928998050763, "learning_rate": 5.92052392840941e-06, "loss": 0.04930267333984375, "step": 16985 }, { "epoch": 0.14690750620401033, "grad_norm": 48.67908176821227, "learning_rate": 5.920477337462632e-06, "loss": 0.5508247375488281, "step": 16990 }, { "epoch": 0.14695073972555361, "grad_norm": 25.52647515864899, "learning_rate": 5.920430733046872e-06, "loss": 0.37969970703125, "step": 16995 }, { "epoch": 0.14699397324709687, "grad_norm": 2.2042889754050607, "learning_rate": 5.9203841151623425e-06, "loss": 0.05496368408203125, "step": 17000 }, { "epoch": 0.14703720676864013, "grad_norm": 51.981631058280044, "learning_rate": 5.92033748380926e-06, "loss": 0.45701904296875, "step": 17005 }, { "epoch": 0.14708044029018338, "grad_norm": 1.3408298344835785, "learning_rate": 5.920290838987839e-06, "loss": 0.175885009765625, "step": 17010 }, { "epoch": 0.14712367381172667, "grad_norm": 3.0660675874536416, "learning_rate": 5.920244180698295e-06, "loss": 0.106048583984375, "step": 17015 }, { "epoch": 0.14716690733326993, "grad_norm": 8.426955640343978, "learning_rate": 5.920197508940843e-06, "loss": 0.21785736083984375, "step": 17020 }, { "epoch": 0.14721014085481318, "grad_norm": 1.809079775250747, "learning_rate": 5.920150823715698e-06, "loss": 0.507940673828125, "step": 17025 }, { "epoch": 0.14725337437635644, "grad_norm": 21.008928308345247, "learning_rate": 5.920104125023075e-06, "loss": 0.21885261535644532, "step": 17030 }, { "epoch": 0.14729660789789972, "grad_norm": 8.298132283807483, "learning_rate": 5.92005741286319e-06, "loss": 0.301837158203125, "step": 17035 }, { "epoch": 0.14733984141944298, "grad_norm": 64.57594540330764, "learning_rate": 5.920010687236258e-06, "loss": 0.1878082275390625, "step": 17040 }, { "epoch": 0.14738307494098624, "grad_norm": 1.9972055542426925, "learning_rate": 5.919963948142495e-06, "loss": 0.378070068359375, "step": 17045 }, { "epoch": 0.14742630846252952, "grad_norm": 11.967559831622772, "learning_rate": 5.919917195582116e-06, "loss": 0.2517723083496094, "step": 17050 }, { "epoch": 0.14746954198407278, "grad_norm": 4.6279887700664375, "learning_rate": 5.919870429555337e-06, "loss": 0.05991973876953125, "step": 17055 }, { "epoch": 0.14751277550561603, "grad_norm": 2.892437404934506, "learning_rate": 5.9198236500623735e-06, "loss": 0.31417388916015626, "step": 17060 }, { "epoch": 0.1475560090271593, "grad_norm": 31.152516162175992, "learning_rate": 5.919776857103442e-06, "loss": 0.199951171875, "step": 17065 }, { "epoch": 0.14759924254870257, "grad_norm": 52.0126680308671, "learning_rate": 5.919730050678757e-06, "loss": 0.2948211669921875, "step": 17070 }, { "epoch": 0.14764247607024583, "grad_norm": 140.2597623526963, "learning_rate": 5.919683230788536e-06, "loss": 0.217724609375, "step": 17075 }, { "epoch": 0.1476857095917891, "grad_norm": 19.605529394950942, "learning_rate": 5.919636397432993e-06, "loss": 0.1300750732421875, "step": 17080 }, { "epoch": 0.14772894311333234, "grad_norm": 6.326897170718189, "learning_rate": 5.919589550612344e-06, "loss": 0.0532867431640625, "step": 17085 }, { "epoch": 0.14777217663487563, "grad_norm": 21.03759562066031, "learning_rate": 5.919542690326808e-06, "loss": 0.1983551025390625, "step": 17090 }, { "epoch": 0.14781541015641889, "grad_norm": 4.43485928316658, "learning_rate": 5.919495816576597e-06, "loss": 0.119110107421875, "step": 17095 }, { "epoch": 0.14785864367796214, "grad_norm": 36.4920326717131, "learning_rate": 5.919448929361931e-06, "loss": 0.376519775390625, "step": 17100 }, { "epoch": 0.1479018771995054, "grad_norm": 4.044645757956681, "learning_rate": 5.919402028683023e-06, "loss": 0.22481231689453124, "step": 17105 }, { "epoch": 0.14794511072104868, "grad_norm": 43.865736177355544, "learning_rate": 5.919355114540092e-06, "loss": 0.16710205078125, "step": 17110 }, { "epoch": 0.14798834424259194, "grad_norm": 10.626421829239828, "learning_rate": 5.919308186933353e-06, "loss": 0.1844451904296875, "step": 17115 }, { "epoch": 0.1480315777641352, "grad_norm": 1.481765280947935, "learning_rate": 5.9192612458630225e-06, "loss": 0.07192459106445312, "step": 17120 }, { "epoch": 0.14807481128567845, "grad_norm": 17.07941528897959, "learning_rate": 5.919214291329317e-06, "loss": 0.254876708984375, "step": 17125 }, { "epoch": 0.14811804480722174, "grad_norm": 0.8548762554172888, "learning_rate": 5.919167323332453e-06, "loss": 0.3041046142578125, "step": 17130 }, { "epoch": 0.148161278328765, "grad_norm": 5.581401605875958, "learning_rate": 5.919120341872647e-06, "loss": 0.13402099609375, "step": 17135 }, { "epoch": 0.14820451185030825, "grad_norm": 26.607914422771728, "learning_rate": 5.919073346950116e-06, "loss": 0.22738037109375, "step": 17140 }, { "epoch": 0.1482477453718515, "grad_norm": 2.414536836123267, "learning_rate": 5.919026338565078e-06, "loss": 0.29275054931640626, "step": 17145 }, { "epoch": 0.1482909788933948, "grad_norm": 20.340474028605854, "learning_rate": 5.9189793167177464e-06, "loss": 0.33428955078125, "step": 17150 }, { "epoch": 0.14833421241493805, "grad_norm": 12.02934901020012, "learning_rate": 5.918932281408342e-06, "loss": 0.3375984191894531, "step": 17155 }, { "epoch": 0.1483774459364813, "grad_norm": 2.6026350118475197, "learning_rate": 5.918885232637079e-06, "loss": 0.5422332763671875, "step": 17160 }, { "epoch": 0.14842067945802456, "grad_norm": 11.377419648446539, "learning_rate": 5.918838170404175e-06, "loss": 0.349169921875, "step": 17165 }, { "epoch": 0.14846391297956785, "grad_norm": 31.734068310033095, "learning_rate": 5.918791094709847e-06, "loss": 0.392279052734375, "step": 17170 }, { "epoch": 0.1485071465011111, "grad_norm": 0.688021193785736, "learning_rate": 5.918744005554313e-06, "loss": 0.2605438232421875, "step": 17175 }, { "epoch": 0.14855038002265436, "grad_norm": 0.7982493905501168, "learning_rate": 5.91869690293779e-06, "loss": 0.12491912841796875, "step": 17180 }, { "epoch": 0.14859361354419762, "grad_norm": 4.830024827201549, "learning_rate": 5.9186497868604935e-06, "loss": 0.1886913299560547, "step": 17185 }, { "epoch": 0.1486368470657409, "grad_norm": 28.17164843149409, "learning_rate": 5.918602657322643e-06, "loss": 0.24816741943359374, "step": 17190 }, { "epoch": 0.14868008058728416, "grad_norm": 11.17326214987487, "learning_rate": 5.918555514324455e-06, "loss": 0.1255462646484375, "step": 17195 }, { "epoch": 0.14872331410882741, "grad_norm": 16.134970530134375, "learning_rate": 5.918508357866146e-06, "loss": 0.15464324951171876, "step": 17200 }, { "epoch": 0.14876654763037067, "grad_norm": 0.5317856019357258, "learning_rate": 5.918461187947935e-06, "loss": 0.1845233917236328, "step": 17205 }, { "epoch": 0.14880978115191396, "grad_norm": 16.95133367821003, "learning_rate": 5.918414004570038e-06, "loss": 0.30350341796875, "step": 17210 }, { "epoch": 0.1488530146734572, "grad_norm": 6.83304953926701, "learning_rate": 5.9183668077326745e-06, "loss": 0.1255279541015625, "step": 17215 }, { "epoch": 0.14889624819500047, "grad_norm": 14.190874053081227, "learning_rate": 5.9183195974360614e-06, "loss": 0.08680572509765624, "step": 17220 }, { "epoch": 0.14893948171654373, "grad_norm": 1.070221256999885, "learning_rate": 5.918272373680415e-06, "loss": 0.09957351684570312, "step": 17225 }, { "epoch": 0.148982715238087, "grad_norm": 10.846547452542639, "learning_rate": 5.9182251364659554e-06, "loss": 0.1812286376953125, "step": 17230 }, { "epoch": 0.14902594875963027, "grad_norm": 10.09667394836873, "learning_rate": 5.918177885792899e-06, "loss": 0.34249420166015626, "step": 17235 }, { "epoch": 0.14906918228117352, "grad_norm": 27.054076090571552, "learning_rate": 5.9181306216614634e-06, "loss": 0.26817169189453127, "step": 17240 }, { "epoch": 0.1491124158027168, "grad_norm": 16.168224345981386, "learning_rate": 5.918083344071868e-06, "loss": 0.4316837310791016, "step": 17245 }, { "epoch": 0.14915564932426006, "grad_norm": 33.587236151241804, "learning_rate": 5.9180360530243295e-06, "loss": 0.2535713195800781, "step": 17250 }, { "epoch": 0.14919888284580332, "grad_norm": 15.850748216827908, "learning_rate": 5.917988748519068e-06, "loss": 0.11064071655273437, "step": 17255 }, { "epoch": 0.14924211636734658, "grad_norm": 5.4035893400327595, "learning_rate": 5.9179414305562985e-06, "loss": 0.2724884033203125, "step": 17260 }, { "epoch": 0.14928534988888986, "grad_norm": 1.3544957279929584, "learning_rate": 5.9178940991362414e-06, "loss": 0.35576934814453126, "step": 17265 }, { "epoch": 0.14932858341043312, "grad_norm": 13.646954351210347, "learning_rate": 5.9178467542591145e-06, "loss": 0.0678070068359375, "step": 17270 }, { "epoch": 0.14937181693197638, "grad_norm": 0.4087748037423289, "learning_rate": 5.917799395925136e-06, "loss": 0.3601409912109375, "step": 17275 }, { "epoch": 0.14941505045351963, "grad_norm": 9.199921899796307, "learning_rate": 5.917752024134526e-06, "loss": 0.3286022186279297, "step": 17280 }, { "epoch": 0.14945828397506292, "grad_norm": 19.371889863110493, "learning_rate": 5.9177046388875e-06, "loss": 0.16883506774902343, "step": 17285 }, { "epoch": 0.14950151749660617, "grad_norm": 6.631424371335406, "learning_rate": 5.917657240184278e-06, "loss": 0.07372779846191406, "step": 17290 }, { "epoch": 0.14954475101814943, "grad_norm": 42.454160552092624, "learning_rate": 5.917609828025079e-06, "loss": 0.3509765625, "step": 17295 }, { "epoch": 0.1495879845396927, "grad_norm": 5.267960406239073, "learning_rate": 5.917562402410122e-06, "loss": 0.7882293701171875, "step": 17300 }, { "epoch": 0.14963121806123597, "grad_norm": 3.490067245807428, "learning_rate": 5.917514963339624e-06, "loss": 0.1997760772705078, "step": 17305 }, { "epoch": 0.14967445158277923, "grad_norm": 28.643874120101977, "learning_rate": 5.9174675108138045e-06, "loss": 0.31212158203125, "step": 17310 }, { "epoch": 0.14971768510432248, "grad_norm": 37.70404231459035, "learning_rate": 5.917420044832884e-06, "loss": 0.17230072021484374, "step": 17315 }, { "epoch": 0.14976091862586574, "grad_norm": 6.041842309559596, "learning_rate": 5.9173725653970795e-06, "loss": 0.35547332763671874, "step": 17320 }, { "epoch": 0.14980415214740903, "grad_norm": 0.9912355962888119, "learning_rate": 5.91732507250661e-06, "loss": 0.156884765625, "step": 17325 }, { "epoch": 0.14984738566895228, "grad_norm": 17.29004171283406, "learning_rate": 5.917277566161695e-06, "loss": 0.184478759765625, "step": 17330 }, { "epoch": 0.14989061919049554, "grad_norm": 4.74814063136041, "learning_rate": 5.917230046362554e-06, "loss": 0.275457763671875, "step": 17335 }, { "epoch": 0.1499338527120388, "grad_norm": 0.8517089552519516, "learning_rate": 5.917182513109406e-06, "loss": 0.19664382934570312, "step": 17340 }, { "epoch": 0.14997708623358208, "grad_norm": 1.3502692995630092, "learning_rate": 5.917134966402469e-06, "loss": 0.1412017822265625, "step": 17345 }, { "epoch": 0.15002031975512534, "grad_norm": 3.776128885492998, "learning_rate": 5.917087406241964e-06, "loss": 0.427587890625, "step": 17350 }, { "epoch": 0.1500635532766686, "grad_norm": 48.59528179845327, "learning_rate": 5.91703983262811e-06, "loss": 0.3123809814453125, "step": 17355 }, { "epoch": 0.15010678679821185, "grad_norm": 5.442471765586202, "learning_rate": 5.916992245561126e-06, "loss": 0.0695709228515625, "step": 17360 }, { "epoch": 0.15015002031975513, "grad_norm": 22.531697576805207, "learning_rate": 5.916944645041231e-06, "loss": 0.5043830871582031, "step": 17365 }, { "epoch": 0.1501932538412984, "grad_norm": 5.085726937240026, "learning_rate": 5.916897031068645e-06, "loss": 0.1828094482421875, "step": 17370 }, { "epoch": 0.15023648736284165, "grad_norm": 16.241728104569813, "learning_rate": 5.916849403643588e-06, "loss": 0.12763404846191406, "step": 17375 }, { "epoch": 0.1502797208843849, "grad_norm": 4.073728416997315, "learning_rate": 5.9168017627662794e-06, "loss": 0.04463119506835937, "step": 17380 }, { "epoch": 0.1503229544059282, "grad_norm": 0.4450762773647758, "learning_rate": 5.916754108436939e-06, "loss": 0.1469024658203125, "step": 17385 }, { "epoch": 0.15036618792747145, "grad_norm": 3.446210397059912, "learning_rate": 5.916706440655786e-06, "loss": 0.2352874755859375, "step": 17390 }, { "epoch": 0.1504094214490147, "grad_norm": 1.7114781601048563, "learning_rate": 5.916658759423041e-06, "loss": 0.0442535400390625, "step": 17395 }, { "epoch": 0.15045265497055796, "grad_norm": 37.52797773116981, "learning_rate": 5.9166110647389225e-06, "loss": 0.643414306640625, "step": 17400 }, { "epoch": 0.15049588849210124, "grad_norm": 11.715189078201805, "learning_rate": 5.916563356603652e-06, "loss": 0.130255126953125, "step": 17405 }, { "epoch": 0.1505391220136445, "grad_norm": 9.147760307253268, "learning_rate": 5.91651563501745e-06, "loss": 0.106146240234375, "step": 17410 }, { "epoch": 0.15058235553518776, "grad_norm": 13.035403816134396, "learning_rate": 5.916467899980534e-06, "loss": 0.1509063720703125, "step": 17415 }, { "epoch": 0.15062558905673104, "grad_norm": 6.746576389176456, "learning_rate": 5.916420151493127e-06, "loss": 0.119219970703125, "step": 17420 }, { "epoch": 0.1506688225782743, "grad_norm": 0.4605658266291703, "learning_rate": 5.916372389555447e-06, "loss": 0.2097332000732422, "step": 17425 }, { "epoch": 0.15071205609981755, "grad_norm": 4.351396564059247, "learning_rate": 5.916324614167715e-06, "loss": 0.279296875, "step": 17430 }, { "epoch": 0.1507552896213608, "grad_norm": 1.118851879521594, "learning_rate": 5.916276825330154e-06, "loss": 0.2145233154296875, "step": 17435 }, { "epoch": 0.1507985231429041, "grad_norm": 4.801501082952275, "learning_rate": 5.9162290230429794e-06, "loss": 0.20077667236328126, "step": 17440 }, { "epoch": 0.15084175666444735, "grad_norm": 9.619764223078127, "learning_rate": 5.9161812073064145e-06, "loss": 0.0739227294921875, "step": 17445 }, { "epoch": 0.1508849901859906, "grad_norm": 18.003720051317806, "learning_rate": 5.91613337812068e-06, "loss": 0.4474945068359375, "step": 17450 }, { "epoch": 0.15092822370753387, "grad_norm": 24.727849770332547, "learning_rate": 5.916085535485997e-06, "loss": 0.22910690307617188, "step": 17455 }, { "epoch": 0.15097145722907715, "grad_norm": 10.101907600077025, "learning_rate": 5.916037679402584e-06, "loss": 0.1132720947265625, "step": 17460 }, { "epoch": 0.1510146907506204, "grad_norm": 17.748069745384626, "learning_rate": 5.915989809870663e-06, "loss": 0.3099395751953125, "step": 17465 }, { "epoch": 0.15105792427216366, "grad_norm": 2.8439554604260002, "learning_rate": 5.915941926890455e-06, "loss": 0.234637451171875, "step": 17470 }, { "epoch": 0.15110115779370692, "grad_norm": 39.0307259169612, "learning_rate": 5.91589403046218e-06, "loss": 0.40147705078125, "step": 17475 }, { "epoch": 0.1511443913152502, "grad_norm": 3.826584031903583, "learning_rate": 5.915846120586059e-06, "loss": 0.13590240478515625, "step": 17480 }, { "epoch": 0.15118762483679346, "grad_norm": 3.2609927656949864, "learning_rate": 5.915798197262315e-06, "loss": 0.15787811279296876, "step": 17485 }, { "epoch": 0.15123085835833672, "grad_norm": 57.97844375963201, "learning_rate": 5.915750260491167e-06, "loss": 0.16172637939453124, "step": 17490 }, { "epoch": 0.15127409187987997, "grad_norm": 8.734702693616953, "learning_rate": 5.915702310272835e-06, "loss": 0.13070068359375, "step": 17495 }, { "epoch": 0.15131732540142326, "grad_norm": 2.031802107878597, "learning_rate": 5.915654346607543e-06, "loss": 0.5065338134765625, "step": 17500 }, { "epoch": 0.15136055892296651, "grad_norm": 17.912579702348634, "learning_rate": 5.9156063694955095e-06, "loss": 0.1161712646484375, "step": 17505 }, { "epoch": 0.15140379244450977, "grad_norm": 4.8566100245678525, "learning_rate": 5.915558378936958e-06, "loss": 0.37690277099609376, "step": 17510 }, { "epoch": 0.15144702596605303, "grad_norm": 5.983020312511538, "learning_rate": 5.915510374932109e-06, "loss": 0.206646728515625, "step": 17515 }, { "epoch": 0.1514902594875963, "grad_norm": 10.485207912066842, "learning_rate": 5.915462357481183e-06, "loss": 0.09623565673828124, "step": 17520 }, { "epoch": 0.15153349300913957, "grad_norm": 31.26967148267222, "learning_rate": 5.9154143265844026e-06, "loss": 0.2505767822265625, "step": 17525 }, { "epoch": 0.15157672653068283, "grad_norm": 5.836152251394665, "learning_rate": 5.915366282241989e-06, "loss": 0.24151763916015626, "step": 17530 }, { "epoch": 0.15161996005222608, "grad_norm": 1.3260927944498615, "learning_rate": 5.915318224454164e-06, "loss": 0.19646453857421875, "step": 17535 }, { "epoch": 0.15166319357376937, "grad_norm": 3.7780731182158305, "learning_rate": 5.915270153221149e-06, "loss": 0.40715847015380857, "step": 17540 }, { "epoch": 0.15170642709531262, "grad_norm": 7.762336337233774, "learning_rate": 5.915222068543165e-06, "loss": 0.18404541015625, "step": 17545 }, { "epoch": 0.15174966061685588, "grad_norm": 20.299295409468186, "learning_rate": 5.915173970420435e-06, "loss": 0.4330657958984375, "step": 17550 }, { "epoch": 0.15179289413839914, "grad_norm": 10.42853819747841, "learning_rate": 5.91512585885318e-06, "loss": 0.0807403564453125, "step": 17555 }, { "epoch": 0.15183612765994242, "grad_norm": 17.17849348920206, "learning_rate": 5.915077733841622e-06, "loss": 0.2677520751953125, "step": 17560 }, { "epoch": 0.15187936118148568, "grad_norm": 52.00559468042826, "learning_rate": 5.915029595385984e-06, "loss": 0.5133895874023438, "step": 17565 }, { "epoch": 0.15192259470302893, "grad_norm": 14.946859334847241, "learning_rate": 5.914981443486485e-06, "loss": 0.19756317138671875, "step": 17570 }, { "epoch": 0.1519658282245722, "grad_norm": 24.473683968217262, "learning_rate": 5.9149332781433516e-06, "loss": 0.2105194091796875, "step": 17575 }, { "epoch": 0.15200906174611548, "grad_norm": 1.0386392729301583, "learning_rate": 5.914885099356802e-06, "loss": 0.10679817199707031, "step": 17580 }, { "epoch": 0.15205229526765873, "grad_norm": 9.578636006057016, "learning_rate": 5.91483690712706e-06, "loss": 0.09444198608398438, "step": 17585 }, { "epoch": 0.152095528789202, "grad_norm": 5.654004663784634, "learning_rate": 5.914788701454348e-06, "loss": 0.2273590087890625, "step": 17590 }, { "epoch": 0.15213876231074525, "grad_norm": 29.41410410761266, "learning_rate": 5.914740482338887e-06, "loss": 0.2767578125, "step": 17595 }, { "epoch": 0.15218199583228853, "grad_norm": 5.7840907616852935, "learning_rate": 5.914692249780901e-06, "loss": 0.2689910888671875, "step": 17600 }, { "epoch": 0.1522252293538318, "grad_norm": 5.468148512903627, "learning_rate": 5.914644003780613e-06, "loss": 0.12615966796875, "step": 17605 }, { "epoch": 0.15226846287537504, "grad_norm": 3.334983074210034, "learning_rate": 5.914595744338244e-06, "loss": 0.038474655151367186, "step": 17610 }, { "epoch": 0.15231169639691833, "grad_norm": 7.879888633038141, "learning_rate": 5.914547471454016e-06, "loss": 0.2686920166015625, "step": 17615 }, { "epoch": 0.15235492991846158, "grad_norm": 24.5451666866875, "learning_rate": 5.914499185128153e-06, "loss": 0.18434906005859375, "step": 17620 }, { "epoch": 0.15239816344000484, "grad_norm": 2.5946390575480023, "learning_rate": 5.914450885360879e-06, "loss": 0.4181034088134766, "step": 17625 }, { "epoch": 0.1524413969615481, "grad_norm": 20.151404495874868, "learning_rate": 5.914402572152413e-06, "loss": 0.58438720703125, "step": 17630 }, { "epoch": 0.15248463048309138, "grad_norm": 8.685333584000878, "learning_rate": 5.9143542455029805e-06, "loss": 0.1179595947265625, "step": 17635 }, { "epoch": 0.15252786400463464, "grad_norm": 39.315076022012455, "learning_rate": 5.914305905412804e-06, "loss": 0.272111701965332, "step": 17640 }, { "epoch": 0.1525710975261779, "grad_norm": 13.700370987845991, "learning_rate": 5.914257551882105e-06, "loss": 0.3627685546875, "step": 17645 }, { "epoch": 0.15261433104772115, "grad_norm": 17.55912017209921, "learning_rate": 5.91420918491111e-06, "loss": 0.14647216796875, "step": 17650 }, { "epoch": 0.15265756456926444, "grad_norm": 3.1860030206408765, "learning_rate": 5.914160804500037e-06, "loss": 0.197357177734375, "step": 17655 }, { "epoch": 0.1527007980908077, "grad_norm": 7.2270919904926325, "learning_rate": 5.914112410649113e-06, "loss": 0.6417724609375, "step": 17660 }, { "epoch": 0.15274403161235095, "grad_norm": 7.051789827900744, "learning_rate": 5.914064003358561e-06, "loss": 0.302593994140625, "step": 17665 }, { "epoch": 0.1527872651338942, "grad_norm": 15.976367928970166, "learning_rate": 5.914015582628602e-06, "loss": 0.3654571533203125, "step": 17670 }, { "epoch": 0.1528304986554375, "grad_norm": 3.207221467551434, "learning_rate": 5.9139671484594615e-06, "loss": 0.49833984375, "step": 17675 }, { "epoch": 0.15287373217698075, "grad_norm": 9.460337121133295, "learning_rate": 5.913918700851362e-06, "loss": 0.14486846923828126, "step": 17680 }, { "epoch": 0.152916965698524, "grad_norm": 23.77016408955745, "learning_rate": 5.913870239804527e-06, "loss": 0.20213470458984376, "step": 17685 }, { "epoch": 0.15296019922006726, "grad_norm": 49.0688235801874, "learning_rate": 5.913821765319178e-06, "loss": 0.2737274169921875, "step": 17690 }, { "epoch": 0.15300343274161055, "grad_norm": 6.762853874244158, "learning_rate": 5.913773277395543e-06, "loss": 0.119281005859375, "step": 17695 }, { "epoch": 0.1530466662631538, "grad_norm": 3.3408117781531086, "learning_rate": 5.913724776033841e-06, "loss": 0.1657470703125, "step": 17700 }, { "epoch": 0.15308989978469706, "grad_norm": 4.071535120917019, "learning_rate": 5.9136762612342995e-06, "loss": 0.10311508178710938, "step": 17705 }, { "epoch": 0.15313313330624032, "grad_norm": 10.620341491955505, "learning_rate": 5.91362773299714e-06, "loss": 0.3180717468261719, "step": 17710 }, { "epoch": 0.1531763668277836, "grad_norm": 4.917724500249976, "learning_rate": 5.913579191322587e-06, "loss": 0.0626678466796875, "step": 17715 }, { "epoch": 0.15321960034932686, "grad_norm": 19.21920603661667, "learning_rate": 5.913530636210864e-06, "loss": 0.102984619140625, "step": 17720 }, { "epoch": 0.1532628338708701, "grad_norm": 11.77549373451749, "learning_rate": 5.913482067662195e-06, "loss": 0.28370361328125, "step": 17725 }, { "epoch": 0.15330606739241337, "grad_norm": 40.34425839127403, "learning_rate": 5.913433485676804e-06, "loss": 0.23847808837890624, "step": 17730 }, { "epoch": 0.15334930091395665, "grad_norm": 5.133945422027836, "learning_rate": 5.9133848902549154e-06, "loss": 0.2002685546875, "step": 17735 }, { "epoch": 0.1533925344354999, "grad_norm": 12.190378983097908, "learning_rate": 5.913336281396753e-06, "loss": 0.10392074584960938, "step": 17740 }, { "epoch": 0.15343576795704317, "grad_norm": 16.65192980947679, "learning_rate": 5.913287659102541e-06, "loss": 0.12486495971679687, "step": 17745 }, { "epoch": 0.15347900147858642, "grad_norm": 14.660882335963109, "learning_rate": 5.913239023372504e-06, "loss": 0.12020721435546874, "step": 17750 }, { "epoch": 0.1535222350001297, "grad_norm": 17.055122191428637, "learning_rate": 5.913190374206866e-06, "loss": 0.45455398559570315, "step": 17755 }, { "epoch": 0.15356546852167297, "grad_norm": 7.294895657774037, "learning_rate": 5.913141711605852e-06, "loss": 0.2209991455078125, "step": 17760 }, { "epoch": 0.15360870204321622, "grad_norm": 9.131221433219302, "learning_rate": 5.913093035569685e-06, "loss": 0.181640625, "step": 17765 }, { "epoch": 0.15365193556475948, "grad_norm": 9.748692595739325, "learning_rate": 5.913044346098591e-06, "loss": 0.118035888671875, "step": 17770 }, { "epoch": 0.15369516908630276, "grad_norm": 7.354579920822497, "learning_rate": 5.9129956431927935e-06, "loss": 0.24148941040039062, "step": 17775 }, { "epoch": 0.15373840260784602, "grad_norm": 2.4610109124605755, "learning_rate": 5.9129469268525166e-06, "loss": 0.363580322265625, "step": 17780 }, { "epoch": 0.15378163612938928, "grad_norm": 30.761330701759512, "learning_rate": 5.912898197077988e-06, "loss": 0.304949951171875, "step": 17785 }, { "epoch": 0.15382486965093256, "grad_norm": 15.00248472464913, "learning_rate": 5.912849453869429e-06, "loss": 0.6134628295898438, "step": 17790 }, { "epoch": 0.15386810317247582, "grad_norm": 13.20090470697383, "learning_rate": 5.912800697227066e-06, "loss": 0.1615264892578125, "step": 17795 }, { "epoch": 0.15391133669401907, "grad_norm": 0.15195482502602728, "learning_rate": 5.912751927151123e-06, "loss": 0.15856170654296875, "step": 17800 }, { "epoch": 0.15395457021556233, "grad_norm": 27.915789043628937, "learning_rate": 5.912703143641825e-06, "loss": 0.1978179931640625, "step": 17805 }, { "epoch": 0.15399780373710562, "grad_norm": 3.082504022815407, "learning_rate": 5.912654346699399e-06, "loss": 0.152191162109375, "step": 17810 }, { "epoch": 0.15404103725864887, "grad_norm": 22.500870526604736, "learning_rate": 5.912605536324068e-06, "loss": 0.32352294921875, "step": 17815 }, { "epoch": 0.15408427078019213, "grad_norm": 2.0460779990293108, "learning_rate": 5.912556712516057e-06, "loss": 0.45067138671875, "step": 17820 }, { "epoch": 0.15412750430173539, "grad_norm": 1.030566625258253, "learning_rate": 5.912507875275593e-06, "loss": 0.0358642578125, "step": 17825 }, { "epoch": 0.15417073782327867, "grad_norm": 4.842850673420539, "learning_rate": 5.912459024602899e-06, "loss": 0.298016357421875, "step": 17830 }, { "epoch": 0.15421397134482193, "grad_norm": 0.27670201585428605, "learning_rate": 5.912410160498201e-06, "loss": 0.39081592559814454, "step": 17835 }, { "epoch": 0.15425720486636518, "grad_norm": 3.642121177845874, "learning_rate": 5.912361282961725e-06, "loss": 0.15823974609375, "step": 17840 }, { "epoch": 0.15430043838790844, "grad_norm": 7.851105112373311, "learning_rate": 5.912312391993697e-06, "loss": 0.39248046875, "step": 17845 }, { "epoch": 0.15434367190945172, "grad_norm": 19.983635921064245, "learning_rate": 5.9122634875943405e-06, "loss": 0.07797813415527344, "step": 17850 }, { "epoch": 0.15438690543099498, "grad_norm": 9.136475483757428, "learning_rate": 5.912214569763883e-06, "loss": 0.27996673583984377, "step": 17855 }, { "epoch": 0.15443013895253824, "grad_norm": 2.230208503403616, "learning_rate": 5.912165638502549e-06, "loss": 0.14171142578125, "step": 17860 }, { "epoch": 0.1544733724740815, "grad_norm": 20.148552488784787, "learning_rate": 5.912116693810565e-06, "loss": 0.14994049072265625, "step": 17865 }, { "epoch": 0.15451660599562478, "grad_norm": 5.066039451750499, "learning_rate": 5.912067735688154e-06, "loss": 0.0949676513671875, "step": 17870 }, { "epoch": 0.15455983951716804, "grad_norm": 25.44560001472181, "learning_rate": 5.912018764135546e-06, "loss": 0.10883369445800781, "step": 17875 }, { "epoch": 0.1546030730387113, "grad_norm": 2.2873258487218155, "learning_rate": 5.911969779152964e-06, "loss": 0.3085418701171875, "step": 17880 }, { "epoch": 0.15464630656025455, "grad_norm": 38.41485646332964, "learning_rate": 5.911920780740635e-06, "loss": 0.34178466796875, "step": 17885 }, { "epoch": 0.15468954008179783, "grad_norm": 11.558852384055232, "learning_rate": 5.9118717688987834e-06, "loss": 0.09441070556640625, "step": 17890 }, { "epoch": 0.1547327736033411, "grad_norm": 38.4910171878339, "learning_rate": 5.911822743627638e-06, "loss": 0.5243408203125, "step": 17895 }, { "epoch": 0.15477600712488435, "grad_norm": 39.27598947561461, "learning_rate": 5.9117737049274225e-06, "loss": 0.365618896484375, "step": 17900 }, { "epoch": 0.1548192406464276, "grad_norm": 3.140919204479213, "learning_rate": 5.911724652798365e-06, "loss": 0.46981658935546877, "step": 17905 }, { "epoch": 0.1548624741679709, "grad_norm": 27.496768649756497, "learning_rate": 5.91167558724069e-06, "loss": 0.3941375732421875, "step": 17910 }, { "epoch": 0.15490570768951414, "grad_norm": 1.4977071492900047, "learning_rate": 5.911626508254624e-06, "loss": 0.11214599609375, "step": 17915 }, { "epoch": 0.1549489412110574, "grad_norm": 11.63656195311001, "learning_rate": 5.911577415840394e-06, "loss": 0.06707763671875, "step": 17920 }, { "epoch": 0.15499217473260066, "grad_norm": 14.358437082683645, "learning_rate": 5.911528309998227e-06, "loss": 0.1941965103149414, "step": 17925 }, { "epoch": 0.15503540825414394, "grad_norm": 24.02999158449203, "learning_rate": 5.911479190728348e-06, "loss": 0.23683624267578124, "step": 17930 }, { "epoch": 0.1550786417756872, "grad_norm": 0.6763764880797801, "learning_rate": 5.911430058030985e-06, "loss": 0.0544189453125, "step": 17935 }, { "epoch": 0.15512187529723045, "grad_norm": 24.49119345919311, "learning_rate": 5.911380911906363e-06, "loss": 0.169427490234375, "step": 17940 }, { "epoch": 0.1551651088187737, "grad_norm": 10.204802722243112, "learning_rate": 5.911331752354709e-06, "loss": 0.22715530395507813, "step": 17945 }, { "epoch": 0.155208342340317, "grad_norm": 21.12424759974152, "learning_rate": 5.911282579376252e-06, "loss": 0.28841552734375, "step": 17950 }, { "epoch": 0.15525157586186025, "grad_norm": 7.845543681951318, "learning_rate": 5.911233392971216e-06, "loss": 0.151495361328125, "step": 17955 }, { "epoch": 0.1552948093834035, "grad_norm": 0.9445241367814319, "learning_rate": 5.911184193139828e-06, "loss": 0.08222198486328125, "step": 17960 }, { "epoch": 0.15533804290494677, "grad_norm": 8.252293583873042, "learning_rate": 5.911134979882317e-06, "loss": 0.13711090087890626, "step": 17965 }, { "epoch": 0.15538127642649005, "grad_norm": 63.004811348629566, "learning_rate": 5.911085753198908e-06, "loss": 0.4750823974609375, "step": 17970 }, { "epoch": 0.1554245099480333, "grad_norm": 2.830298316821077, "learning_rate": 5.91103651308983e-06, "loss": 0.044793701171875, "step": 17975 }, { "epoch": 0.15546774346957656, "grad_norm": 5.105306988422574, "learning_rate": 5.9109872595553075e-06, "loss": 0.259942626953125, "step": 17980 }, { "epoch": 0.15551097699111985, "grad_norm": 0.5889234156592711, "learning_rate": 5.910937992595569e-06, "loss": 0.4613185882568359, "step": 17985 }, { "epoch": 0.1555542105126631, "grad_norm": 10.676225049197871, "learning_rate": 5.910888712210842e-06, "loss": 0.07996940612792969, "step": 17990 }, { "epoch": 0.15559744403420636, "grad_norm": 13.885284601992536, "learning_rate": 5.910839418401354e-06, "loss": 0.13533477783203124, "step": 17995 }, { "epoch": 0.15564067755574962, "grad_norm": 0.3475970489748084, "learning_rate": 5.910790111167331e-06, "loss": 0.21352996826171874, "step": 18000 }, { "epoch": 0.1556839110772929, "grad_norm": 3.0090948836126112, "learning_rate": 5.9107407905090015e-06, "loss": 0.150982666015625, "step": 18005 }, { "epoch": 0.15572714459883616, "grad_norm": 43.20451052666899, "learning_rate": 5.910691456426592e-06, "loss": 0.34289398193359377, "step": 18010 }, { "epoch": 0.15577037812037942, "grad_norm": 10.24821363581399, "learning_rate": 5.910642108920332e-06, "loss": 0.4156890869140625, "step": 18015 }, { "epoch": 0.15581361164192267, "grad_norm": 6.9243218106990305, "learning_rate": 5.910592747990447e-06, "loss": 0.091552734375, "step": 18020 }, { "epoch": 0.15585684516346596, "grad_norm": 21.72766818739777, "learning_rate": 5.910543373637166e-06, "loss": 0.32521209716796873, "step": 18025 }, { "epoch": 0.1559000786850092, "grad_norm": 0.966243664403487, "learning_rate": 5.910493985860715e-06, "loss": 0.2213592529296875, "step": 18030 }, { "epoch": 0.15594331220655247, "grad_norm": 1.8539302084580591, "learning_rate": 5.910444584661324e-06, "loss": 0.1370452880859375, "step": 18035 }, { "epoch": 0.15598654572809573, "grad_norm": 38.61875819584219, "learning_rate": 5.9103951700392196e-06, "loss": 0.42632369995117186, "step": 18040 }, { "epoch": 0.156029779249639, "grad_norm": 13.699012367746155, "learning_rate": 5.910345741994629e-06, "loss": 0.1186126708984375, "step": 18045 }, { "epoch": 0.15607301277118227, "grad_norm": 6.939461257918195, "learning_rate": 5.9102963005277825e-06, "loss": 0.06872406005859374, "step": 18050 }, { "epoch": 0.15611624629272552, "grad_norm": 12.224491402404116, "learning_rate": 5.910246845638904e-06, "loss": 0.26560211181640625, "step": 18055 }, { "epoch": 0.15615947981426878, "grad_norm": 11.915608550822082, "learning_rate": 5.910197377328227e-06, "loss": 0.29444580078125, "step": 18060 }, { "epoch": 0.15620271333581207, "grad_norm": 12.427708224969303, "learning_rate": 5.910147895595976e-06, "loss": 0.24402809143066406, "step": 18065 }, { "epoch": 0.15624594685735532, "grad_norm": 17.321523445073833, "learning_rate": 5.9100984004423796e-06, "loss": 0.1284088134765625, "step": 18070 }, { "epoch": 0.15628918037889858, "grad_norm": 3.1499104235863085, "learning_rate": 5.910048891867667e-06, "loss": 0.1389627456665039, "step": 18075 }, { "epoch": 0.15633241390044184, "grad_norm": 5.5586235292610375, "learning_rate": 5.909999369872065e-06, "loss": 0.203009033203125, "step": 18080 }, { "epoch": 0.15637564742198512, "grad_norm": 26.59771972851976, "learning_rate": 5.909949834455805e-06, "loss": 0.38847503662109373, "step": 18085 }, { "epoch": 0.15641888094352838, "grad_norm": 1.2875137177536398, "learning_rate": 5.909900285619112e-06, "loss": 0.0847259521484375, "step": 18090 }, { "epoch": 0.15646211446507163, "grad_norm": 7.270440613056111, "learning_rate": 5.909850723362217e-06, "loss": 0.06399993896484375, "step": 18095 }, { "epoch": 0.1565053479866149, "grad_norm": 8.927239908951295, "learning_rate": 5.909801147685346e-06, "loss": 0.3251220703125, "step": 18100 }, { "epoch": 0.15654858150815817, "grad_norm": 17.830372988767163, "learning_rate": 5.909751558588731e-06, "loss": 0.28905029296875, "step": 18105 }, { "epoch": 0.15659181502970143, "grad_norm": 2.684231537207428, "learning_rate": 5.909701956072598e-06, "loss": 0.170428466796875, "step": 18110 }, { "epoch": 0.1566350485512447, "grad_norm": 0.4484241132206628, "learning_rate": 5.909652340137176e-06, "loss": 0.08510818481445312, "step": 18115 }, { "epoch": 0.15667828207278794, "grad_norm": 53.518768471529256, "learning_rate": 5.909602710782695e-06, "loss": 0.594384765625, "step": 18120 }, { "epoch": 0.15672151559433123, "grad_norm": 16.733551469389976, "learning_rate": 5.909553068009384e-06, "loss": 0.37866897583007814, "step": 18125 }, { "epoch": 0.15676474911587449, "grad_norm": 33.0478220277303, "learning_rate": 5.90950341181747e-06, "loss": 0.41790084838867186, "step": 18130 }, { "epoch": 0.15680798263741774, "grad_norm": 0.1849344662255298, "learning_rate": 5.909453742207185e-06, "loss": 0.17212677001953125, "step": 18135 }, { "epoch": 0.156851216158961, "grad_norm": 12.139231531113714, "learning_rate": 5.909404059178756e-06, "loss": 0.0958770751953125, "step": 18140 }, { "epoch": 0.15689444968050428, "grad_norm": 0.8932578945353582, "learning_rate": 5.9093543627324116e-06, "loss": 0.23886337280273437, "step": 18145 }, { "epoch": 0.15693768320204754, "grad_norm": 15.250735451224392, "learning_rate": 5.909304652868383e-06, "loss": 0.1515167236328125, "step": 18150 }, { "epoch": 0.1569809167235908, "grad_norm": 3.691111182303715, "learning_rate": 5.909254929586899e-06, "loss": 0.19677352905273438, "step": 18155 }, { "epoch": 0.15702415024513408, "grad_norm": 42.25654138914492, "learning_rate": 5.909205192888187e-06, "loss": 0.2311767578125, "step": 18160 }, { "epoch": 0.15706738376667734, "grad_norm": 5.435100046018372, "learning_rate": 5.909155442772478e-06, "loss": 0.0362701416015625, "step": 18165 }, { "epoch": 0.1571106172882206, "grad_norm": 2.4758059445503617, "learning_rate": 5.909105679240001e-06, "loss": 0.05197982788085938, "step": 18170 }, { "epoch": 0.15715385080976385, "grad_norm": 8.907660785058168, "learning_rate": 5.9090559022909865e-06, "loss": 0.0840911865234375, "step": 18175 }, { "epoch": 0.15719708433130714, "grad_norm": 5.760390175812317, "learning_rate": 5.9090061119256625e-06, "loss": 0.297857666015625, "step": 18180 }, { "epoch": 0.1572403178528504, "grad_norm": 14.66724024829053, "learning_rate": 5.90895630814426e-06, "loss": 0.0735595703125, "step": 18185 }, { "epoch": 0.15728355137439365, "grad_norm": 6.9409265324291765, "learning_rate": 5.9089064909470065e-06, "loss": 0.2002431869506836, "step": 18190 }, { "epoch": 0.1573267848959369, "grad_norm": 47.531522475943866, "learning_rate": 5.908856660334134e-06, "loss": 0.147564697265625, "step": 18195 }, { "epoch": 0.1573700184174802, "grad_norm": 52.36771113836715, "learning_rate": 5.908806816305872e-06, "loss": 0.29318695068359374, "step": 18200 }, { "epoch": 0.15741325193902345, "grad_norm": 5.746056005851249, "learning_rate": 5.908756958862451e-06, "loss": 0.033498382568359374, "step": 18205 }, { "epoch": 0.1574564854605667, "grad_norm": 21.905598908354, "learning_rate": 5.908707088004099e-06, "loss": 0.09055023193359375, "step": 18210 }, { "epoch": 0.15749971898210996, "grad_norm": 32.092810761207545, "learning_rate": 5.908657203731046e-06, "loss": 0.220513916015625, "step": 18215 }, { "epoch": 0.15754295250365324, "grad_norm": 1.2235526784062605, "learning_rate": 5.9086073060435245e-06, "loss": 0.11640167236328125, "step": 18220 }, { "epoch": 0.1575861860251965, "grad_norm": 8.595153037644286, "learning_rate": 5.9085573949417625e-06, "loss": 0.0941497802734375, "step": 18225 }, { "epoch": 0.15762941954673976, "grad_norm": 49.841551719975975, "learning_rate": 5.90850747042599e-06, "loss": 0.38638916015625, "step": 18230 }, { "epoch": 0.15767265306828301, "grad_norm": 3.006645573335205, "learning_rate": 5.908457532496439e-06, "loss": 0.1818859100341797, "step": 18235 }, { "epoch": 0.1577158865898263, "grad_norm": 16.46661476247405, "learning_rate": 5.908407581153339e-06, "loss": 0.17902145385742188, "step": 18240 }, { "epoch": 0.15775912011136956, "grad_norm": 3.642663911786817, "learning_rate": 5.908357616396919e-06, "loss": 0.07077484130859375, "step": 18245 }, { "epoch": 0.1578023536329128, "grad_norm": 0.127603130191714, "learning_rate": 5.908307638227412e-06, "loss": 0.24315948486328126, "step": 18250 }, { "epoch": 0.15784558715445607, "grad_norm": 15.224917618349817, "learning_rate": 5.908257646645047e-06, "loss": 0.2107940673828125, "step": 18255 }, { "epoch": 0.15788882067599935, "grad_norm": 24.73333621249261, "learning_rate": 5.9082076416500534e-06, "loss": 0.17890090942382814, "step": 18260 }, { "epoch": 0.1579320541975426, "grad_norm": 7.3715256575006505, "learning_rate": 5.9081576232426645e-06, "loss": 0.26587371826171874, "step": 18265 }, { "epoch": 0.15797528771908587, "grad_norm": 71.93074420592264, "learning_rate": 5.908107591423109e-06, "loss": 0.220599365234375, "step": 18270 }, { "epoch": 0.15801852124062912, "grad_norm": 40.85702452087819, "learning_rate": 5.9080575461916185e-06, "loss": 0.402044677734375, "step": 18275 }, { "epoch": 0.1580617547621724, "grad_norm": 51.45791809753238, "learning_rate": 5.908007487548424e-06, "loss": 0.22661247253417968, "step": 18280 }, { "epoch": 0.15810498828371566, "grad_norm": 38.20666616399058, "learning_rate": 5.907957415493756e-06, "loss": 0.382086181640625, "step": 18285 }, { "epoch": 0.15814822180525892, "grad_norm": 2.6575862212388914, "learning_rate": 5.907907330027844e-06, "loss": 0.1100433349609375, "step": 18290 }, { "epoch": 0.15819145532680218, "grad_norm": 2.9959338384633796, "learning_rate": 5.907857231150921e-06, "loss": 0.325616455078125, "step": 18295 }, { "epoch": 0.15823468884834546, "grad_norm": 5.431978640608665, "learning_rate": 5.907807118863218e-06, "loss": 0.17629661560058593, "step": 18300 }, { "epoch": 0.15827792236988872, "grad_norm": 7.983493978411822, "learning_rate": 5.907756993164965e-06, "loss": 0.23128662109375, "step": 18305 }, { "epoch": 0.15832115589143197, "grad_norm": 33.878062001349065, "learning_rate": 5.907706854056394e-06, "loss": 0.3039398193359375, "step": 18310 }, { "epoch": 0.15836438941297523, "grad_norm": 3.025521179785065, "learning_rate": 5.907656701537735e-06, "loss": 0.08740234375, "step": 18315 }, { "epoch": 0.15840762293451852, "grad_norm": 3.7196276598911755, "learning_rate": 5.907606535609221e-06, "loss": 0.135504150390625, "step": 18320 }, { "epoch": 0.15845085645606177, "grad_norm": 1.3669027556428532, "learning_rate": 5.907556356271082e-06, "loss": 0.3016326904296875, "step": 18325 }, { "epoch": 0.15849408997760503, "grad_norm": 47.386549267006416, "learning_rate": 5.9075061635235504e-06, "loss": 0.2443206787109375, "step": 18330 }, { "epoch": 0.15853732349914829, "grad_norm": 3.06616352147364, "learning_rate": 5.907455957366858e-06, "loss": 0.2264984130859375, "step": 18335 }, { "epoch": 0.15858055702069157, "grad_norm": 42.849744886790944, "learning_rate": 5.907405737801235e-06, "loss": 0.5413482666015625, "step": 18340 }, { "epoch": 0.15862379054223483, "grad_norm": 2.610391158515186, "learning_rate": 5.907355504826913e-06, "loss": 0.2864055633544922, "step": 18345 }, { "epoch": 0.15866702406377808, "grad_norm": 34.33776090499562, "learning_rate": 5.907305258444125e-06, "loss": 0.17433204650878906, "step": 18350 }, { "epoch": 0.15871025758532137, "grad_norm": 35.89563115346041, "learning_rate": 5.907254998653102e-06, "loss": 0.3238189697265625, "step": 18355 }, { "epoch": 0.15875349110686462, "grad_norm": 19.86031537011966, "learning_rate": 5.907204725454076e-06, "loss": 0.202874755859375, "step": 18360 }, { "epoch": 0.15879672462840788, "grad_norm": 3.059220415222128, "learning_rate": 5.907154438847279e-06, "loss": 0.0577545166015625, "step": 18365 }, { "epoch": 0.15883995814995114, "grad_norm": 10.192114655859239, "learning_rate": 5.907104138832942e-06, "loss": 0.382049560546875, "step": 18370 }, { "epoch": 0.15888319167149442, "grad_norm": 50.03337471197756, "learning_rate": 5.9070538254112974e-06, "loss": 0.48524169921875, "step": 18375 }, { "epoch": 0.15892642519303768, "grad_norm": 11.974006374794591, "learning_rate": 5.9070034985825775e-06, "loss": 0.2621368408203125, "step": 18380 }, { "epoch": 0.15896965871458094, "grad_norm": 8.070009773212892, "learning_rate": 5.906953158347015e-06, "loss": 0.28013153076171876, "step": 18385 }, { "epoch": 0.1590128922361242, "grad_norm": 4.167296898265123, "learning_rate": 5.90690280470484e-06, "loss": 0.0852142333984375, "step": 18390 }, { "epoch": 0.15905612575766748, "grad_norm": 18.37036771499242, "learning_rate": 5.906852437656287e-06, "loss": 0.492999267578125, "step": 18395 }, { "epoch": 0.15909935927921073, "grad_norm": 26.26948299321978, "learning_rate": 5.906802057201587e-06, "loss": 0.3626434326171875, "step": 18400 }, { "epoch": 0.159142592800754, "grad_norm": 15.603359003395182, "learning_rate": 5.9067516633409736e-06, "loss": 0.1215911865234375, "step": 18405 }, { "epoch": 0.15918582632229725, "grad_norm": 25.791572545518125, "learning_rate": 5.906701256074678e-06, "loss": 0.24772796630859376, "step": 18410 }, { "epoch": 0.15922905984384053, "grad_norm": 13.24801117750097, "learning_rate": 5.906650835402934e-06, "loss": 0.3462982177734375, "step": 18415 }, { "epoch": 0.1592722933653838, "grad_norm": 11.153034592841696, "learning_rate": 5.9066004013259726e-06, "loss": 0.13020553588867187, "step": 18420 }, { "epoch": 0.15931552688692704, "grad_norm": 27.744201915454003, "learning_rate": 5.9065499538440255e-06, "loss": 0.2150177001953125, "step": 18425 }, { "epoch": 0.1593587604084703, "grad_norm": 5.490386650096594, "learning_rate": 5.906499492957329e-06, "loss": 0.142694091796875, "step": 18430 }, { "epoch": 0.15940199393001359, "grad_norm": 5.18675212303607, "learning_rate": 5.906449018666111e-06, "loss": 0.0669830322265625, "step": 18435 }, { "epoch": 0.15944522745155684, "grad_norm": 6.009046866163282, "learning_rate": 5.906398530970609e-06, "loss": 0.059637451171875, "step": 18440 }, { "epoch": 0.1594884609731001, "grad_norm": 1.4871732606086512, "learning_rate": 5.906348029871053e-06, "loss": 0.256927490234375, "step": 18445 }, { "epoch": 0.15953169449464336, "grad_norm": 7.079104477100563, "learning_rate": 5.906297515367678e-06, "loss": 0.3817287445068359, "step": 18450 }, { "epoch": 0.15957492801618664, "grad_norm": 6.457448822371054, "learning_rate": 5.9062469874607145e-06, "loss": 0.20388107299804686, "step": 18455 }, { "epoch": 0.1596181615377299, "grad_norm": 1.7239034790992684, "learning_rate": 5.906196446150397e-06, "loss": 0.22093124389648439, "step": 18460 }, { "epoch": 0.15966139505927315, "grad_norm": 3.7323529681765097, "learning_rate": 5.906145891436958e-06, "loss": 0.379119873046875, "step": 18465 }, { "epoch": 0.1597046285808164, "grad_norm": 70.91808769022407, "learning_rate": 5.906095323320632e-06, "loss": 0.11746978759765625, "step": 18470 }, { "epoch": 0.1597478621023597, "grad_norm": 23.66446896870885, "learning_rate": 5.906044741801648e-06, "loss": 0.038852691650390625, "step": 18475 }, { "epoch": 0.15979109562390295, "grad_norm": 18.417206149211335, "learning_rate": 5.9059941468802456e-06, "loss": 0.273016357421875, "step": 18480 }, { "epoch": 0.1598343291454462, "grad_norm": 15.467637631792268, "learning_rate": 5.905943538556655e-06, "loss": 0.0690032958984375, "step": 18485 }, { "epoch": 0.15987756266698946, "grad_norm": 41.94888041792517, "learning_rate": 5.905892916831108e-06, "loss": 0.5196060180664063, "step": 18490 }, { "epoch": 0.15992079618853275, "grad_norm": 22.1639168719606, "learning_rate": 5.905842281703841e-06, "loss": 0.50350341796875, "step": 18495 }, { "epoch": 0.159964029710076, "grad_norm": 2.9062748793979982, "learning_rate": 5.905791633175084e-06, "loss": 0.2065093994140625, "step": 18500 }, { "epoch": 0.16000726323161926, "grad_norm": 9.367058468039266, "learning_rate": 5.905740971245075e-06, "loss": 0.19373626708984376, "step": 18505 }, { "epoch": 0.16005049675316252, "grad_norm": 16.14325326633839, "learning_rate": 5.905690295914044e-06, "loss": 0.1026641845703125, "step": 18510 }, { "epoch": 0.1600937302747058, "grad_norm": 3.199561986535286, "learning_rate": 5.905639607182227e-06, "loss": 0.062274169921875, "step": 18515 }, { "epoch": 0.16013696379624906, "grad_norm": 4.9971245771283925, "learning_rate": 5.905588905049857e-06, "loss": 0.1724700927734375, "step": 18520 }, { "epoch": 0.16018019731779232, "grad_norm": 18.094022621613504, "learning_rate": 5.905538189517167e-06, "loss": 0.13302841186523437, "step": 18525 }, { "epoch": 0.1602234308393356, "grad_norm": 13.643499446503814, "learning_rate": 5.905487460584392e-06, "loss": 0.438714599609375, "step": 18530 }, { "epoch": 0.16026666436087886, "grad_norm": 9.916207115524795, "learning_rate": 5.905436718251766e-06, "loss": 0.32674484252929686, "step": 18535 }, { "epoch": 0.16030989788242211, "grad_norm": 17.101619602863906, "learning_rate": 5.905385962519522e-06, "loss": 0.133355712890625, "step": 18540 }, { "epoch": 0.16035313140396537, "grad_norm": 3.1415618838855, "learning_rate": 5.905335193387894e-06, "loss": 0.12156982421875, "step": 18545 }, { "epoch": 0.16039636492550866, "grad_norm": 2.8627480777000867, "learning_rate": 5.905284410857118e-06, "loss": 0.11914520263671875, "step": 18550 }, { "epoch": 0.1604395984470519, "grad_norm": 20.06025444707851, "learning_rate": 5.9052336149274265e-06, "loss": 0.18319244384765626, "step": 18555 }, { "epoch": 0.16048283196859517, "grad_norm": 13.297208272538287, "learning_rate": 5.905182805599054e-06, "loss": 0.2075408935546875, "step": 18560 }, { "epoch": 0.16052606549013843, "grad_norm": 14.587101235213932, "learning_rate": 5.905131982872235e-06, "loss": 0.4159442901611328, "step": 18565 }, { "epoch": 0.1605692990116817, "grad_norm": 5.719064948417053, "learning_rate": 5.905081146747205e-06, "loss": 0.212615966796875, "step": 18570 }, { "epoch": 0.16061253253322497, "grad_norm": 3.0733185348462686, "learning_rate": 5.905030297224196e-06, "loss": 0.117144775390625, "step": 18575 }, { "epoch": 0.16065576605476822, "grad_norm": 13.17419223464278, "learning_rate": 5.904979434303445e-06, "loss": 0.21723480224609376, "step": 18580 }, { "epoch": 0.16069899957631148, "grad_norm": 26.686251313699074, "learning_rate": 5.904928557985186e-06, "loss": 0.2667236328125, "step": 18585 }, { "epoch": 0.16074223309785476, "grad_norm": 26.785939239128922, "learning_rate": 5.904877668269652e-06, "loss": 0.483746337890625, "step": 18590 }, { "epoch": 0.16078546661939802, "grad_norm": 0.3650046743912247, "learning_rate": 5.90482676515708e-06, "loss": 0.0714691162109375, "step": 18595 }, { "epoch": 0.16082870014094128, "grad_norm": 23.587319617807196, "learning_rate": 5.904775848647703e-06, "loss": 0.13724021911621093, "step": 18600 }, { "epoch": 0.16087193366248453, "grad_norm": 1.9590037868716783, "learning_rate": 5.904724918741756e-06, "loss": 0.07200927734375, "step": 18605 }, { "epoch": 0.16091516718402782, "grad_norm": 2.5000462802736174, "learning_rate": 5.904673975439475e-06, "loss": 0.4175140380859375, "step": 18610 }, { "epoch": 0.16095840070557108, "grad_norm": 6.2185234927631505, "learning_rate": 5.904623018741094e-06, "loss": 0.28305892944335936, "step": 18615 }, { "epoch": 0.16100163422711433, "grad_norm": 1.0534397716965818, "learning_rate": 5.904572048646849e-06, "loss": 0.310772705078125, "step": 18620 }, { "epoch": 0.1610448677486576, "grad_norm": 1.678044778015022, "learning_rate": 5.9045210651569735e-06, "loss": 0.07275390625, "step": 18625 }, { "epoch": 0.16108810127020087, "grad_norm": 10.245220194701366, "learning_rate": 5.904470068271704e-06, "loss": 0.23126220703125, "step": 18630 }, { "epoch": 0.16113133479174413, "grad_norm": 13.894916003197833, "learning_rate": 5.904419057991275e-06, "loss": 0.1725006103515625, "step": 18635 }, { "epoch": 0.1611745683132874, "grad_norm": 6.430157248324974, "learning_rate": 5.904368034315922e-06, "loss": 0.45040283203125, "step": 18640 }, { "epoch": 0.16121780183483064, "grad_norm": 5.328479438243145, "learning_rate": 5.904316997245879e-06, "loss": 0.09196319580078124, "step": 18645 }, { "epoch": 0.16126103535637393, "grad_norm": 8.179127227902773, "learning_rate": 5.904265946781385e-06, "loss": 0.520001220703125, "step": 18650 }, { "epoch": 0.16130426887791718, "grad_norm": 26.281562611971324, "learning_rate": 5.904214882922672e-06, "loss": 0.270147705078125, "step": 18655 }, { "epoch": 0.16134750239946044, "grad_norm": 43.232538743312304, "learning_rate": 5.904163805669976e-06, "loss": 0.4746734619140625, "step": 18660 }, { "epoch": 0.1613907359210037, "grad_norm": 2.7702722165259175, "learning_rate": 5.9041127150235335e-06, "loss": 0.11904220581054688, "step": 18665 }, { "epoch": 0.16143396944254698, "grad_norm": 4.562017344582694, "learning_rate": 5.90406161098358e-06, "loss": 0.16640625, "step": 18670 }, { "epoch": 0.16147720296409024, "grad_norm": 23.59503925032054, "learning_rate": 5.90401049355035e-06, "loss": 0.10806427001953126, "step": 18675 }, { "epoch": 0.1615204364856335, "grad_norm": 3.0322323048376996, "learning_rate": 5.903959362724081e-06, "loss": 0.2360980987548828, "step": 18680 }, { "epoch": 0.16156367000717675, "grad_norm": 3.3516180507446167, "learning_rate": 5.903908218505009e-06, "loss": 0.039910888671875, "step": 18685 }, { "epoch": 0.16160690352872004, "grad_norm": 3.6181948537493978, "learning_rate": 5.903857060893367e-06, "loss": 0.168316650390625, "step": 18690 }, { "epoch": 0.1616501370502633, "grad_norm": 12.754178809350599, "learning_rate": 5.9038058898893945e-06, "loss": 0.4773719787597656, "step": 18695 }, { "epoch": 0.16169337057180655, "grad_norm": 26.830126854998312, "learning_rate": 5.903754705493325e-06, "loss": 0.5480777740478515, "step": 18700 }, { "epoch": 0.1617366040933498, "grad_norm": 24.938053720601953, "learning_rate": 5.903703507705395e-06, "loss": 0.38968505859375, "step": 18705 }, { "epoch": 0.1617798376148931, "grad_norm": 5.314347874457476, "learning_rate": 5.903652296525842e-06, "loss": 0.05373382568359375, "step": 18710 }, { "epoch": 0.16182307113643635, "grad_norm": 32.2212790348636, "learning_rate": 5.9036010719549e-06, "loss": 0.34593048095703127, "step": 18715 }, { "epoch": 0.1618663046579796, "grad_norm": 9.405365531685318, "learning_rate": 5.9035498339928075e-06, "loss": 0.06060104370117188, "step": 18720 }, { "epoch": 0.1619095381795229, "grad_norm": 7.1009601327563345, "learning_rate": 5.903498582639799e-06, "loss": 0.051263427734375, "step": 18725 }, { "epoch": 0.16195277170106614, "grad_norm": 2.599471886802718, "learning_rate": 5.903447317896112e-06, "loss": 0.147869873046875, "step": 18730 }, { "epoch": 0.1619960052226094, "grad_norm": 40.4378846752519, "learning_rate": 5.903396039761983e-06, "loss": 0.14150772094726563, "step": 18735 }, { "epoch": 0.16203923874415266, "grad_norm": 7.301575386153703, "learning_rate": 5.903344748237647e-06, "loss": 0.31187744140625, "step": 18740 }, { "epoch": 0.16208247226569594, "grad_norm": 3.702838876575185, "learning_rate": 5.903293443323342e-06, "loss": 0.18156280517578124, "step": 18745 }, { "epoch": 0.1621257057872392, "grad_norm": 49.25266626811795, "learning_rate": 5.903242125019304e-06, "loss": 0.3693023681640625, "step": 18750 }, { "epoch": 0.16216893930878246, "grad_norm": 45.270990418068386, "learning_rate": 5.90319079332577e-06, "loss": 0.9804832458496093, "step": 18755 }, { "epoch": 0.1622121728303257, "grad_norm": 15.045462688451064, "learning_rate": 5.903139448242977e-06, "loss": 0.1084747314453125, "step": 18760 }, { "epoch": 0.162255406351869, "grad_norm": 35.63607590722256, "learning_rate": 5.9030880897711606e-06, "loss": 0.19584236145019532, "step": 18765 }, { "epoch": 0.16229863987341225, "grad_norm": 35.043527331418545, "learning_rate": 5.903036717910559e-06, "loss": 0.139263916015625, "step": 18770 }, { "epoch": 0.1623418733949555, "grad_norm": 16.855923428804143, "learning_rate": 5.902985332661409e-06, "loss": 0.277874755859375, "step": 18775 }, { "epoch": 0.16238510691649877, "grad_norm": 37.71551690484471, "learning_rate": 5.902933934023946e-06, "loss": 0.21221542358398438, "step": 18780 }, { "epoch": 0.16242834043804205, "grad_norm": 4.177262000047362, "learning_rate": 5.902882521998408e-06, "loss": 0.07234344482421876, "step": 18785 }, { "epoch": 0.1624715739595853, "grad_norm": 1.6747110051525584, "learning_rate": 5.9028310965850336e-06, "loss": 0.1101226806640625, "step": 18790 }, { "epoch": 0.16251480748112856, "grad_norm": 73.8027155649233, "learning_rate": 5.902779657784058e-06, "loss": 0.223590087890625, "step": 18795 }, { "epoch": 0.16255804100267182, "grad_norm": 23.00251460182629, "learning_rate": 5.90272820559572e-06, "loss": 0.0826507568359375, "step": 18800 }, { "epoch": 0.1626012745242151, "grad_norm": 8.010275398279676, "learning_rate": 5.902676740020254e-06, "loss": 0.0927734375, "step": 18805 }, { "epoch": 0.16264450804575836, "grad_norm": 5.889790039075263, "learning_rate": 5.9026252610579016e-06, "loss": 0.09942474365234374, "step": 18810 }, { "epoch": 0.16268774156730162, "grad_norm": 8.243072658233476, "learning_rate": 5.902573768708897e-06, "loss": 0.33037109375, "step": 18815 }, { "epoch": 0.16273097508884488, "grad_norm": 35.46648751627148, "learning_rate": 5.902522262973479e-06, "loss": 0.8901134490966797, "step": 18820 }, { "epoch": 0.16277420861038816, "grad_norm": 3.86676322113661, "learning_rate": 5.902470743851884e-06, "loss": 0.04681396484375, "step": 18825 }, { "epoch": 0.16281744213193142, "grad_norm": 18.33559002039148, "learning_rate": 5.902419211344351e-06, "loss": 0.242047119140625, "step": 18830 }, { "epoch": 0.16286067565347467, "grad_norm": 23.18166844751268, "learning_rate": 5.9023676654511166e-06, "loss": 0.16912841796875, "step": 18835 }, { "epoch": 0.16290390917501793, "grad_norm": 6.82568972240715, "learning_rate": 5.9023161061724195e-06, "loss": 0.2438812255859375, "step": 18840 }, { "epoch": 0.16294714269656121, "grad_norm": 3.485931058712672, "learning_rate": 5.902264533508497e-06, "loss": 0.52874755859375, "step": 18845 }, { "epoch": 0.16299037621810447, "grad_norm": 18.44537001220139, "learning_rate": 5.902212947459586e-06, "loss": 0.269268798828125, "step": 18850 }, { "epoch": 0.16303360973964773, "grad_norm": 2.761223491380943, "learning_rate": 5.902161348025927e-06, "loss": 0.28809814453125, "step": 18855 }, { "epoch": 0.16307684326119098, "grad_norm": 6.503025162539405, "learning_rate": 5.902109735207755e-06, "loss": 0.541363525390625, "step": 18860 }, { "epoch": 0.16312007678273427, "grad_norm": 1.053819660562763, "learning_rate": 5.902058109005309e-06, "loss": 0.2604248046875, "step": 18865 }, { "epoch": 0.16316331030427753, "grad_norm": 11.597741941873934, "learning_rate": 5.902006469418829e-06, "loss": 0.20680084228515624, "step": 18870 }, { "epoch": 0.16320654382582078, "grad_norm": 2.6281014709608383, "learning_rate": 5.9019548164485505e-06, "loss": 0.05943756103515625, "step": 18875 }, { "epoch": 0.16324977734736404, "grad_norm": 3.349779465068383, "learning_rate": 5.901903150094713e-06, "loss": 0.4902801513671875, "step": 18880 }, { "epoch": 0.16329301086890732, "grad_norm": 6.097170309413962, "learning_rate": 5.901851470357554e-06, "loss": 0.17288360595703126, "step": 18885 }, { "epoch": 0.16333624439045058, "grad_norm": 4.410046299761464, "learning_rate": 5.9017997772373136e-06, "loss": 0.66436767578125, "step": 18890 }, { "epoch": 0.16337947791199384, "grad_norm": 5.687114033343565, "learning_rate": 5.901748070734228e-06, "loss": 0.15806427001953124, "step": 18895 }, { "epoch": 0.16342271143353712, "grad_norm": 8.849739017595393, "learning_rate": 5.9016963508485376e-06, "loss": 0.1301544189453125, "step": 18900 }, { "epoch": 0.16346594495508038, "grad_norm": 34.99809264458316, "learning_rate": 5.901644617580479e-06, "loss": 0.448779296875, "step": 18905 }, { "epoch": 0.16350917847662363, "grad_norm": 7.385394979237893, "learning_rate": 5.901592870930292e-06, "loss": 0.1525390625, "step": 18910 }, { "epoch": 0.1635524119981669, "grad_norm": 2.8184237318927776, "learning_rate": 5.901541110898216e-06, "loss": 0.1502300262451172, "step": 18915 }, { "epoch": 0.16359564551971018, "grad_norm": 15.45230227061373, "learning_rate": 5.901489337484488e-06, "loss": 0.21883373260498046, "step": 18920 }, { "epoch": 0.16363887904125343, "grad_norm": 25.996770363224833, "learning_rate": 5.9014375506893484e-06, "loss": 0.19451522827148438, "step": 18925 }, { "epoch": 0.1636821125627967, "grad_norm": 5.270733200779605, "learning_rate": 5.901385750513034e-06, "loss": 0.41553955078125, "step": 18930 }, { "epoch": 0.16372534608433995, "grad_norm": 10.837592632813255, "learning_rate": 5.901333936955786e-06, "loss": 0.0558349609375, "step": 18935 }, { "epoch": 0.16376857960588323, "grad_norm": 10.238064007994176, "learning_rate": 5.901282110017841e-06, "loss": 0.3014404296875, "step": 18940 }, { "epoch": 0.1638118131274265, "grad_norm": 17.32012172131748, "learning_rate": 5.901230269699441e-06, "loss": 0.40297927856445315, "step": 18945 }, { "epoch": 0.16385504664896974, "grad_norm": 8.150987264580678, "learning_rate": 5.901178416000822e-06, "loss": 0.18154296875, "step": 18950 }, { "epoch": 0.163898280170513, "grad_norm": 2.579732144783236, "learning_rate": 5.901126548922224e-06, "loss": 0.0950408935546875, "step": 18955 }, { "epoch": 0.16394151369205628, "grad_norm": 11.741748190890556, "learning_rate": 5.901074668463888e-06, "loss": 0.149285888671875, "step": 18960 }, { "epoch": 0.16398474721359954, "grad_norm": 51.3689311975751, "learning_rate": 5.901022774626052e-06, "loss": 0.2657806396484375, "step": 18965 }, { "epoch": 0.1640279807351428, "grad_norm": 3.650246819697637, "learning_rate": 5.9009708674089545e-06, "loss": 0.1423614501953125, "step": 18970 }, { "epoch": 0.16407121425668605, "grad_norm": 4.415258118830975, "learning_rate": 5.900918946812836e-06, "loss": 0.0397979736328125, "step": 18975 }, { "epoch": 0.16411444777822934, "grad_norm": 11.508197761964885, "learning_rate": 5.900867012837936e-06, "loss": 0.07921142578125, "step": 18980 }, { "epoch": 0.1641576812997726, "grad_norm": 35.82977200591129, "learning_rate": 5.900815065484493e-06, "loss": 0.1475128173828125, "step": 18985 }, { "epoch": 0.16420091482131585, "grad_norm": 1.6487634098463153, "learning_rate": 5.9007631047527475e-06, "loss": 0.1587066650390625, "step": 18990 }, { "epoch": 0.1642441483428591, "grad_norm": 0.7965982790180239, "learning_rate": 5.900711130642939e-06, "loss": 0.08021697998046876, "step": 18995 }, { "epoch": 0.1642873818644024, "grad_norm": 2.9022229581132457, "learning_rate": 5.900659143155307e-06, "loss": 0.1721466064453125, "step": 19000 }, { "epoch": 0.16433061538594565, "grad_norm": 38.90086058747275, "learning_rate": 5.900607142290092e-06, "loss": 0.3516845703125, "step": 19005 }, { "epoch": 0.1643738489074889, "grad_norm": 12.032579403938835, "learning_rate": 5.900555128047532e-06, "loss": 0.2647796630859375, "step": 19010 }, { "epoch": 0.16441708242903216, "grad_norm": 4.581623221668638, "learning_rate": 5.900503100427869e-06, "loss": 0.41943359375, "step": 19015 }, { "epoch": 0.16446031595057545, "grad_norm": 14.135488704317552, "learning_rate": 5.900451059431341e-06, "loss": 0.57740478515625, "step": 19020 }, { "epoch": 0.1645035494721187, "grad_norm": 6.349813895721785, "learning_rate": 5.9003990050581894e-06, "loss": 0.0848876953125, "step": 19025 }, { "epoch": 0.16454678299366196, "grad_norm": 13.328793385521738, "learning_rate": 5.900346937308654e-06, "loss": 0.11121826171875, "step": 19030 }, { "epoch": 0.16459001651520522, "grad_norm": 22.399481894238782, "learning_rate": 5.900294856182975e-06, "loss": 0.17057037353515625, "step": 19035 }, { "epoch": 0.1646332500367485, "grad_norm": 26.744323394475792, "learning_rate": 5.9002427616813925e-06, "loss": 0.1090057373046875, "step": 19040 }, { "epoch": 0.16467648355829176, "grad_norm": 39.85748205329972, "learning_rate": 5.900190653804146e-06, "loss": 0.26198348999023435, "step": 19045 }, { "epoch": 0.16471971707983502, "grad_norm": 43.39851582184366, "learning_rate": 5.900138532551478e-06, "loss": 0.3467041015625, "step": 19050 }, { "epoch": 0.16476295060137827, "grad_norm": 2.3366463077426913, "learning_rate": 5.900086397923625e-06, "loss": 0.07329254150390625, "step": 19055 }, { "epoch": 0.16480618412292156, "grad_norm": 43.48863028251151, "learning_rate": 5.900034249920831e-06, "loss": 0.37965087890625, "step": 19060 }, { "epoch": 0.1648494176444648, "grad_norm": 89.09609833540499, "learning_rate": 5.899982088543336e-06, "loss": 0.17330322265625, "step": 19065 }, { "epoch": 0.16489265116600807, "grad_norm": 1.2973468459321391, "learning_rate": 5.899929913791378e-06, "loss": 0.08460693359375, "step": 19070 }, { "epoch": 0.16493588468755133, "grad_norm": 2.328918533748567, "learning_rate": 5.899877725665201e-06, "loss": 0.10787353515625, "step": 19075 }, { "epoch": 0.1649791182090946, "grad_norm": 19.08958032587743, "learning_rate": 5.899825524165044e-06, "loss": 0.23640060424804688, "step": 19080 }, { "epoch": 0.16502235173063787, "grad_norm": 0.29323538322245407, "learning_rate": 5.899773309291148e-06, "loss": 0.30889739990234377, "step": 19085 }, { "epoch": 0.16506558525218112, "grad_norm": 3.0776732837818224, "learning_rate": 5.899721081043753e-06, "loss": 0.22236099243164062, "step": 19090 }, { "epoch": 0.1651088187737244, "grad_norm": 6.648730652016332, "learning_rate": 5.899668839423101e-06, "loss": 0.18482666015625, "step": 19095 }, { "epoch": 0.16515205229526767, "grad_norm": 11.880603372899087, "learning_rate": 5.899616584429433e-06, "loss": 0.09363250732421875, "step": 19100 }, { "epoch": 0.16519528581681092, "grad_norm": 43.99133638877014, "learning_rate": 5.899564316062989e-06, "loss": 0.5501708984375, "step": 19105 }, { "epoch": 0.16523851933835418, "grad_norm": 9.584376831289598, "learning_rate": 5.899512034324011e-06, "loss": 0.142510986328125, "step": 19110 }, { "epoch": 0.16528175285989746, "grad_norm": 28.38013884271316, "learning_rate": 5.899459739212739e-06, "loss": 0.43656005859375, "step": 19115 }, { "epoch": 0.16532498638144072, "grad_norm": 7.895424538755779, "learning_rate": 5.899407430729415e-06, "loss": 0.04764251708984375, "step": 19120 }, { "epoch": 0.16536821990298398, "grad_norm": 21.911195778674944, "learning_rate": 5.8993551088742795e-06, "loss": 0.181610107421875, "step": 19125 }, { "epoch": 0.16541145342452723, "grad_norm": 8.351882723120482, "learning_rate": 5.899302773647576e-06, "loss": 0.417449951171875, "step": 19130 }, { "epoch": 0.16545468694607052, "grad_norm": 60.156748486612095, "learning_rate": 5.8992504250495434e-06, "loss": 0.35106964111328126, "step": 19135 }, { "epoch": 0.16549792046761377, "grad_norm": 5.675530528428177, "learning_rate": 5.8991980630804246e-06, "loss": 0.09716873168945313, "step": 19140 }, { "epoch": 0.16554115398915703, "grad_norm": 5.937177796903188, "learning_rate": 5.89914568774046e-06, "loss": 0.19553375244140625, "step": 19145 }, { "epoch": 0.1655843875107003, "grad_norm": 5.007907287941078, "learning_rate": 5.899093299029892e-06, "loss": 0.35116424560546877, "step": 19150 }, { "epoch": 0.16562762103224357, "grad_norm": 14.02408927878511, "learning_rate": 5.899040896948961e-06, "loss": 0.3251708984375, "step": 19155 }, { "epoch": 0.16567085455378683, "grad_norm": 3.4224752911417577, "learning_rate": 5.898988481497911e-06, "loss": 0.15628509521484374, "step": 19160 }, { "epoch": 0.16571408807533008, "grad_norm": 1.972912634453489, "learning_rate": 5.898936052676981e-06, "loss": 0.19596176147460936, "step": 19165 }, { "epoch": 0.16575732159687334, "grad_norm": 1.0856920449037861, "learning_rate": 5.898883610486415e-06, "loss": 0.2515899658203125, "step": 19170 }, { "epoch": 0.16580055511841663, "grad_norm": 17.54157651927055, "learning_rate": 5.898831154926454e-06, "loss": 0.0561248779296875, "step": 19175 }, { "epoch": 0.16584378863995988, "grad_norm": 4.485752430118755, "learning_rate": 5.898778685997339e-06, "loss": 0.12508544921875, "step": 19180 }, { "epoch": 0.16588702216150314, "grad_norm": 18.49547971179302, "learning_rate": 5.898726203699313e-06, "loss": 0.15803356170654298, "step": 19185 }, { "epoch": 0.1659302556830464, "grad_norm": 51.56842048995225, "learning_rate": 5.898673708032618e-06, "loss": 0.2503684997558594, "step": 19190 }, { "epoch": 0.16597348920458968, "grad_norm": 7.2064383748400465, "learning_rate": 5.898621198997496e-06, "loss": 0.2399749755859375, "step": 19195 }, { "epoch": 0.16601672272613294, "grad_norm": 24.424258461205977, "learning_rate": 5.8985686765941886e-06, "loss": 0.4180816650390625, "step": 19200 }, { "epoch": 0.1660599562476762, "grad_norm": 7.753371923384376, "learning_rate": 5.898516140822939e-06, "loss": 0.18180007934570314, "step": 19205 }, { "epoch": 0.16610318976921945, "grad_norm": 68.12848050999968, "learning_rate": 5.89846359168399e-06, "loss": 0.20602378845214844, "step": 19210 }, { "epoch": 0.16614642329076273, "grad_norm": 9.61246548798304, "learning_rate": 5.898411029177582e-06, "loss": 0.2705356597900391, "step": 19215 }, { "epoch": 0.166189656812306, "grad_norm": 5.1454396953332, "learning_rate": 5.8983584533039596e-06, "loss": 0.19522705078125, "step": 19220 }, { "epoch": 0.16623289033384925, "grad_norm": 10.005631467636992, "learning_rate": 5.898305864063363e-06, "loss": 0.20526123046875, "step": 19225 }, { "epoch": 0.1662761238553925, "grad_norm": 32.66042207487282, "learning_rate": 5.8982532614560354e-06, "loss": 0.2162464141845703, "step": 19230 }, { "epoch": 0.1663193573769358, "grad_norm": 36.58755081961314, "learning_rate": 5.898200645482221e-06, "loss": 0.36578521728515623, "step": 19235 }, { "epoch": 0.16636259089847905, "grad_norm": 13.665461897340736, "learning_rate": 5.898148016142161e-06, "loss": 0.2876708984375, "step": 19240 }, { "epoch": 0.1664058244200223, "grad_norm": 36.658857857452176, "learning_rate": 5.898095373436099e-06, "loss": 0.35628585815429686, "step": 19245 }, { "epoch": 0.16644905794156556, "grad_norm": 0.7518856187667499, "learning_rate": 5.898042717364276e-06, "loss": 0.08272361755371094, "step": 19250 }, { "epoch": 0.16649229146310884, "grad_norm": 13.879178788229945, "learning_rate": 5.897990047926936e-06, "loss": 0.427880859375, "step": 19255 }, { "epoch": 0.1665355249846521, "grad_norm": 20.818970114040496, "learning_rate": 5.897937365124323e-06, "loss": 0.08686447143554688, "step": 19260 }, { "epoch": 0.16657875850619536, "grad_norm": 2.884344785490106, "learning_rate": 5.897884668956679e-06, "loss": 0.13834152221679688, "step": 19265 }, { "epoch": 0.16662199202773864, "grad_norm": 8.468525048494707, "learning_rate": 5.897831959424247e-06, "loss": 0.0865509033203125, "step": 19270 }, { "epoch": 0.1666652255492819, "grad_norm": 1.1657653531874754, "learning_rate": 5.897779236527269e-06, "loss": 0.6663490295410156, "step": 19275 }, { "epoch": 0.16670845907082515, "grad_norm": 1.307782461695027, "learning_rate": 5.89772650026599e-06, "loss": 0.1338409423828125, "step": 19280 }, { "epoch": 0.1667516925923684, "grad_norm": 188.57982853956892, "learning_rate": 5.897673750640651e-06, "loss": 0.4425384521484375, "step": 19285 }, { "epoch": 0.1667949261139117, "grad_norm": 3.763323276385562, "learning_rate": 5.897620987651499e-06, "loss": 0.2185791015625, "step": 19290 }, { "epoch": 0.16683815963545495, "grad_norm": 20.286638742462266, "learning_rate": 5.897568211298773e-06, "loss": 0.3656787872314453, "step": 19295 }, { "epoch": 0.1668813931569982, "grad_norm": 5.594589155966525, "learning_rate": 5.8975154215827195e-06, "loss": 0.10850982666015625, "step": 19300 }, { "epoch": 0.16692462667854147, "grad_norm": 29.36288765939617, "learning_rate": 5.897462618503581e-06, "loss": 0.10019073486328126, "step": 19305 }, { "epoch": 0.16696786020008475, "grad_norm": 11.86653531378857, "learning_rate": 5.897409802061601e-06, "loss": 0.08846282958984375, "step": 19310 }, { "epoch": 0.167011093721628, "grad_norm": 32.352584790923345, "learning_rate": 5.897356972257023e-06, "loss": 0.9787303924560546, "step": 19315 }, { "epoch": 0.16705432724317126, "grad_norm": 1.7616526537750532, "learning_rate": 5.89730412909009e-06, "loss": 0.12905807495117189, "step": 19320 }, { "epoch": 0.16709756076471452, "grad_norm": 19.469739548963027, "learning_rate": 5.897251272561046e-06, "loss": 0.2224395751953125, "step": 19325 }, { "epoch": 0.1671407942862578, "grad_norm": 11.150543240209192, "learning_rate": 5.897198402670136e-06, "loss": 0.23929901123046876, "step": 19330 }, { "epoch": 0.16718402780780106, "grad_norm": 9.38757754695777, "learning_rate": 5.897145519417604e-06, "loss": 0.32874755859375, "step": 19335 }, { "epoch": 0.16722726132934432, "grad_norm": 9.066576732989397, "learning_rate": 5.897092622803691e-06, "loss": 0.2438812255859375, "step": 19340 }, { "epoch": 0.16727049485088757, "grad_norm": 7.881524238290179, "learning_rate": 5.8970397128286435e-06, "loss": 0.37751922607421873, "step": 19345 }, { "epoch": 0.16731372837243086, "grad_norm": 34.00039971125765, "learning_rate": 5.8969867894927045e-06, "loss": 0.15836639404296876, "step": 19350 }, { "epoch": 0.16735696189397412, "grad_norm": 22.06226156144979, "learning_rate": 5.896933852796119e-06, "loss": 0.3908203125, "step": 19355 }, { "epoch": 0.16740019541551737, "grad_norm": 25.834488941222887, "learning_rate": 5.89688090273913e-06, "loss": 0.09736785888671876, "step": 19360 }, { "epoch": 0.16744342893706063, "grad_norm": 12.796412988457094, "learning_rate": 5.8968279393219825e-06, "loss": 0.15099334716796875, "step": 19365 }, { "epoch": 0.1674866624586039, "grad_norm": 13.955381531821482, "learning_rate": 5.896774962544921e-06, "loss": 0.0803375244140625, "step": 19370 }, { "epoch": 0.16752989598014717, "grad_norm": 22.631998606954923, "learning_rate": 5.896721972408189e-06, "loss": 0.1865264892578125, "step": 19375 }, { "epoch": 0.16757312950169043, "grad_norm": 13.456621063870001, "learning_rate": 5.896668968912031e-06, "loss": 0.31639862060546875, "step": 19380 }, { "epoch": 0.16761636302323368, "grad_norm": 5.220213134554717, "learning_rate": 5.896615952056692e-06, "loss": 0.22945556640625, "step": 19385 }, { "epoch": 0.16765959654477697, "grad_norm": 44.0793474963272, "learning_rate": 5.896562921842415e-06, "loss": 0.2299633026123047, "step": 19390 }, { "epoch": 0.16770283006632022, "grad_norm": 5.674454174797702, "learning_rate": 5.896509878269447e-06, "loss": 0.1108123779296875, "step": 19395 }, { "epoch": 0.16774606358786348, "grad_norm": 3.9467394282909285, "learning_rate": 5.8964568213380315e-06, "loss": 0.34472808837890623, "step": 19400 }, { "epoch": 0.16778929710940674, "grad_norm": 1.0518586899863718, "learning_rate": 5.896403751048413e-06, "loss": 0.31759033203125, "step": 19405 }, { "epoch": 0.16783253063095002, "grad_norm": 14.19740863214449, "learning_rate": 5.896350667400836e-06, "loss": 0.4885162353515625, "step": 19410 }, { "epoch": 0.16787576415249328, "grad_norm": 2.681120440204562, "learning_rate": 5.896297570395545e-06, "loss": 0.35957984924316405, "step": 19415 }, { "epoch": 0.16791899767403654, "grad_norm": 5.206996727451824, "learning_rate": 5.896244460032787e-06, "loss": 0.0910919189453125, "step": 19420 }, { "epoch": 0.1679622311955798, "grad_norm": 29.961466731957955, "learning_rate": 5.896191336312804e-06, "loss": 0.299029541015625, "step": 19425 }, { "epoch": 0.16800546471712308, "grad_norm": 3.862629379530116, "learning_rate": 5.896138199235843e-06, "loss": 0.07808151245117187, "step": 19430 }, { "epoch": 0.16804869823866633, "grad_norm": 10.74587296110412, "learning_rate": 5.896085048802149e-06, "loss": 0.5592864990234375, "step": 19435 }, { "epoch": 0.1680919317602096, "grad_norm": 22.568986527445183, "learning_rate": 5.896031885011966e-06, "loss": 0.306707763671875, "step": 19440 }, { "epoch": 0.16813516528175285, "grad_norm": 15.694651873033127, "learning_rate": 5.89597870786554e-06, "loss": 0.2331939697265625, "step": 19445 }, { "epoch": 0.16817839880329613, "grad_norm": 19.841620591408994, "learning_rate": 5.895925517363117e-06, "loss": 0.1643829345703125, "step": 19450 }, { "epoch": 0.1682216323248394, "grad_norm": 25.838247583049817, "learning_rate": 5.8958723135049405e-06, "loss": 0.193280029296875, "step": 19455 }, { "epoch": 0.16826486584638264, "grad_norm": 38.401769461460596, "learning_rate": 5.895819096291257e-06, "loss": 0.3315765380859375, "step": 19460 }, { "epoch": 0.16830809936792593, "grad_norm": 3.1398576315881765, "learning_rate": 5.895765865722311e-06, "loss": 0.19241943359375, "step": 19465 }, { "epoch": 0.16835133288946919, "grad_norm": 15.366863174204907, "learning_rate": 5.89571262179835e-06, "loss": 0.31494140625, "step": 19470 }, { "epoch": 0.16839456641101244, "grad_norm": 20.90745854197201, "learning_rate": 5.895659364519617e-06, "loss": 0.23389434814453125, "step": 19475 }, { "epoch": 0.1684377999325557, "grad_norm": 17.276786804918306, "learning_rate": 5.895606093886359e-06, "loss": 0.23475418090820313, "step": 19480 }, { "epoch": 0.16848103345409898, "grad_norm": 7.734017482620451, "learning_rate": 5.895552809898822e-06, "loss": 0.1226165771484375, "step": 19485 }, { "epoch": 0.16852426697564224, "grad_norm": 7.144563792981767, "learning_rate": 5.895499512557252e-06, "loss": 0.2722076416015625, "step": 19490 }, { "epoch": 0.1685675004971855, "grad_norm": 17.000854769731745, "learning_rate": 5.895446201861893e-06, "loss": 0.5630828857421875, "step": 19495 }, { "epoch": 0.16861073401872875, "grad_norm": 38.804084750774635, "learning_rate": 5.895392877812993e-06, "loss": 0.31044921875, "step": 19500 }, { "epoch": 0.16865396754027204, "grad_norm": 4.881009141512086, "learning_rate": 5.895339540410796e-06, "loss": 0.178778076171875, "step": 19505 }, { "epoch": 0.1686972010618153, "grad_norm": 3.1467456135799554, "learning_rate": 5.895286189655549e-06, "loss": 0.093377685546875, "step": 19510 }, { "epoch": 0.16874043458335855, "grad_norm": 7.95814649381515, "learning_rate": 5.895232825547498e-06, "loss": 0.1938262939453125, "step": 19515 }, { "epoch": 0.1687836681049018, "grad_norm": 0.7293668603923557, "learning_rate": 5.895179448086889e-06, "loss": 0.0698760986328125, "step": 19520 }, { "epoch": 0.1688269016264451, "grad_norm": 6.7929026696332615, "learning_rate": 5.895126057273968e-06, "loss": 0.15612945556640626, "step": 19525 }, { "epoch": 0.16887013514798835, "grad_norm": 34.39649602917991, "learning_rate": 5.895072653108982e-06, "loss": 0.42281494140625, "step": 19530 }, { "epoch": 0.1689133686695316, "grad_norm": 9.441877794401243, "learning_rate": 5.895019235592177e-06, "loss": 0.12692413330078126, "step": 19535 }, { "epoch": 0.16895660219107486, "grad_norm": 27.98456415645856, "learning_rate": 5.8949658047237985e-06, "loss": 0.17752227783203126, "step": 19540 }, { "epoch": 0.16899983571261815, "grad_norm": 1.277368733792755, "learning_rate": 5.894912360504093e-06, "loss": 0.26366310119628905, "step": 19545 }, { "epoch": 0.1690430692341614, "grad_norm": 7.585655076845845, "learning_rate": 5.894858902933308e-06, "loss": 0.25234375, "step": 19550 }, { "epoch": 0.16908630275570466, "grad_norm": 2.3222638196133896, "learning_rate": 5.89480543201169e-06, "loss": 0.1207061767578125, "step": 19555 }, { "epoch": 0.16912953627724792, "grad_norm": 25.409015337983117, "learning_rate": 5.894751947739485e-06, "loss": 0.48464412689208985, "step": 19560 }, { "epoch": 0.1691727697987912, "grad_norm": 3.119354691247033, "learning_rate": 5.894698450116939e-06, "loss": 0.0377777099609375, "step": 19565 }, { "epoch": 0.16921600332033446, "grad_norm": 10.634263221827384, "learning_rate": 5.8946449391443005e-06, "loss": 0.194415283203125, "step": 19570 }, { "epoch": 0.1692592368418777, "grad_norm": 6.7560315099458235, "learning_rate": 5.894591414821815e-06, "loss": 0.29404296875, "step": 19575 }, { "epoch": 0.16930247036342097, "grad_norm": 1.6030012683939332, "learning_rate": 5.894537877149729e-06, "loss": 0.2408935546875, "step": 19580 }, { "epoch": 0.16934570388496425, "grad_norm": 16.783915267978557, "learning_rate": 5.894484326128291e-06, "loss": 0.12579116821289063, "step": 19585 }, { "epoch": 0.1693889374065075, "grad_norm": 6.777361501753731, "learning_rate": 5.894430761757747e-06, "loss": 0.0624420166015625, "step": 19590 }, { "epoch": 0.16943217092805077, "grad_norm": 9.980643410876233, "learning_rate": 5.8943771840383436e-06, "loss": 0.1752777099609375, "step": 19595 }, { "epoch": 0.16947540444959402, "grad_norm": 3.667160699256644, "learning_rate": 5.894323592970328e-06, "loss": 0.193487548828125, "step": 19600 }, { "epoch": 0.1695186379711373, "grad_norm": 2.822206477408497, "learning_rate": 5.894269988553949e-06, "loss": 0.33677978515625, "step": 19605 }, { "epoch": 0.16956187149268057, "grad_norm": 4.727399682126478, "learning_rate": 5.894216370789452e-06, "loss": 0.35816650390625, "step": 19610 }, { "epoch": 0.16960510501422382, "grad_norm": 5.8082122673089405, "learning_rate": 5.894162739677084e-06, "loss": 0.30703582763671877, "step": 19615 }, { "epoch": 0.16964833853576708, "grad_norm": 41.452169934326214, "learning_rate": 5.894109095217094e-06, "loss": 0.10981216430664062, "step": 19620 }, { "epoch": 0.16969157205731036, "grad_norm": 4.334936111757978, "learning_rate": 5.894055437409728e-06, "loss": 0.115374755859375, "step": 19625 }, { "epoch": 0.16973480557885362, "grad_norm": 37.499307684367096, "learning_rate": 5.894001766255235e-06, "loss": 0.31040191650390625, "step": 19630 }, { "epoch": 0.16977803910039688, "grad_norm": 34.56155642574477, "learning_rate": 5.893948081753861e-06, "loss": 0.33275299072265624, "step": 19635 }, { "epoch": 0.16982127262194016, "grad_norm": 7.626581048464319, "learning_rate": 5.893894383905854e-06, "loss": 0.14148788452148436, "step": 19640 }, { "epoch": 0.16986450614348342, "grad_norm": 17.52302682545812, "learning_rate": 5.893840672711462e-06, "loss": 0.329119873046875, "step": 19645 }, { "epoch": 0.16990773966502667, "grad_norm": 2.018023418263198, "learning_rate": 5.893786948170933e-06, "loss": 0.14331207275390626, "step": 19650 }, { "epoch": 0.16995097318656993, "grad_norm": 0.6472362984200181, "learning_rate": 5.893733210284513e-06, "loss": 0.09834842681884766, "step": 19655 }, { "epoch": 0.16999420670811322, "grad_norm": 1.9177685673727909, "learning_rate": 5.893679459052453e-06, "loss": 0.175738525390625, "step": 19660 }, { "epoch": 0.17003744022965647, "grad_norm": 22.068593947585867, "learning_rate": 5.893625694474997e-06, "loss": 0.3316490173339844, "step": 19665 }, { "epoch": 0.17008067375119973, "grad_norm": 53.60348482178433, "learning_rate": 5.893571916552397e-06, "loss": 0.5905845642089844, "step": 19670 }, { "epoch": 0.17012390727274299, "grad_norm": 10.33974168504898, "learning_rate": 5.893518125284898e-06, "loss": 0.2245635986328125, "step": 19675 }, { "epoch": 0.17016714079428627, "grad_norm": 1.9375732044400178, "learning_rate": 5.893464320672748e-06, "loss": 0.29077606201171874, "step": 19680 }, { "epoch": 0.17021037431582953, "grad_norm": 0.5158510194262176, "learning_rate": 5.893410502716198e-06, "loss": 0.11367301940917969, "step": 19685 }, { "epoch": 0.17025360783737278, "grad_norm": 37.84547111469372, "learning_rate": 5.893356671415493e-06, "loss": 0.503070068359375, "step": 19690 }, { "epoch": 0.17029684135891604, "grad_norm": 18.134543076893326, "learning_rate": 5.893302826770884e-06, "loss": 0.15812530517578124, "step": 19695 }, { "epoch": 0.17034007488045932, "grad_norm": 10.730128817688128, "learning_rate": 5.8932489687826164e-06, "loss": 0.066119384765625, "step": 19700 }, { "epoch": 0.17038330840200258, "grad_norm": 1.6199095981442384, "learning_rate": 5.893195097450942e-06, "loss": 0.1257537841796875, "step": 19705 }, { "epoch": 0.17042654192354584, "grad_norm": 6.88871246769352, "learning_rate": 5.893141212776106e-06, "loss": 0.12350502014160156, "step": 19710 }, { "epoch": 0.1704697754450891, "grad_norm": 39.722256971073534, "learning_rate": 5.8930873147583595e-06, "loss": 0.1284820556640625, "step": 19715 }, { "epoch": 0.17051300896663238, "grad_norm": 2.6884570711671225, "learning_rate": 5.89303340339795e-06, "loss": 0.39290847778320315, "step": 19720 }, { "epoch": 0.17055624248817564, "grad_norm": 6.729292962245727, "learning_rate": 5.892979478695125e-06, "loss": 0.21494140625, "step": 19725 }, { "epoch": 0.1705994760097189, "grad_norm": 54.990602624943044, "learning_rate": 5.892925540650135e-06, "loss": 0.471185302734375, "step": 19730 }, { "epoch": 0.17064270953126215, "grad_norm": 5.388858010687356, "learning_rate": 5.892871589263228e-06, "loss": 0.12701644897460937, "step": 19735 }, { "epoch": 0.17068594305280543, "grad_norm": 18.24116082775071, "learning_rate": 5.892817624534652e-06, "loss": 0.17941741943359374, "step": 19740 }, { "epoch": 0.1707291765743487, "grad_norm": 26.687708721466983, "learning_rate": 5.892763646464658e-06, "loss": 0.33797683715820315, "step": 19745 }, { "epoch": 0.17077241009589195, "grad_norm": 16.75600607143391, "learning_rate": 5.892709655053493e-06, "loss": 0.12020263671875, "step": 19750 }, { "epoch": 0.1708156436174352, "grad_norm": 2.223969628546006, "learning_rate": 5.892655650301407e-06, "loss": 0.36429595947265625, "step": 19755 }, { "epoch": 0.1708588771389785, "grad_norm": 4.947046158404125, "learning_rate": 5.892601632208648e-06, "loss": 0.131854248046875, "step": 19760 }, { "epoch": 0.17090211066052174, "grad_norm": 0.7569345982648311, "learning_rate": 5.892547600775467e-06, "loss": 0.07259902954101563, "step": 19765 }, { "epoch": 0.170945344182065, "grad_norm": 32.509552223092236, "learning_rate": 5.892493556002111e-06, "loss": 0.05241069793701172, "step": 19770 }, { "epoch": 0.17098857770360826, "grad_norm": 11.317499201746461, "learning_rate": 5.892439497888831e-06, "loss": 0.088726806640625, "step": 19775 }, { "epoch": 0.17103181122515154, "grad_norm": 1.245135674306318, "learning_rate": 5.8923854264358755e-06, "loss": 0.41992530822753904, "step": 19780 }, { "epoch": 0.1710750447466948, "grad_norm": 10.152492617247505, "learning_rate": 5.892331341643493e-06, "loss": 0.18502197265625, "step": 19785 }, { "epoch": 0.17111827826823806, "grad_norm": 17.166490181857455, "learning_rate": 5.892277243511936e-06, "loss": 0.721923828125, "step": 19790 }, { "epoch": 0.1711615117897813, "grad_norm": 33.65392987160457, "learning_rate": 5.89222313204145e-06, "loss": 0.2087371826171875, "step": 19795 }, { "epoch": 0.1712047453113246, "grad_norm": 1.4709005742623655, "learning_rate": 5.892169007232287e-06, "loss": 0.6455474853515625, "step": 19800 }, { "epoch": 0.17124797883286785, "grad_norm": 1.1994359371799097, "learning_rate": 5.892114869084696e-06, "loss": 0.374932861328125, "step": 19805 }, { "epoch": 0.1712912123544111, "grad_norm": 15.590505242736528, "learning_rate": 5.892060717598927e-06, "loss": 0.31952667236328125, "step": 19810 }, { "epoch": 0.17133444587595437, "grad_norm": 17.08576517471819, "learning_rate": 5.8920065527752305e-06, "loss": 0.2010986328125, "step": 19815 }, { "epoch": 0.17137767939749765, "grad_norm": 15.068118586961848, "learning_rate": 5.891952374613854e-06, "loss": 0.3114288330078125, "step": 19820 }, { "epoch": 0.1714209129190409, "grad_norm": 36.38481905519011, "learning_rate": 5.891898183115049e-06, "loss": 0.43634033203125, "step": 19825 }, { "epoch": 0.17146414644058416, "grad_norm": 21.88866172805354, "learning_rate": 5.891843978279065e-06, "loss": 0.227972412109375, "step": 19830 }, { "epoch": 0.17150737996212745, "grad_norm": 0.509476208070799, "learning_rate": 5.891789760106153e-06, "loss": 0.10867156982421874, "step": 19835 }, { "epoch": 0.1715506134836707, "grad_norm": 5.241900419967077, "learning_rate": 5.891735528596561e-06, "loss": 0.558184814453125, "step": 19840 }, { "epoch": 0.17159384700521396, "grad_norm": 3.0419650043970314, "learning_rate": 5.8916812837505405e-06, "loss": 0.1669147491455078, "step": 19845 }, { "epoch": 0.17163708052675722, "grad_norm": 3.5639413834174762, "learning_rate": 5.891627025568342e-06, "loss": 0.4244651794433594, "step": 19850 }, { "epoch": 0.1716803140483005, "grad_norm": 21.323280927755317, "learning_rate": 5.891572754050214e-06, "loss": 0.32811279296875, "step": 19855 }, { "epoch": 0.17172354756984376, "grad_norm": 4.014723927783626, "learning_rate": 5.891518469196409e-06, "loss": 0.40241241455078125, "step": 19860 }, { "epoch": 0.17176678109138702, "grad_norm": 49.98758215382415, "learning_rate": 5.891464171007176e-06, "loss": 0.4045654296875, "step": 19865 }, { "epoch": 0.17181001461293027, "grad_norm": 12.1350247339622, "learning_rate": 5.891409859482766e-06, "loss": 0.24642333984375, "step": 19870 }, { "epoch": 0.17185324813447356, "grad_norm": 28.830938415197377, "learning_rate": 5.891355534623429e-06, "loss": 0.4011405944824219, "step": 19875 }, { "epoch": 0.17189648165601681, "grad_norm": 1.5783449910137946, "learning_rate": 5.8913011964294156e-06, "loss": 0.16882524490356446, "step": 19880 }, { "epoch": 0.17193971517756007, "grad_norm": 7.10809147548617, "learning_rate": 5.891246844900977e-06, "loss": 0.12192230224609375, "step": 19885 }, { "epoch": 0.17198294869910333, "grad_norm": 13.055582618762902, "learning_rate": 5.891192480038362e-06, "loss": 0.43612060546875, "step": 19890 }, { "epoch": 0.1720261822206466, "grad_norm": 1.9428733433579024, "learning_rate": 5.891138101841825e-06, "loss": 0.120965576171875, "step": 19895 }, { "epoch": 0.17206941574218987, "grad_norm": 3.1644723674752435, "learning_rate": 5.891083710311613e-06, "loss": 0.17362060546875, "step": 19900 }, { "epoch": 0.17211264926373313, "grad_norm": 21.04963103635032, "learning_rate": 5.8910293054479795e-06, "loss": 0.06699371337890625, "step": 19905 }, { "epoch": 0.17215588278527638, "grad_norm": 1.33571832924388, "learning_rate": 5.890974887251173e-06, "loss": 0.2344146728515625, "step": 19910 }, { "epoch": 0.17219911630681967, "grad_norm": 0.8149991680367795, "learning_rate": 5.890920455721447e-06, "loss": 0.070843505859375, "step": 19915 }, { "epoch": 0.17224234982836292, "grad_norm": 5.272031069297755, "learning_rate": 5.890866010859051e-06, "loss": 0.19485092163085938, "step": 19920 }, { "epoch": 0.17228558334990618, "grad_norm": 1.2217554345582111, "learning_rate": 5.890811552664236e-06, "loss": 0.19954833984375, "step": 19925 }, { "epoch": 0.17232881687144944, "grad_norm": 19.52368419539954, "learning_rate": 5.890757081137253e-06, "loss": 0.18093490600585938, "step": 19930 }, { "epoch": 0.17237205039299272, "grad_norm": 0.452558655446878, "learning_rate": 5.890702596278354e-06, "loss": 0.1798919677734375, "step": 19935 }, { "epoch": 0.17241528391453598, "grad_norm": 15.63525849227562, "learning_rate": 5.890648098087791e-06, "loss": 0.093365478515625, "step": 19940 }, { "epoch": 0.17245851743607923, "grad_norm": 3.3204368774762125, "learning_rate": 5.890593586565814e-06, "loss": 0.41485061645507815, "step": 19945 }, { "epoch": 0.1725017509576225, "grad_norm": 1.8747687158440811, "learning_rate": 5.890539061712675e-06, "loss": 0.12036590576171875, "step": 19950 }, { "epoch": 0.17254498447916577, "grad_norm": 21.697045610468113, "learning_rate": 5.890484523528624e-06, "loss": 0.389013671875, "step": 19955 }, { "epoch": 0.17258821800070903, "grad_norm": 15.849203411336694, "learning_rate": 5.890429972013915e-06, "loss": 0.1169189453125, "step": 19960 }, { "epoch": 0.1726314515222523, "grad_norm": 31.975418483459112, "learning_rate": 5.890375407168798e-06, "loss": 0.113433837890625, "step": 19965 }, { "epoch": 0.17267468504379554, "grad_norm": 14.84997959983496, "learning_rate": 5.8903208289935255e-06, "loss": 0.1435546875, "step": 19970 }, { "epoch": 0.17271791856533883, "grad_norm": 0.9311583561834127, "learning_rate": 5.890266237488349e-06, "loss": 0.0192047119140625, "step": 19975 }, { "epoch": 0.17276115208688209, "grad_norm": 12.15697159274025, "learning_rate": 5.890211632653519e-06, "loss": 0.13046875, "step": 19980 }, { "epoch": 0.17280438560842534, "grad_norm": 56.652290146766816, "learning_rate": 5.890157014489288e-06, "loss": 0.29560089111328125, "step": 19985 }, { "epoch": 0.1728476191299686, "grad_norm": 31.355198983369124, "learning_rate": 5.890102382995909e-06, "loss": 0.293096923828125, "step": 19990 }, { "epoch": 0.17289085265151188, "grad_norm": 10.65941400410722, "learning_rate": 5.890047738173633e-06, "loss": 0.2545166015625, "step": 19995 }, { "epoch": 0.17293408617305514, "grad_norm": 10.334147063157067, "learning_rate": 5.889993080022713e-06, "loss": 0.26416168212890623, "step": 20000 }, { "epoch": 0.1729773196945984, "grad_norm": 29.01672391664138, "learning_rate": 5.889938408543399e-06, "loss": 0.23399887084960938, "step": 20005 }, { "epoch": 0.17302055321614165, "grad_norm": 8.775337955433363, "learning_rate": 5.889883723735945e-06, "loss": 0.1027557373046875, "step": 20010 }, { "epoch": 0.17306378673768494, "grad_norm": 1.5186144490296956, "learning_rate": 5.889829025600603e-06, "loss": 0.07191162109375, "step": 20015 }, { "epoch": 0.1731070202592282, "grad_norm": 19.5261009279317, "learning_rate": 5.889774314137625e-06, "loss": 0.5007118225097656, "step": 20020 }, { "epoch": 0.17315025378077145, "grad_norm": 7.337581616748138, "learning_rate": 5.889719589347262e-06, "loss": 0.136065673828125, "step": 20025 }, { "epoch": 0.17319348730231474, "grad_norm": 39.54910890164721, "learning_rate": 5.889664851229768e-06, "loss": 0.18707122802734374, "step": 20030 }, { "epoch": 0.173236720823858, "grad_norm": 7.074381944973889, "learning_rate": 5.889610099785396e-06, "loss": 0.077862548828125, "step": 20035 }, { "epoch": 0.17327995434540125, "grad_norm": 0.35659424310120846, "learning_rate": 5.889555335014397e-06, "loss": 0.19270782470703124, "step": 20040 }, { "epoch": 0.1733231878669445, "grad_norm": 28.360372767704575, "learning_rate": 5.889500556917023e-06, "loss": 0.2214569091796875, "step": 20045 }, { "epoch": 0.1733664213884878, "grad_norm": 2.1369492797739023, "learning_rate": 5.8894457654935295e-06, "loss": 0.25305023193359377, "step": 20050 }, { "epoch": 0.17340965491003105, "grad_norm": 7.0292632035311895, "learning_rate": 5.889390960744167e-06, "loss": 0.25223770141601565, "step": 20055 }, { "epoch": 0.1734528884315743, "grad_norm": 9.39818158503394, "learning_rate": 5.889336142669188e-06, "loss": 0.278515625, "step": 20060 }, { "epoch": 0.17349612195311756, "grad_norm": 0.6856247115702555, "learning_rate": 5.889281311268847e-06, "loss": 0.11894302368164063, "step": 20065 }, { "epoch": 0.17353935547466084, "grad_norm": 9.570738018788802, "learning_rate": 5.889226466543395e-06, "loss": 0.130108642578125, "step": 20070 }, { "epoch": 0.1735825889962041, "grad_norm": 0.773335936211954, "learning_rate": 5.889171608493086e-06, "loss": 0.0936431884765625, "step": 20075 }, { "epoch": 0.17362582251774736, "grad_norm": 10.290049937025012, "learning_rate": 5.889116737118172e-06, "loss": 0.5163856506347656, "step": 20080 }, { "epoch": 0.17366905603929061, "grad_norm": 21.422497194373125, "learning_rate": 5.889061852418908e-06, "loss": 0.4447998046875, "step": 20085 }, { "epoch": 0.1737122895608339, "grad_norm": 8.070151030474099, "learning_rate": 5.889006954395546e-06, "loss": 0.17216911315917968, "step": 20090 }, { "epoch": 0.17375552308237716, "grad_norm": 22.371805810199472, "learning_rate": 5.88895204304834e-06, "loss": 0.09418182373046875, "step": 20095 }, { "epoch": 0.1737987566039204, "grad_norm": 1.4800653652455713, "learning_rate": 5.888897118377541e-06, "loss": 0.1064697265625, "step": 20100 }, { "epoch": 0.17384199012546367, "grad_norm": 5.349290099030137, "learning_rate": 5.888842180383404e-06, "loss": 0.08854446411132813, "step": 20105 }, { "epoch": 0.17388522364700695, "grad_norm": 10.441594033352658, "learning_rate": 5.8887872290661825e-06, "loss": 0.23276786804199218, "step": 20110 }, { "epoch": 0.1739284571685502, "grad_norm": 1.5502378081080586, "learning_rate": 5.888732264426129e-06, "loss": 0.18853073120117186, "step": 20115 }, { "epoch": 0.17397169069009347, "grad_norm": 1.748199671805245, "learning_rate": 5.888677286463499e-06, "loss": 0.15020523071289063, "step": 20120 }, { "epoch": 0.17401492421163672, "grad_norm": 9.550518400153434, "learning_rate": 5.8886222951785435e-06, "loss": 0.16634521484375, "step": 20125 }, { "epoch": 0.17405815773318, "grad_norm": 13.114797102224593, "learning_rate": 5.888567290571517e-06, "loss": 0.13013114929199218, "step": 20130 }, { "epoch": 0.17410139125472326, "grad_norm": 13.853162988674166, "learning_rate": 5.888512272642674e-06, "loss": 0.08296966552734375, "step": 20135 }, { "epoch": 0.17414462477626652, "grad_norm": 18.323477358833745, "learning_rate": 5.8884572413922676e-06, "loss": 0.43302001953125, "step": 20140 }, { "epoch": 0.17418785829780978, "grad_norm": 4.723464163099276, "learning_rate": 5.8884021968205515e-06, "loss": 0.23311920166015626, "step": 20145 }, { "epoch": 0.17423109181935306, "grad_norm": 5.170403171375898, "learning_rate": 5.888347138927779e-06, "loss": 0.15469894409179688, "step": 20150 }, { "epoch": 0.17427432534089632, "grad_norm": 2.437604107909649, "learning_rate": 5.888292067714206e-06, "loss": 0.3313568115234375, "step": 20155 }, { "epoch": 0.17431755886243958, "grad_norm": 20.522153339024406, "learning_rate": 5.888236983180084e-06, "loss": 0.25267333984375, "step": 20160 }, { "epoch": 0.17436079238398283, "grad_norm": 3.9832007182066533, "learning_rate": 5.888181885325669e-06, "loss": 0.35603790283203124, "step": 20165 }, { "epoch": 0.17440402590552612, "grad_norm": 12.530099239387809, "learning_rate": 5.8881267741512135e-06, "loss": 0.3307697296142578, "step": 20170 }, { "epoch": 0.17444725942706937, "grad_norm": 7.900619155497907, "learning_rate": 5.8880716496569735e-06, "loss": 0.131463623046875, "step": 20175 }, { "epoch": 0.17449049294861263, "grad_norm": 3.3467645710291793, "learning_rate": 5.8880165118432015e-06, "loss": 0.18648681640625, "step": 20180 }, { "epoch": 0.1745337264701559, "grad_norm": 31.289121511762882, "learning_rate": 5.887961360710153e-06, "loss": 0.18477554321289064, "step": 20185 }, { "epoch": 0.17457695999169917, "grad_norm": 23.79635892527612, "learning_rate": 5.887906196258082e-06, "loss": 0.142242431640625, "step": 20190 }, { "epoch": 0.17462019351324243, "grad_norm": 87.19330239222712, "learning_rate": 5.887851018487242e-06, "loss": 0.3265209197998047, "step": 20195 }, { "epoch": 0.17466342703478568, "grad_norm": 0.8475927011641179, "learning_rate": 5.8877958273978895e-06, "loss": 0.0481353759765625, "step": 20200 }, { "epoch": 0.17470666055632897, "grad_norm": 33.02644753829308, "learning_rate": 5.887740622990277e-06, "loss": 0.1539520263671875, "step": 20205 }, { "epoch": 0.17474989407787223, "grad_norm": 19.322314914195033, "learning_rate": 5.8876854052646595e-06, "loss": 0.2625732421875, "step": 20210 }, { "epoch": 0.17479312759941548, "grad_norm": 31.272075038299498, "learning_rate": 5.887630174221293e-06, "loss": 0.2024810791015625, "step": 20215 }, { "epoch": 0.17483636112095874, "grad_norm": 27.11246282669758, "learning_rate": 5.887574929860431e-06, "loss": 0.6522071838378907, "step": 20220 }, { "epoch": 0.17487959464250202, "grad_norm": 15.82838936161555, "learning_rate": 5.887519672182328e-06, "loss": 0.16704788208007812, "step": 20225 }, { "epoch": 0.17492282816404528, "grad_norm": 10.580325440456788, "learning_rate": 5.887464401187241e-06, "loss": 0.0887054443359375, "step": 20230 }, { "epoch": 0.17496606168558854, "grad_norm": 6.526301390105611, "learning_rate": 5.8874091168754215e-06, "loss": 0.19377288818359376, "step": 20235 }, { "epoch": 0.1750092952071318, "grad_norm": 32.876182667705706, "learning_rate": 5.887353819247128e-06, "loss": 0.29461669921875, "step": 20240 }, { "epoch": 0.17505252872867508, "grad_norm": 3.2867926738834163, "learning_rate": 5.887298508302612e-06, "loss": 0.32408447265625, "step": 20245 }, { "epoch": 0.17509576225021833, "grad_norm": 17.186228629161093, "learning_rate": 5.8872431840421315e-06, "loss": 0.6325103759765625, "step": 20250 }, { "epoch": 0.1751389957717616, "grad_norm": 2.8546983050705736, "learning_rate": 5.8871878464659404e-06, "loss": 0.117413330078125, "step": 20255 }, { "epoch": 0.17518222929330485, "grad_norm": 0.6373767981038495, "learning_rate": 5.887132495574294e-06, "loss": 0.02772674560546875, "step": 20260 }, { "epoch": 0.17522546281484813, "grad_norm": 80.16533600210786, "learning_rate": 5.887077131367447e-06, "loss": 0.4083892822265625, "step": 20265 }, { "epoch": 0.1752686963363914, "grad_norm": 6.831102822662649, "learning_rate": 5.887021753845656e-06, "loss": 0.15771484375, "step": 20270 }, { "epoch": 0.17531192985793465, "grad_norm": 4.965520672821397, "learning_rate": 5.886966363009176e-06, "loss": 0.10999755859375, "step": 20275 }, { "epoch": 0.1753551633794779, "grad_norm": 43.9180565218922, "learning_rate": 5.8869109588582626e-06, "loss": 0.19210662841796874, "step": 20280 }, { "epoch": 0.1753983969010212, "grad_norm": 19.498577513451423, "learning_rate": 5.8868555413931706e-06, "loss": 0.174554443359375, "step": 20285 }, { "epoch": 0.17544163042256444, "grad_norm": 25.896381673133558, "learning_rate": 5.8868001106141555e-06, "loss": 0.30128021240234376, "step": 20290 }, { "epoch": 0.1754848639441077, "grad_norm": 6.461584655543459, "learning_rate": 5.886744666521473e-06, "loss": 0.099310302734375, "step": 20295 }, { "epoch": 0.17552809746565096, "grad_norm": 1.6940691498560332, "learning_rate": 5.88668920911538e-06, "loss": 0.16500701904296874, "step": 20300 }, { "epoch": 0.17557133098719424, "grad_norm": 5.7675457824732375, "learning_rate": 5.886633738396131e-06, "loss": 0.09557952880859374, "step": 20305 }, { "epoch": 0.1756145645087375, "grad_norm": 9.951625036569705, "learning_rate": 5.8865782543639825e-06, "loss": 0.13250732421875, "step": 20310 }, { "epoch": 0.17565779803028075, "grad_norm": 4.05670708232669, "learning_rate": 5.88652275701919e-06, "loss": 0.16605758666992188, "step": 20315 }, { "epoch": 0.175701031551824, "grad_norm": 2.163912012313471, "learning_rate": 5.88646724636201e-06, "loss": 0.1489501953125, "step": 20320 }, { "epoch": 0.1757442650733673, "grad_norm": 1.3790907613190602, "learning_rate": 5.886411722392698e-06, "loss": 0.0791046142578125, "step": 20325 }, { "epoch": 0.17578749859491055, "grad_norm": 27.3710083061107, "learning_rate": 5.88635618511151e-06, "loss": 0.19654693603515624, "step": 20330 }, { "epoch": 0.1758307321164538, "grad_norm": 6.353349843637689, "learning_rate": 5.886300634518701e-06, "loss": 0.15853118896484375, "step": 20335 }, { "epoch": 0.17587396563799707, "grad_norm": 6.01687601535223, "learning_rate": 5.886245070614531e-06, "loss": 0.047613525390625, "step": 20340 }, { "epoch": 0.17591719915954035, "grad_norm": 0.8766272555953475, "learning_rate": 5.886189493399253e-06, "loss": 0.05611419677734375, "step": 20345 }, { "epoch": 0.1759604326810836, "grad_norm": 1.1545607983383712, "learning_rate": 5.886133902873124e-06, "loss": 0.29300365447998045, "step": 20350 }, { "epoch": 0.17600366620262686, "grad_norm": 12.664555267656386, "learning_rate": 5.886078299036399e-06, "loss": 0.07386322021484375, "step": 20355 }, { "epoch": 0.17604689972417012, "grad_norm": 14.377997806313154, "learning_rate": 5.886022681889338e-06, "loss": 0.05372467041015625, "step": 20360 }, { "epoch": 0.1760901332457134, "grad_norm": 37.40320271666378, "learning_rate": 5.8859670514321936e-06, "loss": 0.23583984375, "step": 20365 }, { "epoch": 0.17613336676725666, "grad_norm": 4.262133313394518, "learning_rate": 5.885911407665225e-06, "loss": 0.23051605224609376, "step": 20370 }, { "epoch": 0.17617660028879992, "grad_norm": 36.28018965933038, "learning_rate": 5.885855750588688e-06, "loss": 0.44923095703125, "step": 20375 }, { "epoch": 0.17621983381034317, "grad_norm": 9.278750354989834, "learning_rate": 5.88580008020284e-06, "loss": 0.2690948486328125, "step": 20380 }, { "epoch": 0.17626306733188646, "grad_norm": 2.085435310834243, "learning_rate": 5.885744396507935e-06, "loss": 0.084326171875, "step": 20385 }, { "epoch": 0.17630630085342971, "grad_norm": 0.9681402064102658, "learning_rate": 5.8856886995042345e-06, "loss": 0.1080810546875, "step": 20390 }, { "epoch": 0.17634953437497297, "grad_norm": 4.221710273742367, "learning_rate": 5.885632989191991e-06, "loss": 0.36837158203125, "step": 20395 }, { "epoch": 0.17639276789651626, "grad_norm": 20.61061689445707, "learning_rate": 5.885577265571463e-06, "loss": 0.182305908203125, "step": 20400 }, { "epoch": 0.1764360014180595, "grad_norm": 1.1431655229078754, "learning_rate": 5.885521528642908e-06, "loss": 0.1372039794921875, "step": 20405 }, { "epoch": 0.17647923493960277, "grad_norm": 16.43895033686461, "learning_rate": 5.885465778406583e-06, "loss": 0.451043701171875, "step": 20410 }, { "epoch": 0.17652246846114603, "grad_norm": 0.13763876253051271, "learning_rate": 5.885410014862744e-06, "loss": 0.791741943359375, "step": 20415 }, { "epoch": 0.1765657019826893, "grad_norm": 3.561079523195175, "learning_rate": 5.88535423801165e-06, "loss": 0.12928848266601561, "step": 20420 }, { "epoch": 0.17660893550423257, "grad_norm": 4.07838142913592, "learning_rate": 5.885298447853557e-06, "loss": 0.12570343017578126, "step": 20425 }, { "epoch": 0.17665216902577582, "grad_norm": 4.841962529032994, "learning_rate": 5.885242644388722e-06, "loss": 0.1229736328125, "step": 20430 }, { "epoch": 0.17669540254731908, "grad_norm": 18.028570884559418, "learning_rate": 5.885186827617403e-06, "loss": 0.22572021484375, "step": 20435 }, { "epoch": 0.17673863606886236, "grad_norm": 9.589380895858849, "learning_rate": 5.885130997539858e-06, "loss": 0.07212066650390625, "step": 20440 }, { "epoch": 0.17678186959040562, "grad_norm": 14.112530255401255, "learning_rate": 5.885075154156343e-06, "loss": 0.2938411712646484, "step": 20445 }, { "epoch": 0.17682510311194888, "grad_norm": 2.5036953322739155, "learning_rate": 5.885019297467116e-06, "loss": 0.44122161865234377, "step": 20450 }, { "epoch": 0.17686833663349213, "grad_norm": 5.78451292852447, "learning_rate": 5.884963427472436e-06, "loss": 0.5156623840332031, "step": 20455 }, { "epoch": 0.17691157015503542, "grad_norm": 32.31280010558112, "learning_rate": 5.884907544172559e-06, "loss": 0.317822265625, "step": 20460 }, { "epoch": 0.17695480367657868, "grad_norm": 1.2803834531794915, "learning_rate": 5.884851647567743e-06, "loss": 0.267462158203125, "step": 20465 }, { "epoch": 0.17699803719812193, "grad_norm": 43.061380284725345, "learning_rate": 5.884795737658246e-06, "loss": 0.475115966796875, "step": 20470 }, { "epoch": 0.1770412707196652, "grad_norm": 3.6033226752669725, "learning_rate": 5.884739814444327e-06, "loss": 0.07650909423828126, "step": 20475 }, { "epoch": 0.17708450424120847, "grad_norm": 7.487374084161253, "learning_rate": 5.884683877926242e-06, "loss": 0.125006103515625, "step": 20480 }, { "epoch": 0.17712773776275173, "grad_norm": 9.476990238837116, "learning_rate": 5.88462792810425e-06, "loss": 0.154925537109375, "step": 20485 }, { "epoch": 0.177170971284295, "grad_norm": 11.203974681442224, "learning_rate": 5.88457196497861e-06, "loss": 0.2774862289428711, "step": 20490 }, { "epoch": 0.17721420480583824, "grad_norm": 28.292350758487917, "learning_rate": 5.884515988549578e-06, "loss": 0.2847900390625, "step": 20495 }, { "epoch": 0.17725743832738153, "grad_norm": 14.007751641215286, "learning_rate": 5.884459998817412e-06, "loss": 0.17320556640625, "step": 20500 }, { "epoch": 0.17730067184892478, "grad_norm": 13.541582211779385, "learning_rate": 5.884403995782372e-06, "loss": 0.60523681640625, "step": 20505 }, { "epoch": 0.17734390537046804, "grad_norm": 1.2881012901260187, "learning_rate": 5.884347979444717e-06, "loss": 0.1625396728515625, "step": 20510 }, { "epoch": 0.1773871388920113, "grad_norm": 2.3837356335420767, "learning_rate": 5.884291949804703e-06, "loss": 0.0842254638671875, "step": 20515 }, { "epoch": 0.17743037241355458, "grad_norm": 0.1477455655700565, "learning_rate": 5.8842359068625895e-06, "loss": 0.13549728393554689, "step": 20520 }, { "epoch": 0.17747360593509784, "grad_norm": 28.77200832883564, "learning_rate": 5.884179850618635e-06, "loss": 0.30901031494140624, "step": 20525 }, { "epoch": 0.1775168394566411, "grad_norm": 25.497010012366744, "learning_rate": 5.884123781073098e-06, "loss": 0.0937286376953125, "step": 20530 }, { "epoch": 0.17756007297818435, "grad_norm": 6.844864231488217, "learning_rate": 5.884067698226236e-06, "loss": 0.19227066040039062, "step": 20535 }, { "epoch": 0.17760330649972764, "grad_norm": 3.9844869412927677, "learning_rate": 5.884011602078309e-06, "loss": 0.050518798828125, "step": 20540 }, { "epoch": 0.1776465400212709, "grad_norm": 6.577545426953781, "learning_rate": 5.8839554926295765e-06, "loss": 0.1564117431640625, "step": 20545 }, { "epoch": 0.17768977354281415, "grad_norm": 36.72476897319931, "learning_rate": 5.883899369880295e-06, "loss": 0.23967475891113282, "step": 20550 }, { "epoch": 0.1777330070643574, "grad_norm": 49.02517966328327, "learning_rate": 5.883843233830725e-06, "loss": 0.4148681640625, "step": 20555 }, { "epoch": 0.1777762405859007, "grad_norm": 18.129558464928962, "learning_rate": 5.8837870844811245e-06, "loss": 0.14476318359375, "step": 20560 }, { "epoch": 0.17781947410744395, "grad_norm": 24.93513354939421, "learning_rate": 5.883730921831752e-06, "loss": 0.1555999755859375, "step": 20565 }, { "epoch": 0.1778627076289872, "grad_norm": 23.998687826877422, "learning_rate": 5.883674745882869e-06, "loss": 0.2106658935546875, "step": 20570 }, { "epoch": 0.1779059411505305, "grad_norm": 0.5807235458425836, "learning_rate": 5.883618556634732e-06, "loss": 0.062054443359375, "step": 20575 }, { "epoch": 0.17794917467207375, "grad_norm": 5.537833669468553, "learning_rate": 5.883562354087601e-06, "loss": 0.1599039077758789, "step": 20580 }, { "epoch": 0.177992408193617, "grad_norm": 22.445717680367792, "learning_rate": 5.883506138241735e-06, "loss": 0.14809112548828124, "step": 20585 }, { "epoch": 0.17803564171516026, "grad_norm": 8.891505589111395, "learning_rate": 5.8834499090973935e-06, "loss": 0.41282958984375, "step": 20590 }, { "epoch": 0.17807887523670354, "grad_norm": 39.616681199287754, "learning_rate": 5.883393666654837e-06, "loss": 0.30255889892578125, "step": 20595 }, { "epoch": 0.1781221087582468, "grad_norm": 7.309898098030012, "learning_rate": 5.883337410914322e-06, "loss": 0.08968429565429688, "step": 20600 }, { "epoch": 0.17816534227979006, "grad_norm": 3.9795196545948364, "learning_rate": 5.88328114187611e-06, "loss": 0.0928506851196289, "step": 20605 }, { "epoch": 0.1782085758013333, "grad_norm": 0.594662624439801, "learning_rate": 5.883224859540461e-06, "loss": 0.09317474365234375, "step": 20610 }, { "epoch": 0.1782518093228766, "grad_norm": 8.980033188265502, "learning_rate": 5.883168563907633e-06, "loss": 0.07813720703125, "step": 20615 }, { "epoch": 0.17829504284441985, "grad_norm": 1.2577412285075202, "learning_rate": 5.883112254977886e-06, "loss": 0.43222293853759763, "step": 20620 }, { "epoch": 0.1783382763659631, "grad_norm": 21.83471520282473, "learning_rate": 5.883055932751481e-06, "loss": 0.10965728759765625, "step": 20625 }, { "epoch": 0.17838150988750637, "grad_norm": 44.99042018443765, "learning_rate": 5.882999597228676e-06, "loss": 0.2867088317871094, "step": 20630 }, { "epoch": 0.17842474340904965, "grad_norm": 23.627022084562867, "learning_rate": 5.882943248409733e-06, "loss": 0.411651611328125, "step": 20635 }, { "epoch": 0.1784679769305929, "grad_norm": 15.790804605762913, "learning_rate": 5.882886886294908e-06, "loss": 0.3511634826660156, "step": 20640 }, { "epoch": 0.17851121045213617, "grad_norm": 15.039308170276525, "learning_rate": 5.882830510884466e-06, "loss": 0.13143692016601563, "step": 20645 }, { "epoch": 0.17855444397367942, "grad_norm": 7.493657564916958, "learning_rate": 5.8827741221786636e-06, "loss": 0.068939208984375, "step": 20650 }, { "epoch": 0.1785976774952227, "grad_norm": 17.203772916646944, "learning_rate": 5.88271772017776e-06, "loss": 0.352703857421875, "step": 20655 }, { "epoch": 0.17864091101676596, "grad_norm": 22.04382555151646, "learning_rate": 5.882661304882019e-06, "loss": 0.4058197021484375, "step": 20660 }, { "epoch": 0.17868414453830922, "grad_norm": 10.865889642206167, "learning_rate": 5.882604876291698e-06, "loss": 0.1023712158203125, "step": 20665 }, { "epoch": 0.17872737805985248, "grad_norm": 5.436886754690457, "learning_rate": 5.882548434407058e-06, "loss": 0.17402801513671876, "step": 20670 }, { "epoch": 0.17877061158139576, "grad_norm": 1.1284877581048236, "learning_rate": 5.88249197922836e-06, "loss": 0.11140518188476563, "step": 20675 }, { "epoch": 0.17881384510293902, "grad_norm": 53.485370242152015, "learning_rate": 5.882435510755863e-06, "loss": 0.314642333984375, "step": 20680 }, { "epoch": 0.17885707862448227, "grad_norm": 0.9208477179103253, "learning_rate": 5.882379028989828e-06, "loss": 0.315863037109375, "step": 20685 }, { "epoch": 0.17890031214602553, "grad_norm": 8.205719635554196, "learning_rate": 5.882322533930516e-06, "loss": 0.1715911865234375, "step": 20690 }, { "epoch": 0.17894354566756882, "grad_norm": 12.848970111707956, "learning_rate": 5.8822660255781875e-06, "loss": 0.20301132202148436, "step": 20695 }, { "epoch": 0.17898677918911207, "grad_norm": 12.85992116949283, "learning_rate": 5.882209503933101e-06, "loss": 0.425555419921875, "step": 20700 }, { "epoch": 0.17903001271065533, "grad_norm": 1.1044131712531984, "learning_rate": 5.882152968995521e-06, "loss": 0.42220611572265626, "step": 20705 }, { "epoch": 0.17907324623219859, "grad_norm": 18.066264440847796, "learning_rate": 5.882096420765705e-06, "loss": 0.12890625, "step": 20710 }, { "epoch": 0.17911647975374187, "grad_norm": 12.842121775717015, "learning_rate": 5.882039859243916e-06, "loss": 0.2138671875, "step": 20715 }, { "epoch": 0.17915971327528513, "grad_norm": 18.728648842513323, "learning_rate": 5.881983284430413e-06, "loss": 0.189105224609375, "step": 20720 }, { "epoch": 0.17920294679682838, "grad_norm": 18.13009042887377, "learning_rate": 5.881926696325458e-06, "loss": 0.23519744873046874, "step": 20725 }, { "epoch": 0.17924618031837164, "grad_norm": 4.359516168499685, "learning_rate": 5.881870094929312e-06, "loss": 0.3602642059326172, "step": 20730 }, { "epoch": 0.17928941383991492, "grad_norm": 12.079221640703832, "learning_rate": 5.881813480242235e-06, "loss": 0.5368011474609375, "step": 20735 }, { "epoch": 0.17933264736145818, "grad_norm": 9.513110166062662, "learning_rate": 5.88175685226449e-06, "loss": 0.06318359375, "step": 20740 }, { "epoch": 0.17937588088300144, "grad_norm": 51.3958745131504, "learning_rate": 5.881700210996336e-06, "loss": 0.354443359375, "step": 20745 }, { "epoch": 0.1794191144045447, "grad_norm": 4.211502374099748, "learning_rate": 5.881643556438035e-06, "loss": 0.32146224975585935, "step": 20750 }, { "epoch": 0.17946234792608798, "grad_norm": 6.366084277237497, "learning_rate": 5.88158688858985e-06, "loss": 0.2407379150390625, "step": 20755 }, { "epoch": 0.17950558144763124, "grad_norm": 3.555517013448199, "learning_rate": 5.88153020745204e-06, "loss": 0.2313751220703125, "step": 20760 }, { "epoch": 0.1795488149691745, "grad_norm": 21.39427366710603, "learning_rate": 5.8814735130248675e-06, "loss": 0.20904388427734374, "step": 20765 }, { "epoch": 0.17959204849071778, "grad_norm": 1.7648801121834317, "learning_rate": 5.881416805308594e-06, "loss": 0.19589691162109374, "step": 20770 }, { "epoch": 0.17963528201226103, "grad_norm": 12.982857143942665, "learning_rate": 5.88136008430348e-06, "loss": 0.21308155059814454, "step": 20775 }, { "epoch": 0.1796785155338043, "grad_norm": 0.866435593811762, "learning_rate": 5.881303350009788e-06, "loss": 0.3035003662109375, "step": 20780 }, { "epoch": 0.17972174905534755, "grad_norm": 25.942586681266416, "learning_rate": 5.88124660242778e-06, "loss": 0.20755615234375, "step": 20785 }, { "epoch": 0.17976498257689083, "grad_norm": 1.3022322959788701, "learning_rate": 5.881189841557717e-06, "loss": 0.13088455200195312, "step": 20790 }, { "epoch": 0.1798082160984341, "grad_norm": 2.1331129930868977, "learning_rate": 5.8811330673998615e-06, "loss": 0.14638137817382812, "step": 20795 }, { "epoch": 0.17985144961997734, "grad_norm": 7.551203942644651, "learning_rate": 5.881076279954474e-06, "loss": 0.24563140869140626, "step": 20800 }, { "epoch": 0.1798946831415206, "grad_norm": 35.136051903140824, "learning_rate": 5.881019479221818e-06, "loss": 0.21072998046875, "step": 20805 }, { "epoch": 0.17993791666306388, "grad_norm": 1.247241860538009, "learning_rate": 5.880962665202154e-06, "loss": 0.141363525390625, "step": 20810 }, { "epoch": 0.17998115018460714, "grad_norm": 19.72273152203791, "learning_rate": 5.8809058378957464e-06, "loss": 0.1467742919921875, "step": 20815 }, { "epoch": 0.1800243837061504, "grad_norm": 1.6745359938614177, "learning_rate": 5.8808489973028535e-06, "loss": 0.2194446563720703, "step": 20820 }, { "epoch": 0.18006761722769365, "grad_norm": 7.477188667907504, "learning_rate": 5.8807921434237414e-06, "loss": 0.08094558715820313, "step": 20825 }, { "epoch": 0.18011085074923694, "grad_norm": 18.232975105822238, "learning_rate": 5.880735276258669e-06, "loss": 0.42159500122070315, "step": 20830 }, { "epoch": 0.1801540842707802, "grad_norm": 2.7239804483862295, "learning_rate": 5.880678395807902e-06, "loss": 0.046068572998046876, "step": 20835 }, { "epoch": 0.18019731779232345, "grad_norm": 0.554118054557515, "learning_rate": 5.880621502071699e-06, "loss": 0.2348297119140625, "step": 20840 }, { "epoch": 0.1802405513138667, "grad_norm": 29.483893169424913, "learning_rate": 5.880564595050324e-06, "loss": 0.22118377685546875, "step": 20845 }, { "epoch": 0.18028378483541, "grad_norm": 15.429098095468609, "learning_rate": 5.880507674744041e-06, "loss": 0.33136444091796874, "step": 20850 }, { "epoch": 0.18032701835695325, "grad_norm": 29.545478432349775, "learning_rate": 5.8804507411531095e-06, "loss": 0.479620361328125, "step": 20855 }, { "epoch": 0.1803702518784965, "grad_norm": 6.2193435428771595, "learning_rate": 5.880393794277796e-06, "loss": 0.2135009765625, "step": 20860 }, { "epoch": 0.18041348540003976, "grad_norm": 8.032294356397468, "learning_rate": 5.880336834118359e-06, "loss": 0.03021697998046875, "step": 20865 }, { "epoch": 0.18045671892158305, "grad_norm": 6.512332157454313, "learning_rate": 5.880279860675064e-06, "loss": 0.23496551513671876, "step": 20870 }, { "epoch": 0.1804999524431263, "grad_norm": 0.13177645151304226, "learning_rate": 5.8802228739481715e-06, "loss": 0.3158843994140625, "step": 20875 }, { "epoch": 0.18054318596466956, "grad_norm": 26.320801893296117, "learning_rate": 5.880165873937946e-06, "loss": 0.3304046630859375, "step": 20880 }, { "epoch": 0.18058641948621282, "grad_norm": 0.29980919292690933, "learning_rate": 5.8801088606446516e-06, "loss": 0.07335472106933594, "step": 20885 }, { "epoch": 0.1806296530077561, "grad_norm": 36.562100116091216, "learning_rate": 5.880051834068548e-06, "loss": 0.271905517578125, "step": 20890 }, { "epoch": 0.18067288652929936, "grad_norm": 1.0525130139831689, "learning_rate": 5.879994794209901e-06, "loss": 0.2504249572753906, "step": 20895 }, { "epoch": 0.18071612005084262, "grad_norm": 21.627698195154746, "learning_rate": 5.879937741068971e-06, "loss": 0.16885986328125, "step": 20900 }, { "epoch": 0.18075935357238587, "grad_norm": 38.240012084566686, "learning_rate": 5.879880674646024e-06, "loss": 0.31091461181640623, "step": 20905 }, { "epoch": 0.18080258709392916, "grad_norm": 22.13424590035388, "learning_rate": 5.879823594941321e-06, "loss": 0.1558258056640625, "step": 20910 }, { "epoch": 0.1808458206154724, "grad_norm": 16.364173780396086, "learning_rate": 5.879766501955126e-06, "loss": 0.1675201416015625, "step": 20915 }, { "epoch": 0.18088905413701567, "grad_norm": 1.8733267599561787, "learning_rate": 5.879709395687702e-06, "loss": 0.05356922149658203, "step": 20920 }, { "epoch": 0.18093228765855893, "grad_norm": 13.472050122350364, "learning_rate": 5.879652276139313e-06, "loss": 0.053631591796875, "step": 20925 }, { "epoch": 0.1809755211801022, "grad_norm": 31.43756517664071, "learning_rate": 5.879595143310223e-06, "loss": 0.278369140625, "step": 20930 }, { "epoch": 0.18101875470164547, "grad_norm": 0.30403267921633925, "learning_rate": 5.879537997200694e-06, "loss": 0.09922027587890625, "step": 20935 }, { "epoch": 0.18106198822318872, "grad_norm": 2.3853425100080576, "learning_rate": 5.879480837810991e-06, "loss": 0.14736328125, "step": 20940 }, { "epoch": 0.181105221744732, "grad_norm": 2.231161249919396, "learning_rate": 5.8794236651413755e-06, "loss": 0.536474609375, "step": 20945 }, { "epoch": 0.18114845526627527, "grad_norm": 27.428531306023675, "learning_rate": 5.879366479192113e-06, "loss": 0.16770477294921876, "step": 20950 }, { "epoch": 0.18119168878781852, "grad_norm": 18.187155598455636, "learning_rate": 5.879309279963466e-06, "loss": 0.32613382339477537, "step": 20955 }, { "epoch": 0.18123492230936178, "grad_norm": 6.558554362579812, "learning_rate": 5.879252067455701e-06, "loss": 0.3829132080078125, "step": 20960 }, { "epoch": 0.18127815583090506, "grad_norm": 61.219404988134016, "learning_rate": 5.879194841669079e-06, "loss": 0.4983528137207031, "step": 20965 }, { "epoch": 0.18132138935244832, "grad_norm": 4.284280697190966, "learning_rate": 5.879137602603863e-06, "loss": 0.22584228515625, "step": 20970 }, { "epoch": 0.18136462287399158, "grad_norm": 1.0210915524706183, "learning_rate": 5.8790803502603214e-06, "loss": 0.0519073486328125, "step": 20975 }, { "epoch": 0.18140785639553483, "grad_norm": 4.362580131497282, "learning_rate": 5.879023084638714e-06, "loss": 0.08219070434570312, "step": 20980 }, { "epoch": 0.18145108991707812, "grad_norm": 30.787101521829467, "learning_rate": 5.878965805739308e-06, "loss": 0.5358110427856445, "step": 20985 }, { "epoch": 0.18149432343862137, "grad_norm": 8.280686726469577, "learning_rate": 5.878908513562364e-06, "loss": 0.6025543212890625, "step": 20990 }, { "epoch": 0.18153755696016463, "grad_norm": 30.73583760045838, "learning_rate": 5.87885120810815e-06, "loss": 0.1431884765625, "step": 20995 }, { "epoch": 0.1815807904817079, "grad_norm": 13.244354721627046, "learning_rate": 5.878793889376928e-06, "loss": 0.34077911376953124, "step": 21000 }, { "epoch": 0.18162402400325117, "grad_norm": 6.277995921614821, "learning_rate": 5.878736557368963e-06, "loss": 0.07233505249023438, "step": 21005 }, { "epoch": 0.18166725752479443, "grad_norm": 48.698555017138276, "learning_rate": 5.878679212084519e-06, "loss": 0.31813507080078124, "step": 21010 }, { "epoch": 0.18171049104633769, "grad_norm": 6.083726195581889, "learning_rate": 5.878621853523861e-06, "loss": 0.0984588623046875, "step": 21015 }, { "epoch": 0.18175372456788094, "grad_norm": 5.249951022214553, "learning_rate": 5.878564481687254e-06, "loss": 0.30972900390625, "step": 21020 }, { "epoch": 0.18179695808942423, "grad_norm": 11.384809749363841, "learning_rate": 5.878507096574962e-06, "loss": 0.36969528198242185, "step": 21025 }, { "epoch": 0.18184019161096748, "grad_norm": 1.6279715992921422, "learning_rate": 5.8784496981872485e-06, "loss": 0.0824005126953125, "step": 21030 }, { "epoch": 0.18188342513251074, "grad_norm": 21.86563789430987, "learning_rate": 5.878392286524381e-06, "loss": 0.23720932006835938, "step": 21035 }, { "epoch": 0.181926658654054, "grad_norm": 9.753212280949754, "learning_rate": 5.878334861586621e-06, "loss": 0.1508209228515625, "step": 21040 }, { "epoch": 0.18196989217559728, "grad_norm": 6.06890951170675, "learning_rate": 5.878277423374235e-06, "loss": 0.160791015625, "step": 21045 }, { "epoch": 0.18201312569714054, "grad_norm": 7.291526531872958, "learning_rate": 5.8782199718874885e-06, "loss": 0.11744422912597656, "step": 21050 }, { "epoch": 0.1820563592186838, "grad_norm": 23.813064536841996, "learning_rate": 5.878162507126646e-06, "loss": 0.601922607421875, "step": 21055 }, { "epoch": 0.18209959274022705, "grad_norm": 16.174267504398674, "learning_rate": 5.878105029091972e-06, "loss": 0.14524459838867188, "step": 21060 }, { "epoch": 0.18214282626177034, "grad_norm": 17.457890374159536, "learning_rate": 5.878047537783733e-06, "loss": 0.14921875, "step": 21065 }, { "epoch": 0.1821860597833136, "grad_norm": 3.920433252207952, "learning_rate": 5.877990033202191e-06, "loss": 0.0923828125, "step": 21070 }, { "epoch": 0.18222929330485685, "grad_norm": 65.54444518425429, "learning_rate": 5.877932515347614e-06, "loss": 0.705291748046875, "step": 21075 }, { "epoch": 0.1822725268264001, "grad_norm": 22.386017245310065, "learning_rate": 5.8778749842202675e-06, "loss": 0.14692840576171876, "step": 21080 }, { "epoch": 0.1823157603479434, "grad_norm": 14.602680538644861, "learning_rate": 5.877817439820415e-06, "loss": 0.2941253662109375, "step": 21085 }, { "epoch": 0.18235899386948665, "grad_norm": 2.2267634894131856, "learning_rate": 5.877759882148323e-06, "loss": 0.1958629608154297, "step": 21090 }, { "epoch": 0.1824022273910299, "grad_norm": 0.7775641637030071, "learning_rate": 5.877702311204257e-06, "loss": 0.189984130859375, "step": 21095 }, { "epoch": 0.18244546091257316, "grad_norm": 2.5569677491755725, "learning_rate": 5.877644726988482e-06, "loss": 0.07589111328125, "step": 21100 }, { "epoch": 0.18248869443411644, "grad_norm": 13.813201944574153, "learning_rate": 5.877587129501263e-06, "loss": 0.237530517578125, "step": 21105 }, { "epoch": 0.1825319279556597, "grad_norm": 22.459587501543655, "learning_rate": 5.877529518742867e-06, "loss": 0.07019271850585937, "step": 21110 }, { "epoch": 0.18257516147720296, "grad_norm": 15.701103706896822, "learning_rate": 5.877471894713559e-06, "loss": 0.1875152587890625, "step": 21115 }, { "epoch": 0.18261839499874621, "grad_norm": 16.550806912641512, "learning_rate": 5.877414257413606e-06, "loss": 0.1688446044921875, "step": 21120 }, { "epoch": 0.1826616285202895, "grad_norm": 1.0721492343123409, "learning_rate": 5.8773566068432715e-06, "loss": 0.27310791015625, "step": 21125 }, { "epoch": 0.18270486204183276, "grad_norm": 18.05818361754846, "learning_rate": 5.877298943002823e-06, "loss": 0.176275634765625, "step": 21130 }, { "epoch": 0.182748095563376, "grad_norm": 0.13049958943141146, "learning_rate": 5.877241265892526e-06, "loss": 0.055767822265625, "step": 21135 }, { "epoch": 0.1827913290849193, "grad_norm": 10.886094180023246, "learning_rate": 5.877183575512646e-06, "loss": 0.291583251953125, "step": 21140 }, { "epoch": 0.18283456260646255, "grad_norm": 28.351845848279506, "learning_rate": 5.87712587186345e-06, "loss": 0.19253921508789062, "step": 21145 }, { "epoch": 0.1828777961280058, "grad_norm": 15.992279433038828, "learning_rate": 5.8770681549452036e-06, "loss": 0.152740478515625, "step": 21150 }, { "epoch": 0.18292102964954907, "grad_norm": 10.169650396404853, "learning_rate": 5.877010424758174e-06, "loss": 0.06372947692871093, "step": 21155 }, { "epoch": 0.18296426317109235, "grad_norm": 3.6745826217007136, "learning_rate": 5.876952681302625e-06, "loss": 0.2916778564453125, "step": 21160 }, { "epoch": 0.1830074966926356, "grad_norm": 6.0057692076353675, "learning_rate": 5.876894924578826e-06, "loss": 0.13312225341796874, "step": 21165 }, { "epoch": 0.18305073021417886, "grad_norm": 0.664268744503775, "learning_rate": 5.8768371545870405e-06, "loss": 0.0862274169921875, "step": 21170 }, { "epoch": 0.18309396373572212, "grad_norm": 10.37537356544415, "learning_rate": 5.876779371327537e-06, "loss": 0.276275634765625, "step": 21175 }, { "epoch": 0.1831371972572654, "grad_norm": 3.9150673827787696, "learning_rate": 5.8767215748005805e-06, "loss": 0.09868316650390625, "step": 21180 }, { "epoch": 0.18318043077880866, "grad_norm": 7.143939309701952, "learning_rate": 5.87666376500644e-06, "loss": 0.0967742919921875, "step": 21185 }, { "epoch": 0.18322366430035192, "grad_norm": 10.981774573676566, "learning_rate": 5.876605941945379e-06, "loss": 0.3997314453125, "step": 21190 }, { "epoch": 0.18326689782189517, "grad_norm": 6.487943629832818, "learning_rate": 5.876548105617665e-06, "loss": 0.227001953125, "step": 21195 }, { "epoch": 0.18331013134343846, "grad_norm": 15.546214220123174, "learning_rate": 5.876490256023566e-06, "loss": 0.20160903930664062, "step": 21200 }, { "epoch": 0.18335336486498172, "grad_norm": 1.244799274626489, "learning_rate": 5.876432393163348e-06, "loss": 0.051025390625, "step": 21205 }, { "epoch": 0.18339659838652497, "grad_norm": 19.815803463194268, "learning_rate": 5.876374517037279e-06, "loss": 0.2514007568359375, "step": 21210 }, { "epoch": 0.18343983190806823, "grad_norm": 41.75651650722558, "learning_rate": 5.876316627645623e-06, "loss": 0.240789794921875, "step": 21215 }, { "epoch": 0.1834830654296115, "grad_norm": 17.868700336586837, "learning_rate": 5.876258724988651e-06, "loss": 0.4523681640625, "step": 21220 }, { "epoch": 0.18352629895115477, "grad_norm": 2.0784360981956658, "learning_rate": 5.876200809066626e-06, "loss": 0.18774337768554689, "step": 21225 }, { "epoch": 0.18356953247269803, "grad_norm": 17.753048443045174, "learning_rate": 5.876142879879818e-06, "loss": 0.260711669921875, "step": 21230 }, { "epoch": 0.18361276599424128, "grad_norm": 1.3996887569757976, "learning_rate": 5.876084937428493e-06, "loss": 0.2605194091796875, "step": 21235 }, { "epoch": 0.18365599951578457, "grad_norm": 6.5738761757290725, "learning_rate": 5.8760269817129185e-06, "loss": 0.22082157135009767, "step": 21240 }, { "epoch": 0.18369923303732782, "grad_norm": 3.6138791383869076, "learning_rate": 5.875969012733361e-06, "loss": 0.26627578735351565, "step": 21245 }, { "epoch": 0.18374246655887108, "grad_norm": 2.4714305246446893, "learning_rate": 5.875911030490089e-06, "loss": 0.05511627197265625, "step": 21250 }, { "epoch": 0.18378570008041434, "grad_norm": 1.8058937757969107, "learning_rate": 5.87585303498337e-06, "loss": 0.146563720703125, "step": 21255 }, { "epoch": 0.18382893360195762, "grad_norm": 43.28403912134169, "learning_rate": 5.875795026213471e-06, "loss": 0.2014089584350586, "step": 21260 }, { "epoch": 0.18387216712350088, "grad_norm": 27.659147936883695, "learning_rate": 5.875737004180658e-06, "loss": 0.33968505859375, "step": 21265 }, { "epoch": 0.18391540064504414, "grad_norm": 10.526844065055275, "learning_rate": 5.875678968885201e-06, "loss": 0.24710006713867189, "step": 21270 }, { "epoch": 0.1839586341665874, "grad_norm": 6.613396645274641, "learning_rate": 5.875620920327367e-06, "loss": 0.2482513427734375, "step": 21275 }, { "epoch": 0.18400186768813068, "grad_norm": 7.863069089721263, "learning_rate": 5.875562858507423e-06, "loss": 0.196893310546875, "step": 21280 }, { "epoch": 0.18404510120967393, "grad_norm": 11.255384125447607, "learning_rate": 5.875504783425638e-06, "loss": 0.3334564208984375, "step": 21285 }, { "epoch": 0.1840883347312172, "grad_norm": 4.989245509584092, "learning_rate": 5.8754466950822784e-06, "loss": 0.1465505599975586, "step": 21290 }, { "epoch": 0.18413156825276045, "grad_norm": 19.794868149201026, "learning_rate": 5.875388593477612e-06, "loss": 0.1364715576171875, "step": 21295 }, { "epoch": 0.18417480177430373, "grad_norm": 25.237381654760647, "learning_rate": 5.875330478611909e-06, "loss": 0.1450775146484375, "step": 21300 }, { "epoch": 0.184218035295847, "grad_norm": 3.6107883206372655, "learning_rate": 5.875272350485434e-06, "loss": 0.04890632629394531, "step": 21305 }, { "epoch": 0.18426126881739024, "grad_norm": 15.963136442739394, "learning_rate": 5.8752142090984585e-06, "loss": 0.044756317138671876, "step": 21310 }, { "epoch": 0.18430450233893353, "grad_norm": 7.688525475920161, "learning_rate": 5.8751560544512485e-06, "loss": 0.18595733642578124, "step": 21315 }, { "epoch": 0.18434773586047679, "grad_norm": 6.152638145011942, "learning_rate": 5.875097886544073e-06, "loss": 0.34970703125, "step": 21320 }, { "epoch": 0.18439096938202004, "grad_norm": 2.6175878431185136, "learning_rate": 5.8750397053772e-06, "loss": 0.0343109130859375, "step": 21325 }, { "epoch": 0.1844342029035633, "grad_norm": 25.042501900441, "learning_rate": 5.874981510950898e-06, "loss": 0.19412841796875, "step": 21330 }, { "epoch": 0.18447743642510658, "grad_norm": 8.740471222308202, "learning_rate": 5.8749233032654345e-06, "loss": 0.09729480743408203, "step": 21335 }, { "epoch": 0.18452066994664984, "grad_norm": 1.8194068868900393, "learning_rate": 5.874865082321079e-06, "loss": 0.2840904235839844, "step": 21340 }, { "epoch": 0.1845639034681931, "grad_norm": 3.7285291931559827, "learning_rate": 5.874806848118101e-06, "loss": 0.1801910400390625, "step": 21345 }, { "epoch": 0.18460713698973635, "grad_norm": 7.854263115076854, "learning_rate": 5.874748600656767e-06, "loss": 0.38015098571777345, "step": 21350 }, { "epoch": 0.18465037051127964, "grad_norm": 1.7216268162593646, "learning_rate": 5.8746903399373475e-06, "loss": 0.1073455810546875, "step": 21355 }, { "epoch": 0.1846936040328229, "grad_norm": 18.081612098285202, "learning_rate": 5.8746320659601095e-06, "loss": 0.3122261047363281, "step": 21360 }, { "epoch": 0.18473683755436615, "grad_norm": 41.29263539262714, "learning_rate": 5.874573778725321e-06, "loss": 0.160260009765625, "step": 21365 }, { "epoch": 0.1847800710759094, "grad_norm": 13.544073203996906, "learning_rate": 5.874515478233254e-06, "loss": 0.62144775390625, "step": 21370 }, { "epoch": 0.1848233045974527, "grad_norm": 14.046156626735309, "learning_rate": 5.8744571644841745e-06, "loss": 0.06530990600585937, "step": 21375 }, { "epoch": 0.18486653811899595, "grad_norm": 7.510675271533575, "learning_rate": 5.874398837478353e-06, "loss": 0.11317520141601563, "step": 21380 }, { "epoch": 0.1849097716405392, "grad_norm": 14.474104851288516, "learning_rate": 5.874340497216058e-06, "loss": 0.09023056030273438, "step": 21385 }, { "epoch": 0.18495300516208246, "grad_norm": 7.349382372304474, "learning_rate": 5.8742821436975595e-06, "loss": 0.19891128540039063, "step": 21390 }, { "epoch": 0.18499623868362575, "grad_norm": 21.829158476593125, "learning_rate": 5.874223776923125e-06, "loss": 0.125872802734375, "step": 21395 }, { "epoch": 0.185039472205169, "grad_norm": 19.625509388223715, "learning_rate": 5.874165396893024e-06, "loss": 0.27794342041015624, "step": 21400 }, { "epoch": 0.18508270572671226, "grad_norm": 42.825008885617734, "learning_rate": 5.874107003607526e-06, "loss": 0.39981765747070314, "step": 21405 }, { "epoch": 0.18512593924825552, "grad_norm": 4.864061522705223, "learning_rate": 5.874048597066902e-06, "loss": 0.38833770751953123, "step": 21410 }, { "epoch": 0.1851691727697988, "grad_norm": 27.806103160734658, "learning_rate": 5.873990177271418e-06, "loss": 0.17121124267578125, "step": 21415 }, { "epoch": 0.18521240629134206, "grad_norm": 76.83814351659662, "learning_rate": 5.873931744221346e-06, "loss": 0.360968017578125, "step": 21420 }, { "epoch": 0.18525563981288531, "grad_norm": 0.322720021614988, "learning_rate": 5.873873297916955e-06, "loss": 0.193212890625, "step": 21425 }, { "epoch": 0.18529887333442857, "grad_norm": 0.3356735181195824, "learning_rate": 5.8738148383585146e-06, "loss": 0.15351715087890624, "step": 21430 }, { "epoch": 0.18534210685597186, "grad_norm": 2.931592341144086, "learning_rate": 5.8737563655462935e-06, "loss": 0.13985595703125, "step": 21435 }, { "epoch": 0.1853853403775151, "grad_norm": 4.823450114319843, "learning_rate": 5.873697879480562e-06, "loss": 0.483734130859375, "step": 21440 }, { "epoch": 0.18542857389905837, "grad_norm": 9.270185041964124, "learning_rate": 5.873639380161589e-06, "loss": 0.18223495483398439, "step": 21445 }, { "epoch": 0.18547180742060163, "grad_norm": 0.6316892189199569, "learning_rate": 5.873580867589647e-06, "loss": 0.22694320678710939, "step": 21450 }, { "epoch": 0.1855150409421449, "grad_norm": 9.003634167058046, "learning_rate": 5.873522341765003e-06, "loss": 0.1073974609375, "step": 21455 }, { "epoch": 0.18555827446368817, "grad_norm": 14.43590956438359, "learning_rate": 5.8734638026879275e-06, "loss": 0.0869354248046875, "step": 21460 }, { "epoch": 0.18560150798523142, "grad_norm": 60.86973526681963, "learning_rate": 5.873405250358691e-06, "loss": 0.588525390625, "step": 21465 }, { "epoch": 0.18564474150677468, "grad_norm": 29.51865976020598, "learning_rate": 5.873346684777564e-06, "loss": 0.38738555908203126, "step": 21470 }, { "epoch": 0.18568797502831796, "grad_norm": 4.493537687959559, "learning_rate": 5.873288105944815e-06, "loss": 0.472650146484375, "step": 21475 }, { "epoch": 0.18573120854986122, "grad_norm": 5.115071190057457, "learning_rate": 5.8732295138607165e-06, "loss": 0.3153656005859375, "step": 21480 }, { "epoch": 0.18577444207140448, "grad_norm": 0.19389489773583451, "learning_rate": 5.8731709085255355e-06, "loss": 0.16612930297851564, "step": 21485 }, { "epoch": 0.18581767559294773, "grad_norm": 40.743315746390635, "learning_rate": 5.873112289939546e-06, "loss": 0.3138313293457031, "step": 21490 }, { "epoch": 0.18586090911449102, "grad_norm": 3.3352864514295066, "learning_rate": 5.873053658103015e-06, "loss": 0.08569259643554687, "step": 21495 }, { "epoch": 0.18590414263603428, "grad_norm": 2.444473683745108, "learning_rate": 5.872995013016215e-06, "loss": 0.161688232421875, "step": 21500 }, { "epoch": 0.18594737615757753, "grad_norm": 0.2989995230680304, "learning_rate": 5.872936354679417e-06, "loss": 0.03878173828125, "step": 21505 }, { "epoch": 0.18599060967912082, "grad_norm": 2.2778929580661558, "learning_rate": 5.872877683092889e-06, "loss": 0.388970947265625, "step": 21510 }, { "epoch": 0.18603384320066407, "grad_norm": 20.42550569948896, "learning_rate": 5.872818998256902e-06, "loss": 0.28830108642578123, "step": 21515 }, { "epoch": 0.18607707672220733, "grad_norm": 3.7028909999039845, "learning_rate": 5.8727603001717295e-06, "loss": 0.0566558837890625, "step": 21520 }, { "epoch": 0.1861203102437506, "grad_norm": 6.102710664751748, "learning_rate": 5.872701588837639e-06, "loss": 0.07188034057617188, "step": 21525 }, { "epoch": 0.18616354376529387, "grad_norm": 70.93970298441732, "learning_rate": 5.8726428642549035e-06, "loss": 0.34330215454101565, "step": 21530 }, { "epoch": 0.18620677728683713, "grad_norm": 7.405039959288593, "learning_rate": 5.872584126423793e-06, "loss": 0.2294342041015625, "step": 21535 }, { "epoch": 0.18625001080838038, "grad_norm": 31.506341492516, "learning_rate": 5.8725253753445784e-06, "loss": 0.3039115905761719, "step": 21540 }, { "epoch": 0.18629324432992364, "grad_norm": 2.866934924957292, "learning_rate": 5.87246661101753e-06, "loss": 0.1198486328125, "step": 21545 }, { "epoch": 0.18633647785146693, "grad_norm": 2.070772459379238, "learning_rate": 5.87240783344292e-06, "loss": 0.34414825439453123, "step": 21550 }, { "epoch": 0.18637971137301018, "grad_norm": 23.083584248277614, "learning_rate": 5.872349042621019e-06, "loss": 0.15300369262695312, "step": 21555 }, { "epoch": 0.18642294489455344, "grad_norm": 1.5673723616251711, "learning_rate": 5.872290238552097e-06, "loss": 0.073834228515625, "step": 21560 }, { "epoch": 0.1864661784160967, "grad_norm": 4.193672728865252, "learning_rate": 5.872231421236427e-06, "loss": 0.2086181640625, "step": 21565 }, { "epoch": 0.18650941193763998, "grad_norm": 7.968397348939357, "learning_rate": 5.87217259067428e-06, "loss": 0.1777679443359375, "step": 21570 }, { "epoch": 0.18655264545918324, "grad_norm": 2.7655815900702443, "learning_rate": 5.872113746865926e-06, "loss": 0.26619300842285154, "step": 21575 }, { "epoch": 0.1865958789807265, "grad_norm": 12.667968918968167, "learning_rate": 5.8720548898116365e-06, "loss": 0.1449554443359375, "step": 21580 }, { "epoch": 0.18663911250226975, "grad_norm": 0.10157506885393351, "learning_rate": 5.871996019511685e-06, "loss": 0.27976455688476565, "step": 21585 }, { "epoch": 0.18668234602381303, "grad_norm": 17.12562109305868, "learning_rate": 5.871937135966341e-06, "loss": 0.12190093994140624, "step": 21590 }, { "epoch": 0.1867255795453563, "grad_norm": 35.28049600861382, "learning_rate": 5.871878239175876e-06, "loss": 0.2783447265625, "step": 21595 }, { "epoch": 0.18676881306689955, "grad_norm": 6.957375651871472, "learning_rate": 5.871819329140564e-06, "loss": 0.364288330078125, "step": 21600 }, { "epoch": 0.1868120465884428, "grad_norm": 6.873190287294949, "learning_rate": 5.871760405860674e-06, "loss": 0.0915283203125, "step": 21605 }, { "epoch": 0.1868552801099861, "grad_norm": 4.3578548767678225, "learning_rate": 5.871701469336478e-06, "loss": 0.1156036376953125, "step": 21610 }, { "epoch": 0.18689851363152934, "grad_norm": 16.89158287544615, "learning_rate": 5.87164251956825e-06, "loss": 0.18517036437988282, "step": 21615 }, { "epoch": 0.1869417471530726, "grad_norm": 62.57817340065569, "learning_rate": 5.87158355655626e-06, "loss": 0.267523193359375, "step": 21620 }, { "epoch": 0.18698498067461586, "grad_norm": 36.0682800007237, "learning_rate": 5.87152458030078e-06, "loss": 0.555828857421875, "step": 21625 }, { "epoch": 0.18702821419615914, "grad_norm": 31.86724291760991, "learning_rate": 5.871465590802083e-06, "loss": 0.15604667663574218, "step": 21630 }, { "epoch": 0.1870714477177024, "grad_norm": 11.010820285330896, "learning_rate": 5.871406588060439e-06, "loss": 0.28895263671875, "step": 21635 }, { "epoch": 0.18711468123924566, "grad_norm": 0.5484750954171665, "learning_rate": 5.871347572076123e-06, "loss": 0.07703514099121093, "step": 21640 }, { "epoch": 0.1871579147607889, "grad_norm": 3.936667291275616, "learning_rate": 5.871288542849406e-06, "loss": 0.16003570556640626, "step": 21645 }, { "epoch": 0.1872011482823322, "grad_norm": 48.14679630876382, "learning_rate": 5.871229500380558e-06, "loss": 0.26051025390625, "step": 21650 }, { "epoch": 0.18724438180387545, "grad_norm": 7.573381727094162, "learning_rate": 5.871170444669855e-06, "loss": 0.40577144622802735, "step": 21655 }, { "epoch": 0.1872876153254187, "grad_norm": 22.166773761647374, "learning_rate": 5.871111375717567e-06, "loss": 0.20772476196289064, "step": 21660 }, { "epoch": 0.18733084884696197, "grad_norm": 40.02180656352921, "learning_rate": 5.871052293523967e-06, "loss": 0.5137252807617188, "step": 21665 }, { "epoch": 0.18737408236850525, "grad_norm": 2.9582207826704563, "learning_rate": 5.8709931980893276e-06, "loss": 0.22626876831054688, "step": 21670 }, { "epoch": 0.1874173158900485, "grad_norm": 58.756983291058845, "learning_rate": 5.8709340894139216e-06, "loss": 0.335467529296875, "step": 21675 }, { "epoch": 0.18746054941159176, "grad_norm": 2.510193395412185, "learning_rate": 5.870874967498021e-06, "loss": 0.126580810546875, "step": 21680 }, { "epoch": 0.18750378293313505, "grad_norm": 2.855788492392981, "learning_rate": 5.8708158323418985e-06, "loss": 0.09204559326171875, "step": 21685 }, { "epoch": 0.1875470164546783, "grad_norm": 33.528877600230445, "learning_rate": 5.870756683945827e-06, "loss": 0.0904571533203125, "step": 21690 }, { "epoch": 0.18759024997622156, "grad_norm": 1.2169061359827762, "learning_rate": 5.87069752231008e-06, "loss": 0.05661849975585938, "step": 21695 }, { "epoch": 0.18763348349776482, "grad_norm": 12.249559139335368, "learning_rate": 5.870638347434928e-06, "loss": 0.22839202880859374, "step": 21700 }, { "epoch": 0.1876767170193081, "grad_norm": 4.620040852990479, "learning_rate": 5.870579159320648e-06, "loss": 0.0919891357421875, "step": 21705 }, { "epoch": 0.18771995054085136, "grad_norm": 4.479639675291097, "learning_rate": 5.87051995796751e-06, "loss": 0.13033065795898438, "step": 21710 }, { "epoch": 0.18776318406239462, "grad_norm": 10.502424376660041, "learning_rate": 5.8704607433757865e-06, "loss": 0.0541168212890625, "step": 21715 }, { "epoch": 0.18780641758393787, "grad_norm": 33.350536966962174, "learning_rate": 5.870401515545752e-06, "loss": 0.4574249267578125, "step": 21720 }, { "epoch": 0.18784965110548116, "grad_norm": 0.16145544874980508, "learning_rate": 5.870342274477679e-06, "loss": 0.15100326538085937, "step": 21725 }, { "epoch": 0.18789288462702441, "grad_norm": 77.26649436136748, "learning_rate": 5.870283020171842e-06, "loss": 0.8060028076171875, "step": 21730 }, { "epoch": 0.18793611814856767, "grad_norm": 6.712572145738963, "learning_rate": 5.870223752628514e-06, "loss": 0.0880340576171875, "step": 21735 }, { "epoch": 0.18797935167011093, "grad_norm": 31.318941807288574, "learning_rate": 5.870164471847965e-06, "loss": 0.4479278564453125, "step": 21740 }, { "epoch": 0.1880225851916542, "grad_norm": 1.1667908692548457, "learning_rate": 5.870105177830473e-06, "loss": 0.45706787109375, "step": 21745 }, { "epoch": 0.18806581871319747, "grad_norm": 1.5096651592106136, "learning_rate": 5.870045870576309e-06, "loss": 0.41570587158203126, "step": 21750 }, { "epoch": 0.18810905223474073, "grad_norm": 4.138212971496417, "learning_rate": 5.869986550085747e-06, "loss": 0.44744873046875, "step": 21755 }, { "epoch": 0.18815228575628398, "grad_norm": 17.614690606463345, "learning_rate": 5.8699272163590615e-06, "loss": 0.14906005859375, "step": 21760 }, { "epoch": 0.18819551927782727, "grad_norm": 14.916010253649244, "learning_rate": 5.869867869396525e-06, "loss": 0.27501983642578126, "step": 21765 }, { "epoch": 0.18823875279937052, "grad_norm": 6.832659263896433, "learning_rate": 5.869808509198411e-06, "loss": 0.3213714599609375, "step": 21770 }, { "epoch": 0.18828198632091378, "grad_norm": 9.645305820827316, "learning_rate": 5.8697491357649935e-06, "loss": 0.22667083740234376, "step": 21775 }, { "epoch": 0.18832521984245704, "grad_norm": 20.089844508288518, "learning_rate": 5.869689749096546e-06, "loss": 0.158880615234375, "step": 21780 }, { "epoch": 0.18836845336400032, "grad_norm": 1.7011072631245103, "learning_rate": 5.869630349193345e-06, "loss": 0.5528228759765625, "step": 21785 }, { "epoch": 0.18841168688554358, "grad_norm": 4.54667794659182, "learning_rate": 5.86957093605566e-06, "loss": 0.0335601806640625, "step": 21790 }, { "epoch": 0.18845492040708683, "grad_norm": 42.642287141230454, "learning_rate": 5.8695115096837695e-06, "loss": 0.20259323120117187, "step": 21795 }, { "epoch": 0.1884981539286301, "grad_norm": 20.67540679321583, "learning_rate": 5.869452070077944e-06, "loss": 0.1060882568359375, "step": 21800 }, { "epoch": 0.18854138745017338, "grad_norm": 0.5372988133988945, "learning_rate": 5.869392617238461e-06, "loss": 0.22964630126953126, "step": 21805 }, { "epoch": 0.18858462097171663, "grad_norm": 6.1218673705833435, "learning_rate": 5.86933315116559e-06, "loss": 0.3286285400390625, "step": 21810 }, { "epoch": 0.1886278544932599, "grad_norm": 16.147121333313123, "learning_rate": 5.86927367185961e-06, "loss": 0.3448028564453125, "step": 21815 }, { "epoch": 0.18867108801480315, "grad_norm": 3.8933845674628174, "learning_rate": 5.869214179320794e-06, "loss": 0.13477783203125, "step": 21820 }, { "epoch": 0.18871432153634643, "grad_norm": 1.2096166029460884, "learning_rate": 5.869154673549413e-06, "loss": 0.19449996948242188, "step": 21825 }, { "epoch": 0.1887575550578897, "grad_norm": 34.29348467853847, "learning_rate": 5.869095154545747e-06, "loss": 0.2534149169921875, "step": 21830 }, { "epoch": 0.18880078857943294, "grad_norm": 25.921399924430933, "learning_rate": 5.8690356223100665e-06, "loss": 0.12001419067382812, "step": 21835 }, { "epoch": 0.1888440221009762, "grad_norm": 35.31289789716918, "learning_rate": 5.868976076842647e-06, "loss": 0.1888946533203125, "step": 21840 }, { "epoch": 0.18888725562251948, "grad_norm": 4.332640648422631, "learning_rate": 5.868916518143763e-06, "loss": 0.21834335327148438, "step": 21845 }, { "epoch": 0.18893048914406274, "grad_norm": 38.81491649951832, "learning_rate": 5.8688569462136905e-06, "loss": 0.430487060546875, "step": 21850 }, { "epoch": 0.188973722665606, "grad_norm": 2.561906926556733, "learning_rate": 5.868797361052703e-06, "loss": 0.2346466064453125, "step": 21855 }, { "epoch": 0.18901695618714925, "grad_norm": 30.109301601466655, "learning_rate": 5.868737762661075e-06, "loss": 0.33575439453125, "step": 21860 }, { "epoch": 0.18906018970869254, "grad_norm": 14.18933047268346, "learning_rate": 5.868678151039082e-06, "loss": 0.252716064453125, "step": 21865 }, { "epoch": 0.1891034232302358, "grad_norm": 6.346301796620062, "learning_rate": 5.868618526187e-06, "loss": 0.14566650390625, "step": 21870 }, { "epoch": 0.18914665675177905, "grad_norm": 0.14336176889029456, "learning_rate": 5.8685588881051015e-06, "loss": 0.127020263671875, "step": 21875 }, { "epoch": 0.18918989027332234, "grad_norm": 27.88099257338855, "learning_rate": 5.868499236793664e-06, "loss": 0.21579742431640625, "step": 21880 }, { "epoch": 0.1892331237948656, "grad_norm": 4.52706128284278, "learning_rate": 5.868439572252961e-06, "loss": 0.46593399047851564, "step": 21885 }, { "epoch": 0.18927635731640885, "grad_norm": 7.846608124189647, "learning_rate": 5.8683798944832676e-06, "loss": 0.2235321044921875, "step": 21890 }, { "epoch": 0.1893195908379521, "grad_norm": 4.962388208262404, "learning_rate": 5.868320203484859e-06, "loss": 0.4901702880859375, "step": 21895 }, { "epoch": 0.1893628243594954, "grad_norm": 16.097427602599144, "learning_rate": 5.868260499258012e-06, "loss": 0.2104217529296875, "step": 21900 }, { "epoch": 0.18940605788103865, "grad_norm": 25.894960262365462, "learning_rate": 5.868200781803002e-06, "loss": 0.53699951171875, "step": 21905 }, { "epoch": 0.1894492914025819, "grad_norm": 6.399108753970528, "learning_rate": 5.868141051120102e-06, "loss": 0.0273529052734375, "step": 21910 }, { "epoch": 0.18949252492412516, "grad_norm": 33.34811488510714, "learning_rate": 5.86808130720959e-06, "loss": 0.11507797241210938, "step": 21915 }, { "epoch": 0.18953575844566845, "grad_norm": 53.83858406299912, "learning_rate": 5.8680215500717395e-06, "loss": 0.1959197998046875, "step": 21920 }, { "epoch": 0.1895789919672117, "grad_norm": 9.488430142952334, "learning_rate": 5.867961779706827e-06, "loss": 0.0822052001953125, "step": 21925 }, { "epoch": 0.18962222548875496, "grad_norm": 1.9753128346069275, "learning_rate": 5.867901996115129e-06, "loss": 0.3820281982421875, "step": 21930 }, { "epoch": 0.18966545901029822, "grad_norm": 8.040290255892934, "learning_rate": 5.867842199296919e-06, "loss": 0.2437774658203125, "step": 21935 }, { "epoch": 0.1897086925318415, "grad_norm": 14.105743236124624, "learning_rate": 5.8677823892524755e-06, "loss": 0.236614990234375, "step": 21940 }, { "epoch": 0.18975192605338476, "grad_norm": 5.387871618453326, "learning_rate": 5.867722565982073e-06, "loss": 0.10797958374023438, "step": 21945 }, { "epoch": 0.189795159574928, "grad_norm": 2.2063994493728467, "learning_rate": 5.867662729485987e-06, "loss": 0.699530029296875, "step": 21950 }, { "epoch": 0.18983839309647127, "grad_norm": 17.304399289552087, "learning_rate": 5.867602879764494e-06, "loss": 0.329034423828125, "step": 21955 }, { "epoch": 0.18988162661801455, "grad_norm": 2.014658601179748, "learning_rate": 5.86754301681787e-06, "loss": 0.043952560424804686, "step": 21960 }, { "epoch": 0.1899248601395578, "grad_norm": 0.3142217848900013, "learning_rate": 5.867483140646391e-06, "loss": 0.06913909912109376, "step": 21965 }, { "epoch": 0.18996809366110107, "grad_norm": 37.02688999371535, "learning_rate": 5.8674232512503335e-06, "loss": 0.3913749694824219, "step": 21970 }, { "epoch": 0.19001132718264432, "grad_norm": 3.344264167176065, "learning_rate": 5.867363348629973e-06, "loss": 0.06228713989257813, "step": 21975 }, { "epoch": 0.1900545607041876, "grad_norm": 21.14040580651527, "learning_rate": 5.867303432785585e-06, "loss": 0.3863525390625, "step": 21980 }, { "epoch": 0.19009779422573087, "grad_norm": 2.6248645903546124, "learning_rate": 5.867243503717448e-06, "loss": 0.1903411865234375, "step": 21985 }, { "epoch": 0.19014102774727412, "grad_norm": 5.496045530314243, "learning_rate": 5.867183561425837e-06, "loss": 0.06944046020507813, "step": 21990 }, { "epoch": 0.19018426126881738, "grad_norm": 22.29417545714611, "learning_rate": 5.86712360591103e-06, "loss": 0.24349365234375, "step": 21995 }, { "epoch": 0.19022749479036066, "grad_norm": 6.342552022538197, "learning_rate": 5.867063637173301e-06, "loss": 0.09793167114257813, "step": 22000 }, { "epoch": 0.19027072831190392, "grad_norm": 12.275557042709963, "learning_rate": 5.867003655212927e-06, "loss": 0.234820556640625, "step": 22005 }, { "epoch": 0.19031396183344718, "grad_norm": 9.106873907743884, "learning_rate": 5.866943660030187e-06, "loss": 0.23506088256835939, "step": 22010 }, { "epoch": 0.19035719535499043, "grad_norm": 0.9080041961279115, "learning_rate": 5.866883651625356e-06, "loss": 0.19497909545898437, "step": 22015 }, { "epoch": 0.19040042887653372, "grad_norm": 4.107271841295899, "learning_rate": 5.86682362999871e-06, "loss": 0.18384284973144532, "step": 22020 }, { "epoch": 0.19044366239807697, "grad_norm": 3.190589758399019, "learning_rate": 5.8667635951505276e-06, "loss": 0.05800018310546875, "step": 22025 }, { "epoch": 0.19048689591962023, "grad_norm": 0.7770540250484931, "learning_rate": 5.866703547081084e-06, "loss": 0.130230712890625, "step": 22030 }, { "epoch": 0.1905301294411635, "grad_norm": 0.8398705138860193, "learning_rate": 5.866643485790658e-06, "loss": 0.16572723388671876, "step": 22035 }, { "epoch": 0.19057336296270677, "grad_norm": 0.4814000414348885, "learning_rate": 5.866583411279526e-06, "loss": 0.0608428955078125, "step": 22040 }, { "epoch": 0.19061659648425003, "grad_norm": 12.06408146466691, "learning_rate": 5.866523323547963e-06, "loss": 0.15632553100585939, "step": 22045 }, { "epoch": 0.19065983000579328, "grad_norm": 0.5824036190477265, "learning_rate": 5.866463222596249e-06, "loss": 0.2516323089599609, "step": 22050 }, { "epoch": 0.19070306352733657, "grad_norm": 5.775307841435832, "learning_rate": 5.866403108424658e-06, "loss": 0.23399810791015624, "step": 22055 }, { "epoch": 0.19074629704887983, "grad_norm": 16.465589673148095, "learning_rate": 5.8663429810334705e-06, "loss": 0.18978729248046874, "step": 22060 }, { "epoch": 0.19078953057042308, "grad_norm": 1.5909135390957687, "learning_rate": 5.866282840422963e-06, "loss": 0.3498382568359375, "step": 22065 }, { "epoch": 0.19083276409196634, "grad_norm": 7.0158270915779655, "learning_rate": 5.866222686593411e-06, "loss": 0.08135986328125, "step": 22070 }, { "epoch": 0.19087599761350962, "grad_norm": 2.0910475858365807, "learning_rate": 5.8661625195450944e-06, "loss": 0.2966796875, "step": 22075 }, { "epoch": 0.19091923113505288, "grad_norm": 6.92177665827523, "learning_rate": 5.866102339278289e-06, "loss": 0.09849853515625, "step": 22080 }, { "epoch": 0.19096246465659614, "grad_norm": 14.27225979147242, "learning_rate": 5.866042145793273e-06, "loss": 0.13708763122558593, "step": 22085 }, { "epoch": 0.1910056981781394, "grad_norm": 6.63260182642812, "learning_rate": 5.865981939090323e-06, "loss": 0.10392074584960938, "step": 22090 }, { "epoch": 0.19104893169968268, "grad_norm": 0.2599696038948343, "learning_rate": 5.865921719169719e-06, "loss": 0.41109466552734375, "step": 22095 }, { "epoch": 0.19109216522122593, "grad_norm": 1.7566259899490742, "learning_rate": 5.865861486031737e-06, "loss": 0.049604034423828124, "step": 22100 }, { "epoch": 0.1911353987427692, "grad_norm": 0.2610433028030771, "learning_rate": 5.8658012396766546e-06, "loss": 0.07852096557617187, "step": 22105 }, { "epoch": 0.19117863226431245, "grad_norm": 23.607710482441977, "learning_rate": 5.86574098010475e-06, "loss": 0.09896163940429688, "step": 22110 }, { "epoch": 0.19122186578585573, "grad_norm": 7.1834667463321225, "learning_rate": 5.865680707316301e-06, "loss": 0.4510498046875, "step": 22115 }, { "epoch": 0.191265099307399, "grad_norm": 4.148543865554011, "learning_rate": 5.865620421311586e-06, "loss": 0.2307307243347168, "step": 22120 }, { "epoch": 0.19130833282894225, "grad_norm": 1.5396230501550756, "learning_rate": 5.865560122090883e-06, "loss": 0.2881805419921875, "step": 22125 }, { "epoch": 0.1913515663504855, "grad_norm": 1.2610373771952195, "learning_rate": 5.86549980965447e-06, "loss": 0.23908157348632814, "step": 22130 }, { "epoch": 0.1913947998720288, "grad_norm": 17.510929657339375, "learning_rate": 5.865439484002625e-06, "loss": 0.113543701171875, "step": 22135 }, { "epoch": 0.19143803339357204, "grad_norm": 27.605828828821615, "learning_rate": 5.865379145135626e-06, "loss": 0.5954818725585938, "step": 22140 }, { "epoch": 0.1914812669151153, "grad_norm": 16.097907263328317, "learning_rate": 5.865318793053752e-06, "loss": 0.17833251953125, "step": 22145 }, { "epoch": 0.19152450043665856, "grad_norm": 3.349144331921233, "learning_rate": 5.865258427757281e-06, "loss": 0.11475067138671875, "step": 22150 }, { "epoch": 0.19156773395820184, "grad_norm": 5.8242682888344905, "learning_rate": 5.865198049246491e-06, "loss": 0.157861328125, "step": 22155 }, { "epoch": 0.1916109674797451, "grad_norm": 28.331411115966194, "learning_rate": 5.865137657521661e-06, "loss": 0.1083221435546875, "step": 22160 }, { "epoch": 0.19165420100128835, "grad_norm": 16.097047241835956, "learning_rate": 5.865077252583069e-06, "loss": 0.10525455474853515, "step": 22165 }, { "epoch": 0.1916974345228316, "grad_norm": 12.446439989474738, "learning_rate": 5.865016834430994e-06, "loss": 0.3334617614746094, "step": 22170 }, { "epoch": 0.1917406680443749, "grad_norm": 31.72551361431171, "learning_rate": 5.864956403065715e-06, "loss": 0.28712997436523435, "step": 22175 }, { "epoch": 0.19178390156591815, "grad_norm": 25.46623629408629, "learning_rate": 5.864895958487509e-06, "loss": 0.29596748352050783, "step": 22180 }, { "epoch": 0.1918271350874614, "grad_norm": 55.23930821575202, "learning_rate": 5.864835500696656e-06, "loss": 0.379205322265625, "step": 22185 }, { "epoch": 0.19187036860900467, "grad_norm": 3.185288631617384, "learning_rate": 5.864775029693436e-06, "loss": 0.17557373046875, "step": 22190 }, { "epoch": 0.19191360213054795, "grad_norm": 5.293045889050281, "learning_rate": 5.864714545478127e-06, "loss": 0.02821636199951172, "step": 22195 }, { "epoch": 0.1919568356520912, "grad_norm": 0.9900347506965943, "learning_rate": 5.864654048051007e-06, "loss": 0.08417510986328125, "step": 22200 }, { "epoch": 0.19200006917363446, "grad_norm": 23.919104989818358, "learning_rate": 5.8645935374123554e-06, "loss": 0.274017333984375, "step": 22205 }, { "epoch": 0.19204330269517772, "grad_norm": 5.245887504558426, "learning_rate": 5.864533013562452e-06, "loss": 0.13103790283203126, "step": 22210 }, { "epoch": 0.192086536216721, "grad_norm": 28.722278410719476, "learning_rate": 5.864472476501574e-06, "loss": 0.24517822265625, "step": 22215 }, { "epoch": 0.19212976973826426, "grad_norm": 48.04565493576998, "learning_rate": 5.864411926230004e-06, "loss": 0.42476806640625, "step": 22220 }, { "epoch": 0.19217300325980752, "grad_norm": 35.02020218852999, "learning_rate": 5.864351362748018e-06, "loss": 0.1750244140625, "step": 22225 }, { "epoch": 0.19221623678135077, "grad_norm": 0.5404396572266504, "learning_rate": 5.8642907860558976e-06, "loss": 0.240240478515625, "step": 22230 }, { "epoch": 0.19225947030289406, "grad_norm": 15.27480742592431, "learning_rate": 5.864230196153921e-06, "loss": 0.2543933868408203, "step": 22235 }, { "epoch": 0.19230270382443732, "grad_norm": 7.264772085789342, "learning_rate": 5.864169593042367e-06, "loss": 0.0937042236328125, "step": 22240 }, { "epoch": 0.19234593734598057, "grad_norm": 0.5683320074380758, "learning_rate": 5.864108976721516e-06, "loss": 0.11910400390625, "step": 22245 }, { "epoch": 0.19238917086752386, "grad_norm": 2.2736044376755773, "learning_rate": 5.864048347191648e-06, "loss": 0.5635406494140625, "step": 22250 }, { "epoch": 0.1924324043890671, "grad_norm": 20.810958470705486, "learning_rate": 5.863987704453042e-06, "loss": 0.114874267578125, "step": 22255 }, { "epoch": 0.19247563791061037, "grad_norm": 2.134457362184891, "learning_rate": 5.863927048505977e-06, "loss": 0.18774452209472656, "step": 22260 }, { "epoch": 0.19251887143215363, "grad_norm": 2.531022558258962, "learning_rate": 5.863866379350735e-06, "loss": 0.1555255889892578, "step": 22265 }, { "epoch": 0.1925621049536969, "grad_norm": 7.4352415046215405, "learning_rate": 5.863805696987594e-06, "loss": 0.09129486083984376, "step": 22270 }, { "epoch": 0.19260533847524017, "grad_norm": 8.812657233212507, "learning_rate": 5.863745001416833e-06, "loss": 0.1321990966796875, "step": 22275 }, { "epoch": 0.19264857199678342, "grad_norm": 3.1631169179572813, "learning_rate": 5.863684292638734e-06, "loss": 0.04957313537597656, "step": 22280 }, { "epoch": 0.19269180551832668, "grad_norm": 4.431789140704108, "learning_rate": 5.863623570653576e-06, "loss": 0.26738739013671875, "step": 22285 }, { "epoch": 0.19273503903986997, "grad_norm": 23.711062389787635, "learning_rate": 5.863562835461639e-06, "loss": 0.24573974609375, "step": 22290 }, { "epoch": 0.19277827256141322, "grad_norm": 4.584166736638355, "learning_rate": 5.863502087063203e-06, "loss": 0.1864837646484375, "step": 22295 }, { "epoch": 0.19282150608295648, "grad_norm": 19.6933033562399, "learning_rate": 5.863441325458549e-06, "loss": 0.45753021240234376, "step": 22300 }, { "epoch": 0.19286473960449974, "grad_norm": 13.536719870648914, "learning_rate": 5.863380550647956e-06, "loss": 0.30605545043945315, "step": 22305 }, { "epoch": 0.19290797312604302, "grad_norm": 5.232690271931493, "learning_rate": 5.863319762631706e-06, "loss": 0.1760650634765625, "step": 22310 }, { "epoch": 0.19295120664758628, "grad_norm": 11.487274562616234, "learning_rate": 5.8632589614100765e-06, "loss": 0.08151741027832031, "step": 22315 }, { "epoch": 0.19299444016912953, "grad_norm": 44.56088104442664, "learning_rate": 5.863198146983351e-06, "loss": 0.20796356201171876, "step": 22320 }, { "epoch": 0.1930376736906728, "grad_norm": 2.4054692760637373, "learning_rate": 5.8631373193518086e-06, "loss": 0.28399906158447263, "step": 22325 }, { "epoch": 0.19308090721221607, "grad_norm": 22.30970374894719, "learning_rate": 5.863076478515729e-06, "loss": 0.0831268310546875, "step": 22330 }, { "epoch": 0.19312414073375933, "grad_norm": 20.871242867924447, "learning_rate": 5.863015624475394e-06, "loss": 0.1897674560546875, "step": 22335 }, { "epoch": 0.1931673742553026, "grad_norm": 28.06622946061526, "learning_rate": 5.862954757231085e-06, "loss": 0.23481292724609376, "step": 22340 }, { "epoch": 0.19321060777684584, "grad_norm": 0.167511364402812, "learning_rate": 5.862893876783081e-06, "loss": 0.42522449493408204, "step": 22345 }, { "epoch": 0.19325384129838913, "grad_norm": 2.242432811551426, "learning_rate": 5.862832983131663e-06, "loss": 0.1756622314453125, "step": 22350 }, { "epoch": 0.19329707481993239, "grad_norm": 13.85076953971422, "learning_rate": 5.862772076277112e-06, "loss": 0.12917633056640626, "step": 22355 }, { "epoch": 0.19334030834147564, "grad_norm": 7.9701330959987535, "learning_rate": 5.862711156219709e-06, "loss": 0.29564208984375, "step": 22360 }, { "epoch": 0.1933835418630189, "grad_norm": 15.694487894078032, "learning_rate": 5.8626502229597365e-06, "loss": 0.2226654052734375, "step": 22365 }, { "epoch": 0.19342677538456218, "grad_norm": 3.4803190585055233, "learning_rate": 5.8625892764974735e-06, "loss": 0.1490264892578125, "step": 22370 }, { "epoch": 0.19347000890610544, "grad_norm": 21.788393641119722, "learning_rate": 5.862528316833201e-06, "loss": 0.2419219970703125, "step": 22375 }, { "epoch": 0.1935132424276487, "grad_norm": 0.7000363538270554, "learning_rate": 5.8624673439672015e-06, "loss": 0.3144340515136719, "step": 22380 }, { "epoch": 0.19355647594919195, "grad_norm": 6.321182345388734, "learning_rate": 5.862406357899756e-06, "loss": 0.11888313293457031, "step": 22385 }, { "epoch": 0.19359970947073524, "grad_norm": 43.909196958214416, "learning_rate": 5.862345358631145e-06, "loss": 0.3396888732910156, "step": 22390 }, { "epoch": 0.1936429429922785, "grad_norm": 18.37890050003508, "learning_rate": 5.862284346161649e-06, "loss": 0.1387847900390625, "step": 22395 }, { "epoch": 0.19368617651382175, "grad_norm": 39.641979627685735, "learning_rate": 5.862223320491551e-06, "loss": 0.372930908203125, "step": 22400 }, { "epoch": 0.193729410035365, "grad_norm": 9.786744133204346, "learning_rate": 5.862162281621132e-06, "loss": 0.15721893310546875, "step": 22405 }, { "epoch": 0.1937726435569083, "grad_norm": 3.710236246690078, "learning_rate": 5.862101229550675e-06, "loss": 0.337164306640625, "step": 22410 }, { "epoch": 0.19381587707845155, "grad_norm": 3.010931196279188, "learning_rate": 5.862040164280458e-06, "loss": 0.249188232421875, "step": 22415 }, { "epoch": 0.1938591105999948, "grad_norm": 12.953945509708701, "learning_rate": 5.861979085810766e-06, "loss": 0.1020172119140625, "step": 22420 }, { "epoch": 0.1939023441215381, "grad_norm": 16.63671107648014, "learning_rate": 5.861917994141878e-06, "loss": 0.15240936279296874, "step": 22425 }, { "epoch": 0.19394557764308135, "grad_norm": 5.196422995878376, "learning_rate": 5.861856889274077e-06, "loss": 0.32598876953125, "step": 22430 }, { "epoch": 0.1939888111646246, "grad_norm": 1.6892727199675222, "learning_rate": 5.861795771207647e-06, "loss": 0.08867340087890625, "step": 22435 }, { "epoch": 0.19403204468616786, "grad_norm": 3.1903675804178895, "learning_rate": 5.861734639942866e-06, "loss": 0.0494415283203125, "step": 22440 }, { "epoch": 0.19407527820771114, "grad_norm": 21.13102912226736, "learning_rate": 5.861673495480019e-06, "loss": 0.187725830078125, "step": 22445 }, { "epoch": 0.1941185117292544, "grad_norm": 29.986267188609396, "learning_rate": 5.861612337819386e-06, "loss": 0.4754920959472656, "step": 22450 }, { "epoch": 0.19416174525079766, "grad_norm": 1.5616541418153358, "learning_rate": 5.8615511669612494e-06, "loss": 0.0771575927734375, "step": 22455 }, { "epoch": 0.1942049787723409, "grad_norm": 5.239297951257349, "learning_rate": 5.8614899829058916e-06, "loss": 0.14698486328125, "step": 22460 }, { "epoch": 0.1942482122938842, "grad_norm": 11.87315089573837, "learning_rate": 5.861428785653595e-06, "loss": 0.1388916015625, "step": 22465 }, { "epoch": 0.19429144581542745, "grad_norm": 20.155679982846465, "learning_rate": 5.8613675752046424e-06, "loss": 0.318011474609375, "step": 22470 }, { "epoch": 0.1943346793369707, "grad_norm": 39.46451019144805, "learning_rate": 5.8613063515593144e-06, "loss": 0.54091796875, "step": 22475 }, { "epoch": 0.19437791285851397, "grad_norm": 3.099596381043489, "learning_rate": 5.861245114717896e-06, "loss": 0.19754791259765625, "step": 22480 }, { "epoch": 0.19442114638005725, "grad_norm": 0.884756555278389, "learning_rate": 5.861183864680665e-06, "loss": 0.10776557922363281, "step": 22485 }, { "epoch": 0.1944643799016005, "grad_norm": 32.45199299903796, "learning_rate": 5.861122601447909e-06, "loss": 0.30749053955078126, "step": 22490 }, { "epoch": 0.19450761342314377, "grad_norm": 2.0218760633601964, "learning_rate": 5.8610613250199085e-06, "loss": 0.1853729248046875, "step": 22495 }, { "epoch": 0.19455084694468702, "grad_norm": 24.38114532227512, "learning_rate": 5.861000035396946e-06, "loss": 0.152923583984375, "step": 22500 }, { "epoch": 0.1945940804662303, "grad_norm": 3.419490159368763, "learning_rate": 5.860938732579302e-06, "loss": 0.121185302734375, "step": 22505 }, { "epoch": 0.19463731398777356, "grad_norm": 34.6207291801196, "learning_rate": 5.860877416567264e-06, "loss": 0.3470947265625, "step": 22510 }, { "epoch": 0.19468054750931682, "grad_norm": 10.720418182761291, "learning_rate": 5.860816087361111e-06, "loss": 0.16121063232421876, "step": 22515 }, { "epoch": 0.19472378103086008, "grad_norm": 25.692024863011724, "learning_rate": 5.8607547449611275e-06, "loss": 0.4937713623046875, "step": 22520 }, { "epoch": 0.19476701455240336, "grad_norm": 7.293240400375852, "learning_rate": 5.860693389367595e-06, "loss": 0.327850341796875, "step": 22525 }, { "epoch": 0.19481024807394662, "grad_norm": 6.851532609872418, "learning_rate": 5.860632020580799e-06, "loss": 0.11160888671875, "step": 22530 }, { "epoch": 0.19485348159548987, "grad_norm": 0.5611751711253353, "learning_rate": 5.860570638601019e-06, "loss": 0.1134429931640625, "step": 22535 }, { "epoch": 0.19489671511703313, "grad_norm": 15.202909509282147, "learning_rate": 5.86050924342854e-06, "loss": 0.37227630615234375, "step": 22540 }, { "epoch": 0.19493994863857642, "grad_norm": 2.693503319483886, "learning_rate": 5.860447835063647e-06, "loss": 0.237567138671875, "step": 22545 }, { "epoch": 0.19498318216011967, "grad_norm": 6.378334887160531, "learning_rate": 5.86038641350662e-06, "loss": 0.12769775390625, "step": 22550 }, { "epoch": 0.19502641568166293, "grad_norm": 0.27820692795692203, "learning_rate": 5.860324978757744e-06, "loss": 0.0638397216796875, "step": 22555 }, { "epoch": 0.19506964920320619, "grad_norm": 9.720376814310296, "learning_rate": 5.860263530817301e-06, "loss": 0.192083740234375, "step": 22560 }, { "epoch": 0.19511288272474947, "grad_norm": 5.511240506772869, "learning_rate": 5.860202069685576e-06, "loss": 0.1613922119140625, "step": 22565 }, { "epoch": 0.19515611624629273, "grad_norm": 1.512231595426176, "learning_rate": 5.860140595362851e-06, "loss": 0.15387725830078125, "step": 22570 }, { "epoch": 0.19519934976783598, "grad_norm": 3.160470415420685, "learning_rate": 5.860079107849412e-06, "loss": 0.23897552490234375, "step": 22575 }, { "epoch": 0.19524258328937924, "grad_norm": 24.053506169653282, "learning_rate": 5.8600176071455396e-06, "loss": 0.46474609375, "step": 22580 }, { "epoch": 0.19528581681092252, "grad_norm": 17.32047097636393, "learning_rate": 5.85995609325152e-06, "loss": 0.264691162109375, "step": 22585 }, { "epoch": 0.19532905033246578, "grad_norm": 9.408323138148452, "learning_rate": 5.859894566167635e-06, "loss": 0.132952880859375, "step": 22590 }, { "epoch": 0.19537228385400904, "grad_norm": 16.595350512238188, "learning_rate": 5.859833025894169e-06, "loss": 0.105450439453125, "step": 22595 }, { "epoch": 0.1954155173755523, "grad_norm": 7.697034346476442, "learning_rate": 5.859771472431404e-06, "loss": 0.1700664520263672, "step": 22600 }, { "epoch": 0.19545875089709558, "grad_norm": 14.19911849725456, "learning_rate": 5.8597099057796276e-06, "loss": 0.1345947265625, "step": 22605 }, { "epoch": 0.19550198441863884, "grad_norm": 3.083200444312876, "learning_rate": 5.859648325939122e-06, "loss": 0.10961532592773438, "step": 22610 }, { "epoch": 0.1955452179401821, "grad_norm": 15.948955522197878, "learning_rate": 5.85958673291017e-06, "loss": 0.14294891357421874, "step": 22615 }, { "epoch": 0.19558845146172538, "grad_norm": 0.3830197541496184, "learning_rate": 5.859525126693057e-06, "loss": 0.2228790283203125, "step": 22620 }, { "epoch": 0.19563168498326863, "grad_norm": 0.5410022231483386, "learning_rate": 5.859463507288067e-06, "loss": 0.16266555786132814, "step": 22625 }, { "epoch": 0.1956749185048119, "grad_norm": 40.19490185740864, "learning_rate": 5.8594018746954835e-06, "loss": 0.3453155517578125, "step": 22630 }, { "epoch": 0.19571815202635515, "grad_norm": 21.22760377183231, "learning_rate": 5.859340228915591e-06, "loss": 0.114813232421875, "step": 22635 }, { "epoch": 0.19576138554789843, "grad_norm": 2.8990383777805646, "learning_rate": 5.859278569948675e-06, "loss": 0.05804367065429687, "step": 22640 }, { "epoch": 0.1958046190694417, "grad_norm": 9.067655364470452, "learning_rate": 5.859216897795019e-06, "loss": 0.2764739990234375, "step": 22645 }, { "epoch": 0.19584785259098494, "grad_norm": 28.094898396644865, "learning_rate": 5.859155212454905e-06, "loss": 0.28585205078125, "step": 22650 }, { "epoch": 0.1958910861125282, "grad_norm": 8.52854711823238, "learning_rate": 5.859093513928623e-06, "loss": 0.3207206726074219, "step": 22655 }, { "epoch": 0.19593431963407149, "grad_norm": 26.039878573791174, "learning_rate": 5.859031802216453e-06, "loss": 0.11593475341796874, "step": 22660 }, { "epoch": 0.19597755315561474, "grad_norm": 3.6154982553442148, "learning_rate": 5.85897007731868e-06, "loss": 0.1641204833984375, "step": 22665 }, { "epoch": 0.196020786677158, "grad_norm": 25.951698699368276, "learning_rate": 5.85890833923559e-06, "loss": 0.15360794067382813, "step": 22670 }, { "epoch": 0.19606402019870126, "grad_norm": 1.4662373245458127, "learning_rate": 5.8588465879674686e-06, "loss": 0.17361373901367189, "step": 22675 }, { "epoch": 0.19610725372024454, "grad_norm": 12.380899380703669, "learning_rate": 5.858784823514599e-06, "loss": 0.2240966796875, "step": 22680 }, { "epoch": 0.1961504872417878, "grad_norm": 17.603896717083142, "learning_rate": 5.858723045877265e-06, "loss": 0.12325592041015625, "step": 22685 }, { "epoch": 0.19619372076333105, "grad_norm": 7.276013508850264, "learning_rate": 5.858661255055754e-06, "loss": 0.1530792236328125, "step": 22690 }, { "epoch": 0.1962369542848743, "grad_norm": 2.360800242094855, "learning_rate": 5.85859945105035e-06, "loss": 0.4776954650878906, "step": 22695 }, { "epoch": 0.1962801878064176, "grad_norm": 1.4401580456236316, "learning_rate": 5.858537633861338e-06, "loss": 0.20728302001953125, "step": 22700 }, { "epoch": 0.19632342132796085, "grad_norm": 2.984597745169309, "learning_rate": 5.858475803489003e-06, "loss": 0.22579345703125, "step": 22705 }, { "epoch": 0.1963666548495041, "grad_norm": 0.381443750527335, "learning_rate": 5.8584139599336285e-06, "loss": 0.10422744750976562, "step": 22710 }, { "epoch": 0.19640988837104736, "grad_norm": 10.931450871652743, "learning_rate": 5.858352103195504e-06, "loss": 0.1031494140625, "step": 22715 }, { "epoch": 0.19645312189259065, "grad_norm": 21.009280333822442, "learning_rate": 5.85829023327491e-06, "loss": 0.06288299560546876, "step": 22720 }, { "epoch": 0.1964963554141339, "grad_norm": 1.1438317985585322, "learning_rate": 5.858228350172135e-06, "loss": 0.10816650390625, "step": 22725 }, { "epoch": 0.19653958893567716, "grad_norm": 3.3491213010398257, "learning_rate": 5.858166453887464e-06, "loss": 0.025248146057128905, "step": 22730 }, { "epoch": 0.19658282245722042, "grad_norm": 1.7255035641161052, "learning_rate": 5.8581045444211815e-06, "loss": 0.05365753173828125, "step": 22735 }, { "epoch": 0.1966260559787637, "grad_norm": 1.5852352385663302, "learning_rate": 5.858042621773573e-06, "loss": 0.44144134521484374, "step": 22740 }, { "epoch": 0.19666928950030696, "grad_norm": 18.58726377073656, "learning_rate": 5.857980685944924e-06, "loss": 0.4809783935546875, "step": 22745 }, { "epoch": 0.19671252302185022, "grad_norm": 27.429878032110782, "learning_rate": 5.857918736935521e-06, "loss": 0.22566757202148438, "step": 22750 }, { "epoch": 0.19675575654339347, "grad_norm": 6.648853885792301, "learning_rate": 5.85785677474565e-06, "loss": 0.10206680297851563, "step": 22755 }, { "epoch": 0.19679899006493676, "grad_norm": 11.028127619492428, "learning_rate": 5.857794799375595e-06, "loss": 0.28160781860351564, "step": 22760 }, { "epoch": 0.19684222358648001, "grad_norm": 14.60545475699284, "learning_rate": 5.857732810825644e-06, "loss": 0.20281982421875, "step": 22765 }, { "epoch": 0.19688545710802327, "grad_norm": 55.85011341688203, "learning_rate": 5.857670809096081e-06, "loss": 0.3260467529296875, "step": 22770 }, { "epoch": 0.19692869062956653, "grad_norm": 1.6558152760985143, "learning_rate": 5.8576087941871926e-06, "loss": 0.385675048828125, "step": 22775 }, { "epoch": 0.1969719241511098, "grad_norm": 47.399853324394094, "learning_rate": 5.857546766099265e-06, "loss": 0.40704345703125, "step": 22780 }, { "epoch": 0.19701515767265307, "grad_norm": 199.28137294474502, "learning_rate": 5.857484724832585e-06, "loss": 0.3189201354980469, "step": 22785 }, { "epoch": 0.19705839119419633, "grad_norm": 1.0769887019066857, "learning_rate": 5.857422670387437e-06, "loss": 0.09769287109375, "step": 22790 }, { "epoch": 0.1971016247157396, "grad_norm": 20.89660859801174, "learning_rate": 5.857360602764109e-06, "loss": 0.09589881896972656, "step": 22795 }, { "epoch": 0.19714485823728287, "grad_norm": 27.142171277599445, "learning_rate": 5.8572985219628866e-06, "loss": 0.42091064453125, "step": 22800 }, { "epoch": 0.19718809175882612, "grad_norm": 8.326076638300824, "learning_rate": 5.8572364279840555e-06, "loss": 0.162347412109375, "step": 22805 }, { "epoch": 0.19723132528036938, "grad_norm": 44.40733287263182, "learning_rate": 5.8571743208279015e-06, "loss": 0.45850830078125, "step": 22810 }, { "epoch": 0.19727455880191266, "grad_norm": 19.619113041697368, "learning_rate": 5.857112200494713e-06, "loss": 0.3905181884765625, "step": 22815 }, { "epoch": 0.19731779232345592, "grad_norm": 0.6143723529596941, "learning_rate": 5.857050066984775e-06, "loss": 0.0573394775390625, "step": 22820 }, { "epoch": 0.19736102584499918, "grad_norm": 1.9716987003829387, "learning_rate": 5.856987920298375e-06, "loss": 0.336431884765625, "step": 22825 }, { "epoch": 0.19740425936654243, "grad_norm": 44.82996315630749, "learning_rate": 5.856925760435799e-06, "loss": 0.2411876678466797, "step": 22830 }, { "epoch": 0.19744749288808572, "grad_norm": 16.39038858305754, "learning_rate": 5.856863587397334e-06, "loss": 0.342083740234375, "step": 22835 }, { "epoch": 0.19749072640962897, "grad_norm": 8.26961382087487, "learning_rate": 5.856801401183266e-06, "loss": 0.10743331909179688, "step": 22840 }, { "epoch": 0.19753395993117223, "grad_norm": 23.49061112408498, "learning_rate": 5.856739201793882e-06, "loss": 0.1868133544921875, "step": 22845 }, { "epoch": 0.1975771934527155, "grad_norm": 39.106351685367905, "learning_rate": 5.85667698922947e-06, "loss": 0.1740346908569336, "step": 22850 }, { "epoch": 0.19762042697425877, "grad_norm": 37.716937522779894, "learning_rate": 5.856614763490317e-06, "loss": 0.13524017333984376, "step": 22855 }, { "epoch": 0.19766366049580203, "grad_norm": 5.8742611908544164, "learning_rate": 5.856552524576708e-06, "loss": 0.15961761474609376, "step": 22860 }, { "epoch": 0.19770689401734529, "grad_norm": 0.8640039527745493, "learning_rate": 5.856490272488931e-06, "loss": 0.2708160400390625, "step": 22865 }, { "epoch": 0.19775012753888854, "grad_norm": 6.380315710505167, "learning_rate": 5.856428007227274e-06, "loss": 0.0461029052734375, "step": 22870 }, { "epoch": 0.19779336106043183, "grad_norm": 14.548699801624437, "learning_rate": 5.856365728792023e-06, "loss": 0.13182373046875, "step": 22875 }, { "epoch": 0.19783659458197508, "grad_norm": 41.701114349233, "learning_rate": 5.8563034371834655e-06, "loss": 0.32164764404296875, "step": 22880 }, { "epoch": 0.19787982810351834, "grad_norm": 2.90236352376096, "learning_rate": 5.85624113240189e-06, "loss": 0.04207534790039062, "step": 22885 }, { "epoch": 0.1979230616250616, "grad_norm": 2.3094989502103935, "learning_rate": 5.856178814447581e-06, "loss": 0.3834197998046875, "step": 22890 }, { "epoch": 0.19796629514660488, "grad_norm": 51.396259041536304, "learning_rate": 5.856116483320829e-06, "loss": 0.3884674072265625, "step": 22895 }, { "epoch": 0.19800952866814814, "grad_norm": 27.995936948372655, "learning_rate": 5.85605413902192e-06, "loss": 0.1602020263671875, "step": 22900 }, { "epoch": 0.1980527621896914, "grad_norm": 25.341871474230427, "learning_rate": 5.8559917815511415e-06, "loss": 0.21876373291015624, "step": 22905 }, { "epoch": 0.19809599571123465, "grad_norm": 0.4153091444341537, "learning_rate": 5.855929410908781e-06, "loss": 0.19220733642578125, "step": 22910 }, { "epoch": 0.19813922923277794, "grad_norm": 7.117906900665336, "learning_rate": 5.855867027095126e-06, "loss": 0.30882568359375, "step": 22915 }, { "epoch": 0.1981824627543212, "grad_norm": 27.041375718452862, "learning_rate": 5.855804630110466e-06, "loss": 0.24464111328125, "step": 22920 }, { "epoch": 0.19822569627586445, "grad_norm": 18.357372228162117, "learning_rate": 5.855742219955086e-06, "loss": 0.1920135498046875, "step": 22925 }, { "epoch": 0.1982689297974077, "grad_norm": 11.447541353716987, "learning_rate": 5.8556797966292765e-06, "loss": 0.15574951171875, "step": 22930 }, { "epoch": 0.198312163318951, "grad_norm": 18.256489411560548, "learning_rate": 5.855617360133323e-06, "loss": 0.254913330078125, "step": 22935 }, { "epoch": 0.19835539684049425, "grad_norm": 11.926786663064721, "learning_rate": 5.855554910467515e-06, "loss": 0.112255859375, "step": 22940 }, { "epoch": 0.1983986303620375, "grad_norm": 2.297840514894065, "learning_rate": 5.85549244763214e-06, "loss": 0.0889923095703125, "step": 22945 }, { "epoch": 0.19844186388358076, "grad_norm": 26.728370115648673, "learning_rate": 5.8554299716274865e-06, "loss": 0.4066375732421875, "step": 22950 }, { "epoch": 0.19848509740512404, "grad_norm": 5.000783055499771, "learning_rate": 5.855367482453841e-06, "loss": 0.118280029296875, "step": 22955 }, { "epoch": 0.1985283309266673, "grad_norm": 8.887390305148182, "learning_rate": 5.855304980111495e-06, "loss": 0.20194473266601562, "step": 22960 }, { "epoch": 0.19857156444821056, "grad_norm": 17.751125100360372, "learning_rate": 5.855242464600734e-06, "loss": 0.1510894775390625, "step": 22965 }, { "epoch": 0.19861479796975381, "grad_norm": 37.38091200111339, "learning_rate": 5.855179935921846e-06, "loss": 0.43593597412109375, "step": 22970 }, { "epoch": 0.1986580314912971, "grad_norm": 16.21870876090799, "learning_rate": 5.855117394075122e-06, "loss": 0.31238746643066406, "step": 22975 }, { "epoch": 0.19870126501284036, "grad_norm": 0.919291585354582, "learning_rate": 5.855054839060848e-06, "loss": 0.196490478515625, "step": 22980 }, { "epoch": 0.1987444985343836, "grad_norm": 25.485560726753466, "learning_rate": 5.854992270879313e-06, "loss": 0.1742389678955078, "step": 22985 }, { "epoch": 0.1987877320559269, "grad_norm": 10.749369222489081, "learning_rate": 5.854929689530807e-06, "loss": 0.1591644287109375, "step": 22990 }, { "epoch": 0.19883096557747015, "grad_norm": 10.459726488021868, "learning_rate": 5.854867095015617e-06, "loss": 0.125048828125, "step": 22995 }, { "epoch": 0.1988741990990134, "grad_norm": 57.03043065050666, "learning_rate": 5.854804487334031e-06, "loss": 0.4274559020996094, "step": 23000 }, { "epoch": 0.19891743262055667, "grad_norm": 10.740045425439277, "learning_rate": 5.854741866486341e-06, "loss": 0.2428558349609375, "step": 23005 }, { "epoch": 0.19896066614209995, "grad_norm": 13.652435003291572, "learning_rate": 5.8546792324728325e-06, "loss": 0.16514892578125, "step": 23010 }, { "epoch": 0.1990038996636432, "grad_norm": 21.826425727669196, "learning_rate": 5.854616585293796e-06, "loss": 0.230926513671875, "step": 23015 }, { "epoch": 0.19904713318518646, "grad_norm": 6.132410826440032, "learning_rate": 5.85455392494952e-06, "loss": 0.3145263671875, "step": 23020 }, { "epoch": 0.19909036670672972, "grad_norm": 9.65234489838843, "learning_rate": 5.854491251440294e-06, "loss": 0.38302001953125, "step": 23025 }, { "epoch": 0.199133600228273, "grad_norm": 10.267906528254283, "learning_rate": 5.854428564766406e-06, "loss": 0.1773143768310547, "step": 23030 }, { "epoch": 0.19917683374981626, "grad_norm": 11.940279244173887, "learning_rate": 5.854365864928145e-06, "loss": 0.147711181640625, "step": 23035 }, { "epoch": 0.19922006727135952, "grad_norm": 17.992450086802176, "learning_rate": 5.854303151925802e-06, "loss": 0.1346527099609375, "step": 23040 }, { "epoch": 0.19926330079290278, "grad_norm": 2.3290342831022612, "learning_rate": 5.854240425759664e-06, "loss": 0.03841743469238281, "step": 23045 }, { "epoch": 0.19930653431444606, "grad_norm": 1.6791429093565597, "learning_rate": 5.854177686430023e-06, "loss": 0.14162101745605468, "step": 23050 }, { "epoch": 0.19934976783598932, "grad_norm": 7.102102677968916, "learning_rate": 5.854114933937165e-06, "loss": 0.16911354064941406, "step": 23055 }, { "epoch": 0.19939300135753257, "grad_norm": 20.696634234880822, "learning_rate": 5.8540521682813815e-06, "loss": 0.6058685302734375, "step": 23060 }, { "epoch": 0.19943623487907583, "grad_norm": 4.378418790382426, "learning_rate": 5.8539893894629625e-06, "loss": 0.1071624755859375, "step": 23065 }, { "epoch": 0.19947946840061911, "grad_norm": 33.90910116160249, "learning_rate": 5.853926597482196e-06, "loss": 0.19142303466796876, "step": 23070 }, { "epoch": 0.19952270192216237, "grad_norm": 8.672505970898477, "learning_rate": 5.853863792339372e-06, "loss": 0.386309814453125, "step": 23075 }, { "epoch": 0.19956593544370563, "grad_norm": 2.809347014898976, "learning_rate": 5.853800974034781e-06, "loss": 0.12197723388671874, "step": 23080 }, { "epoch": 0.19960916896524888, "grad_norm": 19.730028072595122, "learning_rate": 5.8537381425687115e-06, "loss": 0.22581787109375, "step": 23085 }, { "epoch": 0.19965240248679217, "grad_norm": 1.228915930962338, "learning_rate": 5.853675297941454e-06, "loss": 0.105615234375, "step": 23090 }, { "epoch": 0.19969563600833543, "grad_norm": 0.3737294518652027, "learning_rate": 5.853612440153298e-06, "loss": 0.069683837890625, "step": 23095 }, { "epoch": 0.19973886952987868, "grad_norm": 9.917201421348269, "learning_rate": 5.853549569204534e-06, "loss": 0.3986358642578125, "step": 23100 }, { "epoch": 0.19978210305142194, "grad_norm": 0.19569074676258033, "learning_rate": 5.853486685095451e-06, "loss": 0.05915679931640625, "step": 23105 }, { "epoch": 0.19982533657296522, "grad_norm": 8.019443991619546, "learning_rate": 5.853423787826341e-06, "loss": 0.17716217041015625, "step": 23110 }, { "epoch": 0.19986857009450848, "grad_norm": 3.3741717961495272, "learning_rate": 5.8533608773974905e-06, "loss": 0.124908447265625, "step": 23115 }, { "epoch": 0.19991180361605174, "grad_norm": 31.588412800239407, "learning_rate": 5.8532979538091935e-06, "loss": 0.55931396484375, "step": 23120 }, { "epoch": 0.199955037137595, "grad_norm": 3.8371853583315043, "learning_rate": 5.853235017061737e-06, "loss": 0.19615631103515624, "step": 23125 }, { "epoch": 0.19999827065913828, "grad_norm": 3.7365281640127663, "learning_rate": 5.853172067155413e-06, "loss": 0.292578125, "step": 23130 }, { "epoch": 0.20004150418068153, "grad_norm": 3.814469819352131, "learning_rate": 5.853109104090513e-06, "loss": 0.1391448974609375, "step": 23135 }, { "epoch": 0.2000847377022248, "grad_norm": 5.828703013492606, "learning_rate": 5.853046127867325e-06, "loss": 0.17613525390625, "step": 23140 }, { "epoch": 0.20012797122376805, "grad_norm": 9.865944690704215, "learning_rate": 5.85298313848614e-06, "loss": 0.1801055908203125, "step": 23145 }, { "epoch": 0.20017120474531133, "grad_norm": 28.551315134356337, "learning_rate": 5.852920135947249e-06, "loss": 0.104010009765625, "step": 23150 }, { "epoch": 0.2002144382668546, "grad_norm": 28.006071779582424, "learning_rate": 5.852857120250943e-06, "loss": 0.4527411460876465, "step": 23155 }, { "epoch": 0.20025767178839785, "grad_norm": 9.660375477564175, "learning_rate": 5.852794091397512e-06, "loss": 0.1137725830078125, "step": 23160 }, { "epoch": 0.20030090530994113, "grad_norm": 8.864100269916854, "learning_rate": 5.852731049387247e-06, "loss": 0.09114990234375, "step": 23165 }, { "epoch": 0.2003441388314844, "grad_norm": 4.997209822199721, "learning_rate": 5.852667994220437e-06, "loss": 0.102227783203125, "step": 23170 }, { "epoch": 0.20038737235302764, "grad_norm": 0.542478627379296, "learning_rate": 5.852604925897374e-06, "loss": 0.17054615020751954, "step": 23175 }, { "epoch": 0.2004306058745709, "grad_norm": 6.399078976395625, "learning_rate": 5.852541844418351e-06, "loss": 0.12496261596679688, "step": 23180 }, { "epoch": 0.20047383939611418, "grad_norm": 29.45819887343614, "learning_rate": 5.852478749783656e-06, "loss": 0.5051174163818359, "step": 23185 }, { "epoch": 0.20051707291765744, "grad_norm": 7.084547650892551, "learning_rate": 5.852415641993582e-06, "loss": 0.21468276977539064, "step": 23190 }, { "epoch": 0.2005603064392007, "grad_norm": 42.01220222229918, "learning_rate": 5.852352521048418e-06, "loss": 0.32350502014160154, "step": 23195 }, { "epoch": 0.20060353996074395, "grad_norm": 1.839888631729216, "learning_rate": 5.852289386948457e-06, "loss": 0.1018280029296875, "step": 23200 }, { "epoch": 0.20064677348228724, "grad_norm": 5.376769238471683, "learning_rate": 5.852226239693989e-06, "loss": 0.2792144775390625, "step": 23205 }, { "epoch": 0.2006900070038305, "grad_norm": 4.615576226790585, "learning_rate": 5.852163079285305e-06, "loss": 0.324493408203125, "step": 23210 }, { "epoch": 0.20073324052537375, "grad_norm": 48.18230286293638, "learning_rate": 5.852099905722698e-06, "loss": 0.1758697509765625, "step": 23215 }, { "epoch": 0.200776474046917, "grad_norm": 7.1237465503685815, "learning_rate": 5.852036719006457e-06, "loss": 0.08209228515625, "step": 23220 }, { "epoch": 0.2008197075684603, "grad_norm": 5.157913724618283, "learning_rate": 5.8519735191368765e-06, "loss": 0.27132568359375, "step": 23225 }, { "epoch": 0.20086294109000355, "grad_norm": 26.327699639109113, "learning_rate": 5.851910306114244e-06, "loss": 0.201953125, "step": 23230 }, { "epoch": 0.2009061746115468, "grad_norm": 33.2201047851292, "learning_rate": 5.8518470799388545e-06, "loss": 0.3641143798828125, "step": 23235 }, { "epoch": 0.20094940813309006, "grad_norm": 8.217853456630104, "learning_rate": 5.851783840610999e-06, "loss": 0.08883056640625, "step": 23240 }, { "epoch": 0.20099264165463335, "grad_norm": 1.10419927919728, "learning_rate": 5.8517205881309665e-06, "loss": 0.09645004272460937, "step": 23245 }, { "epoch": 0.2010358751761766, "grad_norm": 16.09355026227659, "learning_rate": 5.851657322499052e-06, "loss": 0.23592529296875, "step": 23250 }, { "epoch": 0.20107910869771986, "grad_norm": 39.14153912231967, "learning_rate": 5.851594043715545e-06, "loss": 0.1023681640625, "step": 23255 }, { "epoch": 0.20112234221926312, "grad_norm": 5.32952953557582, "learning_rate": 5.851530751780739e-06, "loss": 0.19551239013671876, "step": 23260 }, { "epoch": 0.2011655757408064, "grad_norm": 26.01215471793653, "learning_rate": 5.851467446694925e-06, "loss": 0.176300048828125, "step": 23265 }, { "epoch": 0.20120880926234966, "grad_norm": 7.048751162551444, "learning_rate": 5.851404128458394e-06, "loss": 0.152496337890625, "step": 23270 }, { "epoch": 0.20125204278389291, "grad_norm": 36.89965563115395, "learning_rate": 5.851340797071441e-06, "loss": 0.14881057739257814, "step": 23275 }, { "epoch": 0.20129527630543617, "grad_norm": 7.341675676405665, "learning_rate": 5.851277452534354e-06, "loss": 0.0695892333984375, "step": 23280 }, { "epoch": 0.20133850982697946, "grad_norm": 1.529219992186839, "learning_rate": 5.85121409484743e-06, "loss": 0.1366363525390625, "step": 23285 }, { "epoch": 0.2013817433485227, "grad_norm": 10.40485108216937, "learning_rate": 5.851150724010957e-06, "loss": 0.112213134765625, "step": 23290 }, { "epoch": 0.20142497687006597, "grad_norm": 3.54422702082798, "learning_rate": 5.851087340025229e-06, "loss": 0.1562274932861328, "step": 23295 }, { "epoch": 0.20146821039160923, "grad_norm": 0.8432760315600786, "learning_rate": 5.851023942890538e-06, "loss": 0.448193359375, "step": 23300 }, { "epoch": 0.2015114439131525, "grad_norm": 28.417695817639515, "learning_rate": 5.850960532607176e-06, "loss": 0.35177459716796877, "step": 23305 }, { "epoch": 0.20155467743469577, "grad_norm": 17.381448460510413, "learning_rate": 5.850897109175436e-06, "loss": 0.29749221801757814, "step": 23310 }, { "epoch": 0.20159791095623902, "grad_norm": 33.18835935422533, "learning_rate": 5.850833672595611e-06, "loss": 0.189166259765625, "step": 23315 }, { "epoch": 0.20164114447778228, "grad_norm": 5.526388318853202, "learning_rate": 5.850770222867994e-06, "loss": 0.0792633056640625, "step": 23320 }, { "epoch": 0.20168437799932556, "grad_norm": 2.4878422976379584, "learning_rate": 5.8507067599928745e-06, "loss": 0.27763671875, "step": 23325 }, { "epoch": 0.20172761152086882, "grad_norm": 11.749933554800922, "learning_rate": 5.850643283970548e-06, "loss": 0.12555694580078125, "step": 23330 }, { "epoch": 0.20177084504241208, "grad_norm": 14.541853174676781, "learning_rate": 5.850579794801308e-06, "loss": 0.35316314697265627, "step": 23335 }, { "epoch": 0.20181407856395533, "grad_norm": 13.843234653088818, "learning_rate": 5.850516292485445e-06, "loss": 0.131005859375, "step": 23340 }, { "epoch": 0.20185731208549862, "grad_norm": 0.7666817953435704, "learning_rate": 5.850452777023252e-06, "loss": 0.08115119934082031, "step": 23345 }, { "epoch": 0.20190054560704188, "grad_norm": 1.9328675340306365, "learning_rate": 5.850389248415023e-06, "loss": 0.05680084228515625, "step": 23350 }, { "epoch": 0.20194377912858513, "grad_norm": 50.793574792396306, "learning_rate": 5.85032570666105e-06, "loss": 0.22396240234375, "step": 23355 }, { "epoch": 0.20198701265012842, "grad_norm": 0.7725033232934887, "learning_rate": 5.8502621517616285e-06, "loss": 0.27558746337890627, "step": 23360 }, { "epoch": 0.20203024617167167, "grad_norm": 4.1075083208578285, "learning_rate": 5.850198583717048e-06, "loss": 0.064324951171875, "step": 23365 }, { "epoch": 0.20207347969321493, "grad_norm": 3.96569817207576, "learning_rate": 5.850135002527605e-06, "loss": 0.0862640380859375, "step": 23370 }, { "epoch": 0.2021167132147582, "grad_norm": 11.805173510497005, "learning_rate": 5.850071408193591e-06, "loss": 0.6892578125, "step": 23375 }, { "epoch": 0.20215994673630147, "grad_norm": 7.526350155830267, "learning_rate": 5.850007800715298e-06, "loss": 0.15779991149902345, "step": 23380 }, { "epoch": 0.20220318025784473, "grad_norm": 6.551749030745714, "learning_rate": 5.849944180093022e-06, "loss": 0.13520965576171876, "step": 23385 }, { "epoch": 0.20224641377938798, "grad_norm": 20.224674150866086, "learning_rate": 5.8498805463270556e-06, "loss": 0.251348876953125, "step": 23390 }, { "epoch": 0.20228964730093124, "grad_norm": 24.901145758123903, "learning_rate": 5.849816899417691e-06, "loss": 0.2618011474609375, "step": 23395 }, { "epoch": 0.20233288082247453, "grad_norm": 5.647657760541022, "learning_rate": 5.8497532393652234e-06, "loss": 0.5567192077636719, "step": 23400 }, { "epoch": 0.20237611434401778, "grad_norm": 3.9797380439800745, "learning_rate": 5.849689566169945e-06, "loss": 0.27491111755371095, "step": 23405 }, { "epoch": 0.20241934786556104, "grad_norm": 0.938579849503575, "learning_rate": 5.849625879832152e-06, "loss": 0.1077606201171875, "step": 23410 }, { "epoch": 0.2024625813871043, "grad_norm": 1.6240677148113996, "learning_rate": 5.849562180352134e-06, "loss": 0.18213653564453125, "step": 23415 }, { "epoch": 0.20250581490864758, "grad_norm": 2.7107320237418935, "learning_rate": 5.849498467730187e-06, "loss": 0.28782958984375, "step": 23420 }, { "epoch": 0.20254904843019084, "grad_norm": 19.86466675688788, "learning_rate": 5.849434741966606e-06, "loss": 0.128387451171875, "step": 23425 }, { "epoch": 0.2025922819517341, "grad_norm": 12.798065157228125, "learning_rate": 5.849371003061684e-06, "loss": 0.212261962890625, "step": 23430 }, { "epoch": 0.20263551547327735, "grad_norm": 0.4801788706988941, "learning_rate": 5.8493072510157135e-06, "loss": 0.15225677490234374, "step": 23435 }, { "epoch": 0.20267874899482063, "grad_norm": 30.082661354823234, "learning_rate": 5.84924348582899e-06, "loss": 0.4337882995605469, "step": 23440 }, { "epoch": 0.2027219825163639, "grad_norm": 28.969662347413397, "learning_rate": 5.849179707501809e-06, "loss": 0.3457275390625, "step": 23445 }, { "epoch": 0.20276521603790715, "grad_norm": 6.935291602046648, "learning_rate": 5.849115916034461e-06, "loss": 0.144622802734375, "step": 23450 }, { "epoch": 0.2028084495594504, "grad_norm": 11.191324692029983, "learning_rate": 5.849052111427242e-06, "loss": 0.1594940185546875, "step": 23455 }, { "epoch": 0.2028516830809937, "grad_norm": 9.057375001271708, "learning_rate": 5.848988293680448e-06, "loss": 0.301568603515625, "step": 23460 }, { "epoch": 0.20289491660253695, "grad_norm": 10.153284363696768, "learning_rate": 5.84892446279437e-06, "loss": 0.33749542236328123, "step": 23465 }, { "epoch": 0.2029381501240802, "grad_norm": 20.085047843674896, "learning_rate": 5.848860618769306e-06, "loss": 0.0859344482421875, "step": 23470 }, { "epoch": 0.20298138364562346, "grad_norm": 0.18863542348989432, "learning_rate": 5.848796761605547e-06, "loss": 0.10720672607421874, "step": 23475 }, { "epoch": 0.20302461716716674, "grad_norm": 36.1783520840859, "learning_rate": 5.84873289130339e-06, "loss": 0.2271453857421875, "step": 23480 }, { "epoch": 0.20306785068871, "grad_norm": 12.605820195924737, "learning_rate": 5.848669007863128e-06, "loss": 0.318048095703125, "step": 23485 }, { "epoch": 0.20311108421025326, "grad_norm": 1.5658222491737883, "learning_rate": 5.848605111285056e-06, "loss": 0.07747650146484375, "step": 23490 }, { "epoch": 0.2031543177317965, "grad_norm": 16.484037748695734, "learning_rate": 5.848541201569469e-06, "loss": 0.18244705200195313, "step": 23495 }, { "epoch": 0.2031975512533398, "grad_norm": 10.854194686757555, "learning_rate": 5.848477278716663e-06, "loss": 0.12711811065673828, "step": 23500 }, { "epoch": 0.20324078477488305, "grad_norm": 18.63072757446888, "learning_rate": 5.8484133427269306e-06, "loss": 0.1793701171875, "step": 23505 }, { "epoch": 0.2032840182964263, "grad_norm": 5.702118611039557, "learning_rate": 5.848349393600567e-06, "loss": 0.2735504150390625, "step": 23510 }, { "epoch": 0.20332725181796957, "grad_norm": 14.945815599112985, "learning_rate": 5.8482854313378685e-06, "loss": 0.09361381530761718, "step": 23515 }, { "epoch": 0.20337048533951285, "grad_norm": 53.60441335158495, "learning_rate": 5.848221455939129e-06, "loss": 0.34754486083984376, "step": 23520 }, { "epoch": 0.2034137188610561, "grad_norm": 3.9308296448950415, "learning_rate": 5.848157467404644e-06, "loss": 0.290252685546875, "step": 23525 }, { "epoch": 0.20345695238259937, "grad_norm": 52.95825394913106, "learning_rate": 5.848093465734708e-06, "loss": 0.5192501068115234, "step": 23530 }, { "epoch": 0.20350018590414265, "grad_norm": 19.102160973869474, "learning_rate": 5.848029450929617e-06, "loss": 0.2825225830078125, "step": 23535 }, { "epoch": 0.2035434194256859, "grad_norm": 19.111741133024662, "learning_rate": 5.8479654229896656e-06, "loss": 0.17345428466796875, "step": 23540 }, { "epoch": 0.20358665294722916, "grad_norm": 6.237894563989984, "learning_rate": 5.847901381915149e-06, "loss": 0.224371337890625, "step": 23545 }, { "epoch": 0.20362988646877242, "grad_norm": 6.271701735092187, "learning_rate": 5.847837327706363e-06, "loss": 0.0653717041015625, "step": 23550 }, { "epoch": 0.2036731199903157, "grad_norm": 1.4142204518687993, "learning_rate": 5.8477732603636034e-06, "loss": 0.2074981689453125, "step": 23555 }, { "epoch": 0.20371635351185896, "grad_norm": 27.814953036115096, "learning_rate": 5.847709179887166e-06, "loss": 0.3418701171875, "step": 23560 }, { "epoch": 0.20375958703340222, "grad_norm": 34.10151436068776, "learning_rate": 5.847645086277343e-06, "loss": 0.1687744140625, "step": 23565 }, { "epoch": 0.20380282055494547, "grad_norm": 2.503132901841628, "learning_rate": 5.847580979534434e-06, "loss": 0.05743408203125, "step": 23570 }, { "epoch": 0.20384605407648876, "grad_norm": 10.327628200779744, "learning_rate": 5.847516859658733e-06, "loss": 0.26220855712890623, "step": 23575 }, { "epoch": 0.20388928759803202, "grad_norm": 73.44312123683476, "learning_rate": 5.847452726650536e-06, "loss": 0.0985382080078125, "step": 23580 }, { "epoch": 0.20393252111957527, "grad_norm": 7.8173670956857535, "learning_rate": 5.847388580510139e-06, "loss": 0.21907882690429686, "step": 23585 }, { "epoch": 0.20397575464111853, "grad_norm": 1.4599111822794595, "learning_rate": 5.847324421237836e-06, "loss": 0.0724212646484375, "step": 23590 }, { "epoch": 0.2040189881626618, "grad_norm": 1.9557922374228904, "learning_rate": 5.847260248833926e-06, "loss": 0.2076171875, "step": 23595 }, { "epoch": 0.20406222168420507, "grad_norm": 7.425683961931681, "learning_rate": 5.847196063298703e-06, "loss": 0.48988037109375, "step": 23600 }, { "epoch": 0.20410545520574833, "grad_norm": 8.572143998068345, "learning_rate": 5.847131864632462e-06, "loss": 0.141259765625, "step": 23605 }, { "epoch": 0.20414868872729158, "grad_norm": 0.5024986512422259, "learning_rate": 5.847067652835502e-06, "loss": 0.11726913452148438, "step": 23610 }, { "epoch": 0.20419192224883487, "grad_norm": 36.3059791681518, "learning_rate": 5.847003427908117e-06, "loss": 0.3355133056640625, "step": 23615 }, { "epoch": 0.20423515577037812, "grad_norm": 1.5149563011366538, "learning_rate": 5.846939189850603e-06, "loss": 0.09973564147949218, "step": 23620 }, { "epoch": 0.20427838929192138, "grad_norm": 20.75738918947986, "learning_rate": 5.8468749386632575e-06, "loss": 0.24176483154296874, "step": 23625 }, { "epoch": 0.20432162281346464, "grad_norm": 3.1504418258909865, "learning_rate": 5.846810674346377e-06, "loss": 0.0948974609375, "step": 23630 }, { "epoch": 0.20436485633500792, "grad_norm": 4.216732188204589, "learning_rate": 5.846746396900257e-06, "loss": 0.4363136291503906, "step": 23635 }, { "epoch": 0.20440808985655118, "grad_norm": 41.18253924800018, "learning_rate": 5.846682106325193e-06, "loss": 0.10401687622070313, "step": 23640 }, { "epoch": 0.20445132337809444, "grad_norm": 17.47499098326148, "learning_rate": 5.846617802621484e-06, "loss": 0.2333698272705078, "step": 23645 }, { "epoch": 0.2044945568996377, "grad_norm": 1.3423538869477454, "learning_rate": 5.846553485789424e-06, "loss": 0.08290252685546876, "step": 23650 }, { "epoch": 0.20453779042118098, "grad_norm": 37.25649191849691, "learning_rate": 5.846489155829313e-06, "loss": 0.582781982421875, "step": 23655 }, { "epoch": 0.20458102394272423, "grad_norm": 1.2534322597888623, "learning_rate": 5.846424812741444e-06, "loss": 0.08099441528320313, "step": 23660 }, { "epoch": 0.2046242574642675, "grad_norm": 4.5369293957980155, "learning_rate": 5.846360456526115e-06, "loss": 0.3647472381591797, "step": 23665 }, { "epoch": 0.20466749098581075, "grad_norm": 6.876656982927747, "learning_rate": 5.846296087183624e-06, "loss": 0.19932861328125, "step": 23670 }, { "epoch": 0.20471072450735403, "grad_norm": 53.43307043621085, "learning_rate": 5.8462317047142655e-06, "loss": 0.39546966552734375, "step": 23675 }, { "epoch": 0.2047539580288973, "grad_norm": 46.238220853362016, "learning_rate": 5.84616730911834e-06, "loss": 0.21834716796875, "step": 23680 }, { "epoch": 0.20479719155044054, "grad_norm": 10.17827469206076, "learning_rate": 5.846102900396141e-06, "loss": 0.0802734375, "step": 23685 }, { "epoch": 0.2048404250719838, "grad_norm": 36.492487730920985, "learning_rate": 5.846038478547967e-06, "loss": 0.354156494140625, "step": 23690 }, { "epoch": 0.20488365859352708, "grad_norm": 24.040395300594476, "learning_rate": 5.845974043574115e-06, "loss": 0.2539886474609375, "step": 23695 }, { "epoch": 0.20492689211507034, "grad_norm": 4.85796557053671, "learning_rate": 5.845909595474883e-06, "loss": 0.4495887756347656, "step": 23700 }, { "epoch": 0.2049701256366136, "grad_norm": 13.2034265975806, "learning_rate": 5.845845134250566e-06, "loss": 0.285760498046875, "step": 23705 }, { "epoch": 0.20501335915815685, "grad_norm": 1.595899899542441, "learning_rate": 5.8457806599014635e-06, "loss": 0.1952362060546875, "step": 23710 }, { "epoch": 0.20505659267970014, "grad_norm": 0.23064303975758907, "learning_rate": 5.845716172427872e-06, "loss": 0.21002044677734374, "step": 23715 }, { "epoch": 0.2050998262012434, "grad_norm": 11.156319451761112, "learning_rate": 5.845651671830089e-06, "loss": 0.177044677734375, "step": 23720 }, { "epoch": 0.20514305972278665, "grad_norm": 27.260191827030393, "learning_rate": 5.845587158108412e-06, "loss": 0.39937744140625, "step": 23725 }, { "epoch": 0.20518629324432994, "grad_norm": 7.03114822653209, "learning_rate": 5.845522631263139e-06, "loss": 0.07777786254882812, "step": 23730 }, { "epoch": 0.2052295267658732, "grad_norm": 11.321001971190851, "learning_rate": 5.845458091294566e-06, "loss": 0.23326377868652343, "step": 23735 }, { "epoch": 0.20527276028741645, "grad_norm": 0.29062706262517934, "learning_rate": 5.8453935382029915e-06, "loss": 0.11329345703125, "step": 23740 }, { "epoch": 0.2053159938089597, "grad_norm": 6.728448243396363, "learning_rate": 5.845328971988714e-06, "loss": 0.15838775634765626, "step": 23745 }, { "epoch": 0.205359227330503, "grad_norm": 7.315921355299094, "learning_rate": 5.845264392652032e-06, "loss": 0.06777191162109375, "step": 23750 }, { "epoch": 0.20540246085204625, "grad_norm": 12.275225862846867, "learning_rate": 5.84519980019324e-06, "loss": 0.11856956481933593, "step": 23755 }, { "epoch": 0.2054456943735895, "grad_norm": 0.8614092530395497, "learning_rate": 5.845135194612639e-06, "loss": 0.16461257934570311, "step": 23760 }, { "epoch": 0.20548892789513276, "grad_norm": 15.67324265942527, "learning_rate": 5.845070575910525e-06, "loss": 0.09024810791015625, "step": 23765 }, { "epoch": 0.20553216141667605, "grad_norm": 0.7415924853158053, "learning_rate": 5.845005944087198e-06, "loss": 0.3058769226074219, "step": 23770 }, { "epoch": 0.2055753949382193, "grad_norm": 5.1102854144522745, "learning_rate": 5.844941299142954e-06, "loss": 0.1118927001953125, "step": 23775 }, { "epoch": 0.20561862845976256, "grad_norm": 0.672761729895257, "learning_rate": 5.844876641078093e-06, "loss": 0.0602935791015625, "step": 23780 }, { "epoch": 0.20566186198130582, "grad_norm": 0.4118033105601813, "learning_rate": 5.844811969892912e-06, "loss": 0.1423828125, "step": 23785 }, { "epoch": 0.2057050955028491, "grad_norm": 4.775373870756705, "learning_rate": 5.844747285587709e-06, "loss": 0.0668487548828125, "step": 23790 }, { "epoch": 0.20574832902439236, "grad_norm": 54.52915356939206, "learning_rate": 5.844682588162784e-06, "loss": 0.4458953857421875, "step": 23795 }, { "epoch": 0.2057915625459356, "grad_norm": 6.6942102824960275, "learning_rate": 5.8446178776184334e-06, "loss": 0.354852294921875, "step": 23800 }, { "epoch": 0.20583479606747887, "grad_norm": 19.711115634659436, "learning_rate": 5.8445531539549565e-06, "loss": 0.183270263671875, "step": 23805 }, { "epoch": 0.20587802958902215, "grad_norm": 0.5333728197506702, "learning_rate": 5.844488417172653e-06, "loss": 0.052130126953125, "step": 23810 }, { "epoch": 0.2059212631105654, "grad_norm": 6.238103900916912, "learning_rate": 5.844423667271819e-06, "loss": 0.093524169921875, "step": 23815 }, { "epoch": 0.20596449663210867, "grad_norm": 6.6328523442447676, "learning_rate": 5.844358904252754e-06, "loss": 0.39159698486328126, "step": 23820 }, { "epoch": 0.20600773015365192, "grad_norm": 27.449738461304097, "learning_rate": 5.844294128115758e-06, "loss": 0.3302490234375, "step": 23825 }, { "epoch": 0.2060509636751952, "grad_norm": 56.604740865504375, "learning_rate": 5.844229338861129e-06, "loss": 0.36706390380859377, "step": 23830 }, { "epoch": 0.20609419719673847, "grad_norm": 2.1507029033073297, "learning_rate": 5.844164536489165e-06, "loss": 0.0655059814453125, "step": 23835 }, { "epoch": 0.20613743071828172, "grad_norm": 2.861126663436868, "learning_rate": 5.844099721000166e-06, "loss": 0.12562484741210939, "step": 23840 }, { "epoch": 0.20618066423982498, "grad_norm": 9.466590980461621, "learning_rate": 5.844034892394429e-06, "loss": 0.149835205078125, "step": 23845 }, { "epoch": 0.20622389776136826, "grad_norm": 11.998529905282918, "learning_rate": 5.843970050672255e-06, "loss": 0.19864501953125, "step": 23850 }, { "epoch": 0.20626713128291152, "grad_norm": 13.784489497076523, "learning_rate": 5.843905195833944e-06, "loss": 0.21590728759765626, "step": 23855 }, { "epoch": 0.20631036480445478, "grad_norm": 14.439927046637385, "learning_rate": 5.843840327879791e-06, "loss": 0.37375717163085936, "step": 23860 }, { "epoch": 0.20635359832599803, "grad_norm": 1.2436003348679567, "learning_rate": 5.8437754468101e-06, "loss": 0.08290481567382812, "step": 23865 }, { "epoch": 0.20639683184754132, "grad_norm": 2.3896722285991725, "learning_rate": 5.843710552625166e-06, "loss": 0.102734375, "step": 23870 }, { "epoch": 0.20644006536908457, "grad_norm": 11.61623882685201, "learning_rate": 5.843645645325292e-06, "loss": 0.16302337646484374, "step": 23875 }, { "epoch": 0.20648329889062783, "grad_norm": 7.284711694961996, "learning_rate": 5.843580724910775e-06, "loss": 0.237774658203125, "step": 23880 }, { "epoch": 0.2065265324121711, "grad_norm": 3.0873073060489578, "learning_rate": 5.843515791381915e-06, "loss": 0.150860595703125, "step": 23885 }, { "epoch": 0.20656976593371437, "grad_norm": 40.36048216934424, "learning_rate": 5.843450844739011e-06, "loss": 0.3068977355957031, "step": 23890 }, { "epoch": 0.20661299945525763, "grad_norm": 0.23642449899564924, "learning_rate": 5.843385884982362e-06, "loss": 0.34919586181640627, "step": 23895 }, { "epoch": 0.20665623297680089, "grad_norm": 19.623801417860243, "learning_rate": 5.843320912112271e-06, "loss": 0.20147857666015626, "step": 23900 }, { "epoch": 0.20669946649834417, "grad_norm": 0.2752105125710414, "learning_rate": 5.843255926129034e-06, "loss": 0.1526397705078125, "step": 23905 }, { "epoch": 0.20674270001988743, "grad_norm": 0.99446654469723, "learning_rate": 5.8431909270329505e-06, "loss": 0.08778076171875, "step": 23910 }, { "epoch": 0.20678593354143068, "grad_norm": 49.35031497347438, "learning_rate": 5.843125914824323e-06, "loss": 0.47239990234375, "step": 23915 }, { "epoch": 0.20682916706297394, "grad_norm": 6.6076368368104195, "learning_rate": 5.84306088950345e-06, "loss": 0.169903564453125, "step": 23920 }, { "epoch": 0.20687240058451722, "grad_norm": 31.82975853183062, "learning_rate": 5.842995851070631e-06, "loss": 0.11807098388671874, "step": 23925 }, { "epoch": 0.20691563410606048, "grad_norm": 23.06537646052466, "learning_rate": 5.842930799526167e-06, "loss": 0.275439453125, "step": 23930 }, { "epoch": 0.20695886762760374, "grad_norm": 19.782596591476512, "learning_rate": 5.842865734870357e-06, "loss": 0.2437744140625, "step": 23935 }, { "epoch": 0.207002101149147, "grad_norm": 1.2401728591923677, "learning_rate": 5.842800657103501e-06, "loss": 0.15274200439453126, "step": 23940 }, { "epoch": 0.20704533467069028, "grad_norm": 0.6996337095181298, "learning_rate": 5.8427355662259e-06, "loss": 0.1863311767578125, "step": 23945 }, { "epoch": 0.20708856819223354, "grad_norm": 16.32102494468886, "learning_rate": 5.842670462237854e-06, "loss": 0.10366058349609375, "step": 23950 }, { "epoch": 0.2071318017137768, "grad_norm": 1.5957935282953661, "learning_rate": 5.842605345139663e-06, "loss": 0.0848968505859375, "step": 23955 }, { "epoch": 0.20717503523532005, "grad_norm": 37.54848125847217, "learning_rate": 5.842540214931627e-06, "loss": 0.3549468994140625, "step": 23960 }, { "epoch": 0.20721826875686333, "grad_norm": 28.098685854371762, "learning_rate": 5.842475071614047e-06, "loss": 0.1900054931640625, "step": 23965 }, { "epoch": 0.2072615022784066, "grad_norm": 9.355335604130007, "learning_rate": 5.842409915187222e-06, "loss": 0.16030349731445312, "step": 23970 }, { "epoch": 0.20730473579994985, "grad_norm": 4.261462061592509, "learning_rate": 5.842344745651455e-06, "loss": 0.235150146484375, "step": 23975 }, { "epoch": 0.2073479693214931, "grad_norm": 20.203145129711945, "learning_rate": 5.842279563007044e-06, "loss": 0.22150192260742188, "step": 23980 }, { "epoch": 0.2073912028430364, "grad_norm": 2.8372940708702954, "learning_rate": 5.842214367254292e-06, "loss": 0.31943206787109374, "step": 23985 }, { "epoch": 0.20743443636457964, "grad_norm": 2.4301314904453446, "learning_rate": 5.842149158393498e-06, "loss": 0.19644927978515625, "step": 23990 }, { "epoch": 0.2074776698861229, "grad_norm": 28.718382882541796, "learning_rate": 5.842083936424964e-06, "loss": 0.1234130859375, "step": 23995 }, { "epoch": 0.20752090340766616, "grad_norm": 3.817997085164104, "learning_rate": 5.842018701348989e-06, "loss": 0.15555877685546876, "step": 24000 }, { "epoch": 0.20756413692920944, "grad_norm": 11.33451736401625, "learning_rate": 5.841953453165874e-06, "loss": 0.306597900390625, "step": 24005 }, { "epoch": 0.2076073704507527, "grad_norm": 22.494445999900556, "learning_rate": 5.841888191875923e-06, "loss": 0.29722442626953127, "step": 24010 }, { "epoch": 0.20765060397229596, "grad_norm": 15.08096053939879, "learning_rate": 5.841822917479433e-06, "loss": 0.26334228515625, "step": 24015 }, { "epoch": 0.2076938374938392, "grad_norm": 9.777734604787, "learning_rate": 5.841757629976708e-06, "loss": 0.12915687561035155, "step": 24020 }, { "epoch": 0.2077370710153825, "grad_norm": 17.482497009375646, "learning_rate": 5.841692329368048e-06, "loss": 0.17524032592773436, "step": 24025 }, { "epoch": 0.20778030453692575, "grad_norm": 12.891351240093247, "learning_rate": 5.841627015653752e-06, "loss": 0.210888671875, "step": 24030 }, { "epoch": 0.207823538058469, "grad_norm": 8.14573008009153, "learning_rate": 5.841561688834125e-06, "loss": 0.1129150390625, "step": 24035 }, { "epoch": 0.20786677158001227, "grad_norm": 20.084670528834597, "learning_rate": 5.841496348909467e-06, "loss": 0.26249542236328127, "step": 24040 }, { "epoch": 0.20791000510155555, "grad_norm": 14.298115144427028, "learning_rate": 5.841430995880078e-06, "loss": 0.0896697998046875, "step": 24045 }, { "epoch": 0.2079532386230988, "grad_norm": 4.970174273269182, "learning_rate": 5.841365629746261e-06, "loss": 0.069659423828125, "step": 24050 }, { "epoch": 0.20799647214464206, "grad_norm": 6.577518739761146, "learning_rate": 5.841300250508316e-06, "loss": 0.14285221099853515, "step": 24055 }, { "epoch": 0.20803970566618532, "grad_norm": 36.902203237175335, "learning_rate": 5.841234858166545e-06, "loss": 0.4991756439208984, "step": 24060 }, { "epoch": 0.2080829391877286, "grad_norm": 22.097944996704314, "learning_rate": 5.841169452721251e-06, "loss": 0.1393707275390625, "step": 24065 }, { "epoch": 0.20812617270927186, "grad_norm": 33.508080158101166, "learning_rate": 5.8411040341727345e-06, "loss": 0.243267822265625, "step": 24070 }, { "epoch": 0.20816940623081512, "grad_norm": 14.607609155666212, "learning_rate": 5.841038602521297e-06, "loss": 0.5013015747070313, "step": 24075 }, { "epoch": 0.20821263975235837, "grad_norm": 0.6762044160485473, "learning_rate": 5.8409731577672395e-06, "loss": 0.34692840576171874, "step": 24080 }, { "epoch": 0.20825587327390166, "grad_norm": 4.163268726403339, "learning_rate": 5.840907699910866e-06, "loss": 0.1858367919921875, "step": 24085 }, { "epoch": 0.20829910679544492, "grad_norm": 3.744051952807961, "learning_rate": 5.840842228952476e-06, "loss": 0.12026710510253906, "step": 24090 }, { "epoch": 0.20834234031698817, "grad_norm": 0.9567558330893091, "learning_rate": 5.840776744892374e-06, "loss": 0.30667724609375, "step": 24095 }, { "epoch": 0.20838557383853146, "grad_norm": 8.307281136445312, "learning_rate": 5.84071124773086e-06, "loss": 0.08142852783203125, "step": 24100 }, { "epoch": 0.2084288073600747, "grad_norm": 6.272330691547922, "learning_rate": 5.840645737468237e-06, "loss": 0.283978271484375, "step": 24105 }, { "epoch": 0.20847204088161797, "grad_norm": 1.2540959358159158, "learning_rate": 5.840580214104808e-06, "loss": 0.10729942321777344, "step": 24110 }, { "epoch": 0.20851527440316123, "grad_norm": 10.822726738931248, "learning_rate": 5.840514677640872e-06, "loss": 0.3088043212890625, "step": 24115 }, { "epoch": 0.2085585079247045, "grad_norm": 0.41994179634806705, "learning_rate": 5.840449128076734e-06, "loss": 0.2817474365234375, "step": 24120 }, { "epoch": 0.20860174144624777, "grad_norm": 32.31011503508629, "learning_rate": 5.840383565412696e-06, "loss": 0.20578765869140625, "step": 24125 }, { "epoch": 0.20864497496779102, "grad_norm": 9.747809458375226, "learning_rate": 5.84031798964906e-06, "loss": 0.18236618041992186, "step": 24130 }, { "epoch": 0.20868820848933428, "grad_norm": 8.647384426588669, "learning_rate": 5.840252400786128e-06, "loss": 0.19453125, "step": 24135 }, { "epoch": 0.20873144201087757, "grad_norm": 14.98835868423251, "learning_rate": 5.840186798824202e-06, "loss": 0.0703033447265625, "step": 24140 }, { "epoch": 0.20877467553242082, "grad_norm": 0.5646464464338269, "learning_rate": 5.840121183763587e-06, "loss": 0.21373748779296875, "step": 24145 }, { "epoch": 0.20881790905396408, "grad_norm": 130.92990854627234, "learning_rate": 5.840055555604584e-06, "loss": 0.15191650390625, "step": 24150 }, { "epoch": 0.20886114257550734, "grad_norm": 12.294213355069587, "learning_rate": 5.839989914347495e-06, "loss": 0.23843994140625, "step": 24155 }, { "epoch": 0.20890437609705062, "grad_norm": 2.475279702819386, "learning_rate": 5.839924259992624e-06, "loss": 0.2295745849609375, "step": 24160 }, { "epoch": 0.20894760961859388, "grad_norm": 1.1247242686121133, "learning_rate": 5.839858592540273e-06, "loss": 0.064178466796875, "step": 24165 }, { "epoch": 0.20899084314013713, "grad_norm": 7.2838063153014385, "learning_rate": 5.8397929119907446e-06, "loss": 0.19809112548828126, "step": 24170 }, { "epoch": 0.2090340766616804, "grad_norm": 4.725690065116859, "learning_rate": 5.839727218344343e-06, "loss": 0.110302734375, "step": 24175 }, { "epoch": 0.20907731018322367, "grad_norm": 25.434457505741346, "learning_rate": 5.8396615116013705e-06, "loss": 0.052639007568359375, "step": 24180 }, { "epoch": 0.20912054370476693, "grad_norm": 25.907694881079667, "learning_rate": 5.839595791762129e-06, "loss": 0.191680908203125, "step": 24185 }, { "epoch": 0.2091637772263102, "grad_norm": 4.64041194643551, "learning_rate": 5.839530058826924e-06, "loss": 0.0572265625, "step": 24190 }, { "epoch": 0.20920701074785344, "grad_norm": 0.34718755431026505, "learning_rate": 5.839464312796056e-06, "loss": 0.4582305908203125, "step": 24195 }, { "epoch": 0.20925024426939673, "grad_norm": 8.891130685712048, "learning_rate": 5.83939855366983e-06, "loss": 0.3181640625, "step": 24200 }, { "epoch": 0.20929347779093999, "grad_norm": 5.716430005144296, "learning_rate": 5.839332781448549e-06, "loss": 0.6643386840820312, "step": 24205 }, { "epoch": 0.20933671131248324, "grad_norm": 18.077584089648294, "learning_rate": 5.839266996132515e-06, "loss": 0.154766845703125, "step": 24210 }, { "epoch": 0.2093799448340265, "grad_norm": 2.332088282718102, "learning_rate": 5.839201197722034e-06, "loss": 0.198876953125, "step": 24215 }, { "epoch": 0.20942317835556978, "grad_norm": 2.6617484995821563, "learning_rate": 5.839135386217407e-06, "loss": 0.1343780517578125, "step": 24220 }, { "epoch": 0.20946641187711304, "grad_norm": 3.408988111466531, "learning_rate": 5.8390695616189386e-06, "loss": 0.4628868103027344, "step": 24225 }, { "epoch": 0.2095096453986563, "grad_norm": 78.85787130353005, "learning_rate": 5.839003723926933e-06, "loss": 0.1095672607421875, "step": 24230 }, { "epoch": 0.20955287892019955, "grad_norm": 4.7795003804588445, "learning_rate": 5.838937873141692e-06, "loss": 0.1750885009765625, "step": 24235 }, { "epoch": 0.20959611244174284, "grad_norm": 1.4575510086277204, "learning_rate": 5.838872009263521e-06, "loss": 0.0886260986328125, "step": 24240 }, { "epoch": 0.2096393459632861, "grad_norm": 0.6283467957329525, "learning_rate": 5.838806132292723e-06, "loss": 0.15692062377929689, "step": 24245 }, { "epoch": 0.20968257948482935, "grad_norm": 5.026492775058404, "learning_rate": 5.838740242229602e-06, "loss": 0.09656982421875, "step": 24250 }, { "epoch": 0.2097258130063726, "grad_norm": 2.2104029392932314, "learning_rate": 5.8386743390744625e-06, "loss": 0.2200103759765625, "step": 24255 }, { "epoch": 0.2097690465279159, "grad_norm": 2.647275137806961, "learning_rate": 5.838608422827607e-06, "loss": 0.104736328125, "step": 24260 }, { "epoch": 0.20981228004945915, "grad_norm": 8.531973588549144, "learning_rate": 5.83854249348934e-06, "loss": 0.41375732421875, "step": 24265 }, { "epoch": 0.2098555135710024, "grad_norm": 9.32982865647474, "learning_rate": 5.838476551059966e-06, "loss": 0.18541259765625, "step": 24270 }, { "epoch": 0.2098987470925457, "grad_norm": 2.3558816627593515, "learning_rate": 5.838410595539789e-06, "loss": 0.3568675994873047, "step": 24275 }, { "epoch": 0.20994198061408895, "grad_norm": 0.9529438500567772, "learning_rate": 5.838344626929113e-06, "loss": 0.11717033386230469, "step": 24280 }, { "epoch": 0.2099852141356322, "grad_norm": 28.665567061263356, "learning_rate": 5.838278645228244e-06, "loss": 0.1532806396484375, "step": 24285 }, { "epoch": 0.21002844765717546, "grad_norm": 70.04025701509863, "learning_rate": 5.8382126504374826e-06, "loss": 0.38365478515625, "step": 24290 }, { "epoch": 0.21007168117871874, "grad_norm": 23.18613795360198, "learning_rate": 5.8381466425571356e-06, "loss": 0.162744140625, "step": 24295 }, { "epoch": 0.210114914700262, "grad_norm": 50.02750097336696, "learning_rate": 5.838080621587508e-06, "loss": 0.4549720764160156, "step": 24300 }, { "epoch": 0.21015814822180526, "grad_norm": 9.327410399141538, "learning_rate": 5.838014587528903e-06, "loss": 0.3157985687255859, "step": 24305 }, { "epoch": 0.21020138174334851, "grad_norm": 15.235280624600854, "learning_rate": 5.837948540381625e-06, "loss": 0.16957931518554686, "step": 24310 }, { "epoch": 0.2102446152648918, "grad_norm": 3.139211421009012, "learning_rate": 5.837882480145979e-06, "loss": 0.1712890625, "step": 24315 }, { "epoch": 0.21028784878643506, "grad_norm": 9.547998826855048, "learning_rate": 5.837816406822271e-06, "loss": 0.29730911254882814, "step": 24320 }, { "epoch": 0.2103310823079783, "grad_norm": 3.0869093390797566, "learning_rate": 5.837750320410804e-06, "loss": 0.1256500244140625, "step": 24325 }, { "epoch": 0.21037431582952157, "grad_norm": 2.7522831614670085, "learning_rate": 5.837684220911883e-06, "loss": 0.45749645233154296, "step": 24330 }, { "epoch": 0.21041754935106485, "grad_norm": 3.3346897110743763, "learning_rate": 5.837618108325813e-06, "loss": 0.1552215576171875, "step": 24335 }, { "epoch": 0.2104607828726081, "grad_norm": 0.28208551872830795, "learning_rate": 5.837551982652898e-06, "loss": 0.3822353363037109, "step": 24340 }, { "epoch": 0.21050401639415137, "grad_norm": 20.600924497479262, "learning_rate": 5.837485843893445e-06, "loss": 0.1852996826171875, "step": 24345 }, { "epoch": 0.21054724991569462, "grad_norm": 1.4654041583451658, "learning_rate": 5.837419692047758e-06, "loss": 0.12362136840820312, "step": 24350 }, { "epoch": 0.2105904834372379, "grad_norm": 3.9595673093151253, "learning_rate": 5.8373535271161425e-06, "loss": 0.07061309814453125, "step": 24355 }, { "epoch": 0.21063371695878116, "grad_norm": 2.6905635711684486, "learning_rate": 5.837287349098903e-06, "loss": 0.10611724853515625, "step": 24360 }, { "epoch": 0.21067695048032442, "grad_norm": 3.028041298789882, "learning_rate": 5.837221157996345e-06, "loss": 0.021460914611816408, "step": 24365 }, { "epoch": 0.21072018400186768, "grad_norm": 4.736204499669762, "learning_rate": 5.8371549538087735e-06, "loss": 0.2036881446838379, "step": 24370 }, { "epoch": 0.21076341752341096, "grad_norm": 8.525182396395387, "learning_rate": 5.837088736536494e-06, "loss": 0.3072601318359375, "step": 24375 }, { "epoch": 0.21080665104495422, "grad_norm": 1.0964790280070322, "learning_rate": 5.837022506179813e-06, "loss": 0.25088958740234374, "step": 24380 }, { "epoch": 0.21084988456649748, "grad_norm": 0.014839114990732316, "learning_rate": 5.836956262739033e-06, "loss": 0.31881256103515626, "step": 24385 }, { "epoch": 0.21089311808804073, "grad_norm": 1.336417786819306, "learning_rate": 5.836890006214462e-06, "loss": 0.171282958984375, "step": 24390 }, { "epoch": 0.21093635160958402, "grad_norm": 0.680733202969233, "learning_rate": 5.836823736606406e-06, "loss": 0.3995964050292969, "step": 24395 }, { "epoch": 0.21097958513112727, "grad_norm": 3.1385918951500043, "learning_rate": 5.836757453915169e-06, "loss": 0.24361572265625, "step": 24400 }, { "epoch": 0.21102281865267053, "grad_norm": 52.00429057326359, "learning_rate": 5.836691158141057e-06, "loss": 0.3245025634765625, "step": 24405 }, { "epoch": 0.2110660521742138, "grad_norm": 6.004454675828276, "learning_rate": 5.836624849284376e-06, "loss": 0.075164794921875, "step": 24410 }, { "epoch": 0.21110928569575707, "grad_norm": 0.9082565665345396, "learning_rate": 5.836558527345432e-06, "loss": 0.16083984375, "step": 24415 }, { "epoch": 0.21115251921730033, "grad_norm": 42.70355404901238, "learning_rate": 5.836492192324532e-06, "loss": 0.264892578125, "step": 24420 }, { "epoch": 0.21119575273884358, "grad_norm": 6.408931707660811, "learning_rate": 5.836425844221978e-06, "loss": 0.6400054931640625, "step": 24425 }, { "epoch": 0.21123898626038684, "grad_norm": 9.071754653885609, "learning_rate": 5.83635948303808e-06, "loss": 0.12167930603027344, "step": 24430 }, { "epoch": 0.21128221978193013, "grad_norm": 18.018893410952614, "learning_rate": 5.836293108773143e-06, "loss": 0.2245452880859375, "step": 24435 }, { "epoch": 0.21132545330347338, "grad_norm": 2.496069427730961, "learning_rate": 5.836226721427473e-06, "loss": 0.08822174072265625, "step": 24440 }, { "epoch": 0.21136868682501664, "grad_norm": 24.88033520291254, "learning_rate": 5.836160321001375e-06, "loss": 0.23286895751953124, "step": 24445 }, { "epoch": 0.2114119203465599, "grad_norm": 0.680206637830523, "learning_rate": 5.836093907495157e-06, "loss": 0.06450958251953125, "step": 24450 }, { "epoch": 0.21145515386810318, "grad_norm": 7.991429740040007, "learning_rate": 5.836027480909124e-06, "loss": 0.3700439453125, "step": 24455 }, { "epoch": 0.21149838738964644, "grad_norm": 46.48218676659349, "learning_rate": 5.835961041243584e-06, "loss": 0.46702880859375, "step": 24460 }, { "epoch": 0.2115416209111897, "grad_norm": 0.6584448739901418, "learning_rate": 5.835894588498841e-06, "loss": 0.07021293640136719, "step": 24465 }, { "epoch": 0.21158485443273298, "grad_norm": 4.309054003353484, "learning_rate": 5.835828122675202e-06, "loss": 0.07904052734375, "step": 24470 }, { "epoch": 0.21162808795427623, "grad_norm": 4.93599341148508, "learning_rate": 5.835761643772976e-06, "loss": 0.15081787109375, "step": 24475 }, { "epoch": 0.2116713214758195, "grad_norm": 11.343796235004927, "learning_rate": 5.835695151792468e-06, "loss": 0.10136566162109376, "step": 24480 }, { "epoch": 0.21171455499736275, "grad_norm": 12.879507285220619, "learning_rate": 5.835628646733983e-06, "loss": 0.0791351318359375, "step": 24485 }, { "epoch": 0.21175778851890603, "grad_norm": 2.2546869564783987, "learning_rate": 5.83556212859783e-06, "loss": 0.10748977661132812, "step": 24490 }, { "epoch": 0.2118010220404493, "grad_norm": 5.374645449887589, "learning_rate": 5.835495597384315e-06, "loss": 0.3473823547363281, "step": 24495 }, { "epoch": 0.21184425556199254, "grad_norm": 27.129858784035125, "learning_rate": 5.835429053093745e-06, "loss": 0.22806396484375, "step": 24500 }, { "epoch": 0.2118874890835358, "grad_norm": 38.463262661700604, "learning_rate": 5.8353624957264265e-06, "loss": 0.6274261474609375, "step": 24505 }, { "epoch": 0.21193072260507909, "grad_norm": 14.703228538491022, "learning_rate": 5.835295925282668e-06, "loss": 0.2890380859375, "step": 24510 }, { "epoch": 0.21197395612662234, "grad_norm": 26.023853144529674, "learning_rate": 5.8352293417627736e-06, "loss": 0.331396484375, "step": 24515 }, { "epoch": 0.2120171896481656, "grad_norm": 6.6486808382545135, "learning_rate": 5.835162745167051e-06, "loss": 0.2673492431640625, "step": 24520 }, { "epoch": 0.21206042316970886, "grad_norm": 4.574108623519575, "learning_rate": 5.835096135495811e-06, "loss": 0.06342048645019531, "step": 24525 }, { "epoch": 0.21210365669125214, "grad_norm": 3.043869242357519, "learning_rate": 5.835029512749358e-06, "loss": 0.2580860137939453, "step": 24530 }, { "epoch": 0.2121468902127954, "grad_norm": 1.5080171947232488, "learning_rate": 5.834962876927997e-06, "loss": 0.16358642578125, "step": 24535 }, { "epoch": 0.21219012373433865, "grad_norm": 13.802020859209513, "learning_rate": 5.83489622803204e-06, "loss": 0.1706207275390625, "step": 24540 }, { "epoch": 0.2122333572558819, "grad_norm": 16.76772029763116, "learning_rate": 5.8348295660617905e-06, "loss": 0.1531829833984375, "step": 24545 }, { "epoch": 0.2122765907774252, "grad_norm": 9.274045178634772, "learning_rate": 5.834762891017558e-06, "loss": 0.2355712890625, "step": 24550 }, { "epoch": 0.21231982429896845, "grad_norm": 12.380280489558638, "learning_rate": 5.8346962028996504e-06, "loss": 0.11400775909423828, "step": 24555 }, { "epoch": 0.2123630578205117, "grad_norm": 3.3387849499046713, "learning_rate": 5.834629501708373e-06, "loss": 0.40296249389648436, "step": 24560 }, { "epoch": 0.21240629134205496, "grad_norm": 0.5149354991701118, "learning_rate": 5.834562787444036e-06, "loss": 0.3274444580078125, "step": 24565 }, { "epoch": 0.21244952486359825, "grad_norm": 43.134032701979216, "learning_rate": 5.834496060106945e-06, "loss": 0.19080047607421874, "step": 24570 }, { "epoch": 0.2124927583851415, "grad_norm": 52.924746307026, "learning_rate": 5.834429319697409e-06, "loss": 0.23846569061279296, "step": 24575 }, { "epoch": 0.21253599190668476, "grad_norm": 1.503669205378823, "learning_rate": 5.834362566215735e-06, "loss": 0.691168212890625, "step": 24580 }, { "epoch": 0.21257922542822802, "grad_norm": 13.502321351782035, "learning_rate": 5.834295799662232e-06, "loss": 0.18099365234375, "step": 24585 }, { "epoch": 0.2126224589497713, "grad_norm": 9.093493818776604, "learning_rate": 5.834229020037207e-06, "loss": 0.20466079711914062, "step": 24590 }, { "epoch": 0.21266569247131456, "grad_norm": 67.01210059412976, "learning_rate": 5.834162227340968e-06, "loss": 0.5118148803710938, "step": 24595 }, { "epoch": 0.21270892599285782, "grad_norm": 45.97874462189334, "learning_rate": 5.834095421573823e-06, "loss": 0.666796875, "step": 24600 }, { "epoch": 0.21275215951440107, "grad_norm": 9.70850743715071, "learning_rate": 5.8340286027360815e-06, "loss": 0.1073333740234375, "step": 24605 }, { "epoch": 0.21279539303594436, "grad_norm": 3.274704228268605, "learning_rate": 5.83396177082805e-06, "loss": 0.12718963623046875, "step": 24610 }, { "epoch": 0.21283862655748761, "grad_norm": 6.422214691199235, "learning_rate": 5.833894925850036e-06, "loss": 0.19236679077148439, "step": 24615 }, { "epoch": 0.21288186007903087, "grad_norm": 52.313405321396715, "learning_rate": 5.8338280678023505e-06, "loss": 0.551300048828125, "step": 24620 }, { "epoch": 0.21292509360057413, "grad_norm": 0.4762409843110066, "learning_rate": 5.8337611966853e-06, "loss": 0.3085186004638672, "step": 24625 }, { "epoch": 0.2129683271221174, "grad_norm": 20.965154155278626, "learning_rate": 5.833694312499193e-06, "loss": 0.14770278930664063, "step": 24630 }, { "epoch": 0.21301156064366067, "grad_norm": 1.2518534979043048, "learning_rate": 5.833627415244339e-06, "loss": 0.0675384521484375, "step": 24635 }, { "epoch": 0.21305479416520393, "grad_norm": 8.023083369360666, "learning_rate": 5.833560504921044e-06, "loss": 0.1248931884765625, "step": 24640 }, { "epoch": 0.2130980276867472, "grad_norm": 15.835348525095, "learning_rate": 5.83349358152962e-06, "loss": 0.20149078369140624, "step": 24645 }, { "epoch": 0.21314126120829047, "grad_norm": 32.470600989825115, "learning_rate": 5.833426645070372e-06, "loss": 0.277203369140625, "step": 24650 }, { "epoch": 0.21318449472983372, "grad_norm": 4.446134081486194, "learning_rate": 5.833359695543613e-06, "loss": 0.28062744140625, "step": 24655 }, { "epoch": 0.21322772825137698, "grad_norm": 36.815584643617, "learning_rate": 5.8332927329496485e-06, "loss": 0.4231292724609375, "step": 24660 }, { "epoch": 0.21327096177292026, "grad_norm": 1.0684446398903111, "learning_rate": 5.833225757288789e-06, "loss": 0.2783599853515625, "step": 24665 }, { "epoch": 0.21331419529446352, "grad_norm": 0.4779574339710118, "learning_rate": 5.8331587685613404e-06, "loss": 0.081640625, "step": 24670 }, { "epoch": 0.21335742881600678, "grad_norm": 11.984738890833862, "learning_rate": 5.833091766767616e-06, "loss": 0.3689605712890625, "step": 24675 }, { "epoch": 0.21340066233755003, "grad_norm": 0.8572154785086713, "learning_rate": 5.8330247519079215e-06, "loss": 0.251556396484375, "step": 24680 }, { "epoch": 0.21344389585909332, "grad_norm": 12.120376949672455, "learning_rate": 5.832957723982568e-06, "loss": 0.18963394165039063, "step": 24685 }, { "epoch": 0.21348712938063658, "grad_norm": 0.17071943211734028, "learning_rate": 5.832890682991863e-06, "loss": 0.14523391723632811, "step": 24690 }, { "epoch": 0.21353036290217983, "grad_norm": 2.5877786967486918, "learning_rate": 5.832823628936118e-06, "loss": 0.13945159912109376, "step": 24695 }, { "epoch": 0.2135735964237231, "grad_norm": 0.9434782055008666, "learning_rate": 5.83275656181564e-06, "loss": 0.06766281127929688, "step": 24700 }, { "epoch": 0.21361682994526637, "grad_norm": 3.423098215591583, "learning_rate": 5.832689481630738e-06, "loss": 0.11063232421875, "step": 24705 }, { "epoch": 0.21366006346680963, "grad_norm": 0.9434410778301372, "learning_rate": 5.832622388381724e-06, "loss": 0.19172019958496095, "step": 24710 }, { "epoch": 0.2137032969883529, "grad_norm": 3.3467044845577574, "learning_rate": 5.8325552820689045e-06, "loss": 0.17847900390625, "step": 24715 }, { "epoch": 0.21374653050989614, "grad_norm": 9.763195209535287, "learning_rate": 5.832488162692591e-06, "loss": 0.0917724609375, "step": 24720 }, { "epoch": 0.21378976403143943, "grad_norm": 4.490113138070229, "learning_rate": 5.832421030253092e-06, "loss": 0.0736724853515625, "step": 24725 }, { "epoch": 0.21383299755298268, "grad_norm": 10.280622349824863, "learning_rate": 5.832353884750718e-06, "loss": 0.0363800048828125, "step": 24730 }, { "epoch": 0.21387623107452594, "grad_norm": 2.165612911531282, "learning_rate": 5.8322867261857784e-06, "loss": 0.136297607421875, "step": 24735 }, { "epoch": 0.2139194645960692, "grad_norm": 11.904616620139553, "learning_rate": 5.832219554558582e-06, "loss": 0.20522918701171874, "step": 24740 }, { "epoch": 0.21396269811761248, "grad_norm": 25.00820381635767, "learning_rate": 5.83215236986944e-06, "loss": 0.15635528564453124, "step": 24745 }, { "epoch": 0.21400593163915574, "grad_norm": 22.650785052257525, "learning_rate": 5.8320851721186605e-06, "loss": 0.130120849609375, "step": 24750 }, { "epoch": 0.214049165160699, "grad_norm": 28.68453246883182, "learning_rate": 5.832017961306555e-06, "loss": 0.36043853759765626, "step": 24755 }, { "epoch": 0.21409239868224225, "grad_norm": 3.741902027314926, "learning_rate": 5.831950737433432e-06, "loss": 0.1648101806640625, "step": 24760 }, { "epoch": 0.21413563220378554, "grad_norm": 28.8925736241363, "learning_rate": 5.8318835004996045e-06, "loss": 0.26719322204589846, "step": 24765 }, { "epoch": 0.2141788657253288, "grad_norm": 17.583159109376787, "learning_rate": 5.831816250505379e-06, "loss": 0.3044189453125, "step": 24770 }, { "epoch": 0.21422209924687205, "grad_norm": 2.933918484111793, "learning_rate": 5.831748987451067e-06, "loss": 0.055279541015625, "step": 24775 }, { "epoch": 0.2142653327684153, "grad_norm": 4.001996945176057, "learning_rate": 5.83168171133698e-06, "loss": 0.18806304931640624, "step": 24780 }, { "epoch": 0.2143085662899586, "grad_norm": 9.854266371131011, "learning_rate": 5.831614422163426e-06, "loss": 0.1920928955078125, "step": 24785 }, { "epoch": 0.21435179981150185, "grad_norm": 6.1256173416321245, "learning_rate": 5.831547119930718e-06, "loss": 0.3854393005371094, "step": 24790 }, { "epoch": 0.2143950333330451, "grad_norm": 40.11801547230161, "learning_rate": 5.8314798046391634e-06, "loss": 0.6272796630859375, "step": 24795 }, { "epoch": 0.21443826685458836, "grad_norm": 51.627214283791815, "learning_rate": 5.831412476289074e-06, "loss": 0.2686805725097656, "step": 24800 }, { "epoch": 0.21448150037613165, "grad_norm": 2.8258334930969045, "learning_rate": 5.831345134880762e-06, "loss": 0.263055419921875, "step": 24805 }, { "epoch": 0.2145247338976749, "grad_norm": 27.970734248395832, "learning_rate": 5.831277780414535e-06, "loss": 0.07011756896972657, "step": 24810 }, { "epoch": 0.21456796741921816, "grad_norm": 3.5414849127392385, "learning_rate": 5.831210412890705e-06, "loss": 0.18631591796875, "step": 24815 }, { "epoch": 0.21461120094076142, "grad_norm": 9.771218108830734, "learning_rate": 5.831143032309585e-06, "loss": 0.1605224609375, "step": 24820 }, { "epoch": 0.2146544344623047, "grad_norm": 2.5608597386249734, "learning_rate": 5.831075638671481e-06, "loss": 0.0626983642578125, "step": 24825 }, { "epoch": 0.21469766798384796, "grad_norm": 2.3751734679160927, "learning_rate": 5.831008231976707e-06, "loss": 0.2564697265625, "step": 24830 }, { "epoch": 0.2147409015053912, "grad_norm": 8.52978392169137, "learning_rate": 5.830940812225573e-06, "loss": 0.16126480102539062, "step": 24835 }, { "epoch": 0.2147841350269345, "grad_norm": 2.314143562307942, "learning_rate": 5.830873379418391e-06, "loss": 0.1691192626953125, "step": 24840 }, { "epoch": 0.21482736854847775, "grad_norm": 37.172595296134546, "learning_rate": 5.830805933555469e-06, "loss": 0.2333740234375, "step": 24845 }, { "epoch": 0.214870602070021, "grad_norm": 3.1422173113214114, "learning_rate": 5.8307384746371225e-06, "loss": 0.13458404541015626, "step": 24850 }, { "epoch": 0.21491383559156427, "grad_norm": 18.625178447740115, "learning_rate": 5.830671002663659e-06, "loss": 0.2722503662109375, "step": 24855 }, { "epoch": 0.21495706911310755, "grad_norm": 5.803993033649772, "learning_rate": 5.830603517635392e-06, "loss": 0.22046279907226562, "step": 24860 }, { "epoch": 0.2150003026346508, "grad_norm": 0.35953068696715357, "learning_rate": 5.830536019552631e-06, "loss": 0.098553466796875, "step": 24865 }, { "epoch": 0.21504353615619407, "grad_norm": 25.092870757167695, "learning_rate": 5.830468508415688e-06, "loss": 0.13040809631347655, "step": 24870 }, { "epoch": 0.21508676967773732, "grad_norm": 4.395423266152057, "learning_rate": 5.830400984224875e-06, "loss": 0.1567169189453125, "step": 24875 }, { "epoch": 0.2151300031992806, "grad_norm": 1.2518421250815275, "learning_rate": 5.830333446980502e-06, "loss": 0.09739990234375, "step": 24880 }, { "epoch": 0.21517323672082386, "grad_norm": 2.9349402059012735, "learning_rate": 5.830265896682881e-06, "loss": 0.14611892700195311, "step": 24885 }, { "epoch": 0.21521647024236712, "grad_norm": 9.824063993082271, "learning_rate": 5.830198333332325e-06, "loss": 0.20748214721679686, "step": 24890 }, { "epoch": 0.21525970376391038, "grad_norm": 5.991278092980972, "learning_rate": 5.830130756929143e-06, "loss": 0.2737701416015625, "step": 24895 }, { "epoch": 0.21530293728545366, "grad_norm": 14.120679305263321, "learning_rate": 5.830063167473649e-06, "loss": 0.19441070556640624, "step": 24900 }, { "epoch": 0.21534617080699692, "grad_norm": 22.6942782707087, "learning_rate": 5.829995564966153e-06, "loss": 0.09117431640625, "step": 24905 }, { "epoch": 0.21538940432854017, "grad_norm": 0.6333642181471859, "learning_rate": 5.829927949406968e-06, "loss": 0.04317207336425781, "step": 24910 }, { "epoch": 0.21543263785008343, "grad_norm": 8.868041748039047, "learning_rate": 5.829860320796406e-06, "loss": 0.56805419921875, "step": 24915 }, { "epoch": 0.21547587137162671, "grad_norm": 28.576832096473417, "learning_rate": 5.829792679134777e-06, "loss": 0.13499603271484376, "step": 24920 }, { "epoch": 0.21551910489316997, "grad_norm": 12.767542033154454, "learning_rate": 5.829725024422395e-06, "loss": 0.0967987060546875, "step": 24925 }, { "epoch": 0.21556233841471323, "grad_norm": 13.916997029978265, "learning_rate": 5.829657356659571e-06, "loss": 0.3310882568359375, "step": 24930 }, { "epoch": 0.21560557193625648, "grad_norm": 30.62781281844369, "learning_rate": 5.829589675846617e-06, "loss": 0.17193603515625, "step": 24935 }, { "epoch": 0.21564880545779977, "grad_norm": 11.58816500786239, "learning_rate": 5.8295219819838456e-06, "loss": 0.09269866943359376, "step": 24940 }, { "epoch": 0.21569203897934303, "grad_norm": 37.43076157694929, "learning_rate": 5.8294542750715684e-06, "loss": 0.184307861328125, "step": 24945 }, { "epoch": 0.21573527250088628, "grad_norm": 48.350521268260096, "learning_rate": 5.829386555110099e-06, "loss": 0.535205078125, "step": 24950 }, { "epoch": 0.21577850602242954, "grad_norm": 0.5823480504581902, "learning_rate": 5.829318822099748e-06, "loss": 0.4045867919921875, "step": 24955 }, { "epoch": 0.21582173954397282, "grad_norm": 32.72397266109491, "learning_rate": 5.829251076040829e-06, "loss": 0.2647430419921875, "step": 24960 }, { "epoch": 0.21586497306551608, "grad_norm": 1.4322306431358696, "learning_rate": 5.829183316933655e-06, "loss": 0.22497911453247071, "step": 24965 }, { "epoch": 0.21590820658705934, "grad_norm": 46.88891194221741, "learning_rate": 5.829115544778536e-06, "loss": 0.219903564453125, "step": 24970 }, { "epoch": 0.2159514401086026, "grad_norm": 2.349830968711366, "learning_rate": 5.8290477595757865e-06, "loss": 0.20594940185546876, "step": 24975 }, { "epoch": 0.21599467363014588, "grad_norm": 2.4504764627159865, "learning_rate": 5.828979961325719e-06, "loss": 0.1483827590942383, "step": 24980 }, { "epoch": 0.21603790715168913, "grad_norm": 9.489294993711134, "learning_rate": 5.828912150028645e-06, "loss": 0.15023956298828126, "step": 24985 }, { "epoch": 0.2160811406732324, "grad_norm": 1.1395228271390871, "learning_rate": 5.828844325684879e-06, "loss": 0.22479705810546874, "step": 24990 }, { "epoch": 0.21612437419477565, "grad_norm": 14.283626485025914, "learning_rate": 5.828776488294734e-06, "loss": 0.3923053741455078, "step": 24995 }, { "epoch": 0.21616760771631893, "grad_norm": 33.88506428830287, "learning_rate": 5.82870863785852e-06, "loss": 0.28084869384765626, "step": 25000 }, { "epoch": 0.2162108412378622, "grad_norm": 2.692130664678852, "learning_rate": 5.828640774376553e-06, "loss": 0.1720703125, "step": 25005 }, { "epoch": 0.21625407475940545, "grad_norm": 0.8449937181758832, "learning_rate": 5.8285728978491436e-06, "loss": 0.07324981689453125, "step": 25010 }, { "epoch": 0.21629730828094873, "grad_norm": 5.370149690994604, "learning_rate": 5.828505008276607e-06, "loss": 0.024381637573242188, "step": 25015 }, { "epoch": 0.216340541802492, "grad_norm": 28.146107957067663, "learning_rate": 5.8284371056592536e-06, "loss": 0.1865081787109375, "step": 25020 }, { "epoch": 0.21638377532403524, "grad_norm": 22.685561149559913, "learning_rate": 5.828369189997399e-06, "loss": 0.13275413513183593, "step": 25025 }, { "epoch": 0.2164270088455785, "grad_norm": 11.643585555087421, "learning_rate": 5.8283012612913564e-06, "loss": 0.15627059936523438, "step": 25030 }, { "epoch": 0.21647024236712178, "grad_norm": 9.227004983839244, "learning_rate": 5.828233319541437e-06, "loss": 0.17672805786132811, "step": 25035 }, { "epoch": 0.21651347588866504, "grad_norm": 36.349687118783656, "learning_rate": 5.828165364747957e-06, "loss": 0.2235870361328125, "step": 25040 }, { "epoch": 0.2165567094102083, "grad_norm": 45.066933698206256, "learning_rate": 5.8280973969112275e-06, "loss": 0.21251373291015624, "step": 25045 }, { "epoch": 0.21659994293175155, "grad_norm": 18.768394259902635, "learning_rate": 5.828029416031562e-06, "loss": 0.1733613967895508, "step": 25050 }, { "epoch": 0.21664317645329484, "grad_norm": 22.948638212248984, "learning_rate": 5.827961422109275e-06, "loss": 0.14684600830078126, "step": 25055 }, { "epoch": 0.2166864099748381, "grad_norm": 3.809192148867104, "learning_rate": 5.82789341514468e-06, "loss": 0.1682575225830078, "step": 25060 }, { "epoch": 0.21672964349638135, "grad_norm": 8.81578199556255, "learning_rate": 5.827825395138091e-06, "loss": 0.06575927734375, "step": 25065 }, { "epoch": 0.2167728770179246, "grad_norm": 21.587080359614596, "learning_rate": 5.827757362089821e-06, "loss": 0.181671142578125, "step": 25070 }, { "epoch": 0.2168161105394679, "grad_norm": 20.781823469047417, "learning_rate": 5.827689316000183e-06, "loss": 0.11727294921875, "step": 25075 }, { "epoch": 0.21685934406101115, "grad_norm": 11.73123555627425, "learning_rate": 5.827621256869493e-06, "loss": 0.06363897323608399, "step": 25080 }, { "epoch": 0.2169025775825544, "grad_norm": 0.22914592310493523, "learning_rate": 5.827553184698063e-06, "loss": 0.1514068603515625, "step": 25085 }, { "epoch": 0.21694581110409766, "grad_norm": 6.417247962013896, "learning_rate": 5.827485099486207e-06, "loss": 0.1585357666015625, "step": 25090 }, { "epoch": 0.21698904462564095, "grad_norm": 6.263284315768331, "learning_rate": 5.82741700123424e-06, "loss": 0.09857444763183594, "step": 25095 }, { "epoch": 0.2170322781471842, "grad_norm": 6.372925760320328, "learning_rate": 5.827348889942476e-06, "loss": 0.14729461669921876, "step": 25100 }, { "epoch": 0.21707551166872746, "grad_norm": 5.482080713885376, "learning_rate": 5.827280765611228e-06, "loss": 0.2671028137207031, "step": 25105 }, { "epoch": 0.21711874519027072, "grad_norm": 2.2072732080525093, "learning_rate": 5.827212628240812e-06, "loss": 0.3993961334228516, "step": 25110 }, { "epoch": 0.217161978711814, "grad_norm": 30.222552437761404, "learning_rate": 5.827144477831541e-06, "loss": 0.3313079833984375, "step": 25115 }, { "epoch": 0.21720521223335726, "grad_norm": 0.3283293003441428, "learning_rate": 5.827076314383728e-06, "loss": 0.34199676513671873, "step": 25120 }, { "epoch": 0.21724844575490052, "grad_norm": 7.050488358071579, "learning_rate": 5.827008137897689e-06, "loss": 0.2580718994140625, "step": 25125 }, { "epoch": 0.21729167927644377, "grad_norm": 0.810793223634119, "learning_rate": 5.82693994837374e-06, "loss": 0.0254791259765625, "step": 25130 }, { "epoch": 0.21733491279798706, "grad_norm": 10.729898228360073, "learning_rate": 5.826871745812193e-06, "loss": 0.1959014892578125, "step": 25135 }, { "epoch": 0.2173781463195303, "grad_norm": 13.01826494533821, "learning_rate": 5.826803530213364e-06, "loss": 0.19911041259765624, "step": 25140 }, { "epoch": 0.21742137984107357, "grad_norm": 9.813727389855366, "learning_rate": 5.826735301577565e-06, "loss": 0.1381622314453125, "step": 25145 }, { "epoch": 0.21746461336261683, "grad_norm": 7.661280689843665, "learning_rate": 5.826667059905114e-06, "loss": 0.3891937255859375, "step": 25150 }, { "epoch": 0.2175078468841601, "grad_norm": 18.736548976950733, "learning_rate": 5.826598805196324e-06, "loss": 0.31654815673828124, "step": 25155 }, { "epoch": 0.21755108040570337, "grad_norm": 12.120455475397879, "learning_rate": 5.82653053745151e-06, "loss": 0.3198760986328125, "step": 25160 }, { "epoch": 0.21759431392724662, "grad_norm": 1.6209329323613053, "learning_rate": 5.826462256670987e-06, "loss": 0.068243408203125, "step": 25165 }, { "epoch": 0.21763754744878988, "grad_norm": 14.606815405155956, "learning_rate": 5.82639396285507e-06, "loss": 0.14729766845703124, "step": 25170 }, { "epoch": 0.21768078097033317, "grad_norm": 12.89748814175932, "learning_rate": 5.826325656004073e-06, "loss": 0.3329559326171875, "step": 25175 }, { "epoch": 0.21772401449187642, "grad_norm": 13.099078272654188, "learning_rate": 5.8262573361183126e-06, "loss": 0.3671409606933594, "step": 25180 }, { "epoch": 0.21776724801341968, "grad_norm": 17.95570861446287, "learning_rate": 5.826189003198103e-06, "loss": 0.07764339447021484, "step": 25185 }, { "epoch": 0.21781048153496294, "grad_norm": 0.7186146966997031, "learning_rate": 5.826120657243759e-06, "loss": 0.198956298828125, "step": 25190 }, { "epoch": 0.21785371505650622, "grad_norm": 42.81081624982518, "learning_rate": 5.826052298255596e-06, "loss": 0.24889984130859374, "step": 25195 }, { "epoch": 0.21789694857804948, "grad_norm": 26.18172936604297, "learning_rate": 5.82598392623393e-06, "loss": 0.3292682647705078, "step": 25200 }, { "epoch": 0.21794018209959273, "grad_norm": 27.96787428021308, "learning_rate": 5.825915541179076e-06, "loss": 0.325933837890625, "step": 25205 }, { "epoch": 0.21798341562113602, "grad_norm": 0.5731574949422275, "learning_rate": 5.82584714309135e-06, "loss": 0.08006134033203124, "step": 25210 }, { "epoch": 0.21802664914267927, "grad_norm": 13.834716413570192, "learning_rate": 5.825778731971065e-06, "loss": 0.08699951171875, "step": 25215 }, { "epoch": 0.21806988266422253, "grad_norm": 19.796106033753016, "learning_rate": 5.825710307818539e-06, "loss": 0.157159423828125, "step": 25220 }, { "epoch": 0.2181131161857658, "grad_norm": 18.16781303808143, "learning_rate": 5.825641870634087e-06, "loss": 0.3520111083984375, "step": 25225 }, { "epoch": 0.21815634970730907, "grad_norm": 3.1572255371603246, "learning_rate": 5.825573420418024e-06, "loss": 0.1256134033203125, "step": 25230 }, { "epoch": 0.21819958322885233, "grad_norm": 35.2695289136389, "learning_rate": 5.825504957170666e-06, "loss": 0.30977592468261717, "step": 25235 }, { "epoch": 0.21824281675039559, "grad_norm": 8.227750871518586, "learning_rate": 5.825436480892329e-06, "loss": 0.1109893798828125, "step": 25240 }, { "epoch": 0.21828605027193884, "grad_norm": 2.583190889102068, "learning_rate": 5.8253679915833285e-06, "loss": 0.03887863159179687, "step": 25245 }, { "epoch": 0.21832928379348213, "grad_norm": 18.686748755464322, "learning_rate": 5.82529948924398e-06, "loss": 0.52557373046875, "step": 25250 }, { "epoch": 0.21837251731502538, "grad_norm": 5.9824225779195395, "learning_rate": 5.825230973874601e-06, "loss": 0.11147613525390625, "step": 25255 }, { "epoch": 0.21841575083656864, "grad_norm": 25.530780334426762, "learning_rate": 5.825162445475506e-06, "loss": 0.2074726104736328, "step": 25260 }, { "epoch": 0.2184589843581119, "grad_norm": 0.6227010762853293, "learning_rate": 5.8250939040470114e-06, "loss": 0.06305313110351562, "step": 25265 }, { "epoch": 0.21850221787965518, "grad_norm": 16.58799504097456, "learning_rate": 5.825025349589432e-06, "loss": 0.15829391479492189, "step": 25270 }, { "epoch": 0.21854545140119844, "grad_norm": 9.628357042339518, "learning_rate": 5.8249567821030876e-06, "loss": 0.1517333984375, "step": 25275 }, { "epoch": 0.2185886849227417, "grad_norm": 27.357298468924817, "learning_rate": 5.824888201588291e-06, "loss": 0.42571272850036623, "step": 25280 }, { "epoch": 0.21863191844428495, "grad_norm": 2.9978558138571243, "learning_rate": 5.824819608045359e-06, "loss": 0.07730712890625, "step": 25285 }, { "epoch": 0.21867515196582824, "grad_norm": 0.9126777678748857, "learning_rate": 5.82475100147461e-06, "loss": 0.20704498291015624, "step": 25290 }, { "epoch": 0.2187183854873715, "grad_norm": 1.7325991631494666, "learning_rate": 5.824682381876358e-06, "loss": 0.08599777221679687, "step": 25295 }, { "epoch": 0.21876161900891475, "grad_norm": 36.30716135735567, "learning_rate": 5.824613749250921e-06, "loss": 0.3130653381347656, "step": 25300 }, { "epoch": 0.218804852530458, "grad_norm": 3.326978817832301, "learning_rate": 5.824545103598614e-06, "loss": 0.067803955078125, "step": 25305 }, { "epoch": 0.2188480860520013, "grad_norm": 0.2575007633655405, "learning_rate": 5.824476444919755e-06, "loss": 0.28379058837890625, "step": 25310 }, { "epoch": 0.21889131957354455, "grad_norm": 7.678708311196391, "learning_rate": 5.824407773214661e-06, "loss": 0.129400634765625, "step": 25315 }, { "epoch": 0.2189345530950878, "grad_norm": 12.57166440104466, "learning_rate": 5.824339088483647e-06, "loss": 0.17884521484375, "step": 25320 }, { "epoch": 0.21897778661663106, "grad_norm": 1.054771843768298, "learning_rate": 5.824270390727031e-06, "loss": 0.054308319091796876, "step": 25325 }, { "epoch": 0.21902102013817434, "grad_norm": 6.549423997790667, "learning_rate": 5.8242016799451294e-06, "loss": 0.2103424072265625, "step": 25330 }, { "epoch": 0.2190642536597176, "grad_norm": 5.954255588089411, "learning_rate": 5.82413295613826e-06, "loss": 0.16848182678222656, "step": 25335 }, { "epoch": 0.21910748718126086, "grad_norm": 2.814033778260658, "learning_rate": 5.824064219306738e-06, "loss": 0.07877960205078124, "step": 25340 }, { "epoch": 0.2191507207028041, "grad_norm": 14.058507938316207, "learning_rate": 5.823995469450882e-06, "loss": 0.10598983764648437, "step": 25345 }, { "epoch": 0.2191939542243474, "grad_norm": 9.936572703130777, "learning_rate": 5.823926706571007e-06, "loss": 0.11707420349121093, "step": 25350 }, { "epoch": 0.21923718774589065, "grad_norm": 9.250840374322788, "learning_rate": 5.823857930667433e-06, "loss": 0.0815826416015625, "step": 25355 }, { "epoch": 0.2192804212674339, "grad_norm": 20.55219272623324, "learning_rate": 5.823789141740476e-06, "loss": 0.43187103271484373, "step": 25360 }, { "epoch": 0.21932365478897717, "grad_norm": 12.600431981723105, "learning_rate": 5.823720339790452e-06, "loss": 0.16148147583007813, "step": 25365 }, { "epoch": 0.21936688831052045, "grad_norm": 1.2552130799145245, "learning_rate": 5.8236515248176805e-06, "loss": 0.10971412658691407, "step": 25370 }, { "epoch": 0.2194101218320637, "grad_norm": 10.123811622495978, "learning_rate": 5.823582696822475e-06, "loss": 0.29798431396484376, "step": 25375 }, { "epoch": 0.21945335535360697, "grad_norm": 31.948602721018478, "learning_rate": 5.823513855805158e-06, "loss": 0.12159500122070313, "step": 25380 }, { "epoch": 0.21949658887515025, "grad_norm": 14.021085137466027, "learning_rate": 5.823445001766045e-06, "loss": 0.48514556884765625, "step": 25385 }, { "epoch": 0.2195398223966935, "grad_norm": 16.584652367239848, "learning_rate": 5.8233761347054514e-06, "loss": 0.338616943359375, "step": 25390 }, { "epoch": 0.21958305591823676, "grad_norm": 41.90683588920464, "learning_rate": 5.8233072546236975e-06, "loss": 0.5764907836914063, "step": 25395 }, { "epoch": 0.21962628943978002, "grad_norm": 13.245555392286569, "learning_rate": 5.823238361521099e-06, "loss": 0.10927581787109375, "step": 25400 }, { "epoch": 0.2196695229613233, "grad_norm": 9.20641732562667, "learning_rate": 5.823169455397976e-06, "loss": 0.1553131103515625, "step": 25405 }, { "epoch": 0.21971275648286656, "grad_norm": 14.456684291113334, "learning_rate": 5.823100536254645e-06, "loss": 0.0817047119140625, "step": 25410 }, { "epoch": 0.21975599000440982, "grad_norm": 0.33902422255751835, "learning_rate": 5.823031604091423e-06, "loss": 0.09417572021484374, "step": 25415 }, { "epoch": 0.21979922352595307, "grad_norm": 1.2614328308201912, "learning_rate": 5.822962658908629e-06, "loss": 0.06998443603515625, "step": 25420 }, { "epoch": 0.21984245704749636, "grad_norm": 4.019828449479732, "learning_rate": 5.82289370070658e-06, "loss": 0.551904296875, "step": 25425 }, { "epoch": 0.21988569056903962, "grad_norm": 23.100774950056685, "learning_rate": 5.822824729485595e-06, "loss": 0.1706268310546875, "step": 25430 }, { "epoch": 0.21992892409058287, "grad_norm": 8.682483404525843, "learning_rate": 5.822755745245992e-06, "loss": 0.17250518798828124, "step": 25435 }, { "epoch": 0.21997215761212613, "grad_norm": 4.831208450563552, "learning_rate": 5.822686747988089e-06, "loss": 0.1006927490234375, "step": 25440 }, { "epoch": 0.2200153911336694, "grad_norm": 48.975669230193894, "learning_rate": 5.822617737712204e-06, "loss": 0.16958961486816407, "step": 25445 }, { "epoch": 0.22005862465521267, "grad_norm": 1.9139387956143041, "learning_rate": 5.822548714418655e-06, "loss": 0.08058319091796876, "step": 25450 }, { "epoch": 0.22010185817675593, "grad_norm": 2.4567392383683933, "learning_rate": 5.822479678107762e-06, "loss": 0.21885986328125, "step": 25455 }, { "epoch": 0.22014509169829918, "grad_norm": 0.453033247766341, "learning_rate": 5.822410628779842e-06, "loss": 0.5675254821777344, "step": 25460 }, { "epoch": 0.22018832521984247, "grad_norm": 5.02291350353667, "learning_rate": 5.822341566435212e-06, "loss": 0.1246917724609375, "step": 25465 }, { "epoch": 0.22023155874138572, "grad_norm": 3.735089908028627, "learning_rate": 5.822272491074193e-06, "loss": 0.1099639892578125, "step": 25470 }, { "epoch": 0.22027479226292898, "grad_norm": 123.95451850394318, "learning_rate": 5.822203402697102e-06, "loss": 0.2892333984375, "step": 25475 }, { "epoch": 0.22031802578447224, "grad_norm": 2.3054014996813557, "learning_rate": 5.82213430130426e-06, "loss": 0.17143478393554687, "step": 25480 }, { "epoch": 0.22036125930601552, "grad_norm": 15.442014790189921, "learning_rate": 5.822065186895982e-06, "loss": 0.09572906494140625, "step": 25485 }, { "epoch": 0.22040449282755878, "grad_norm": 4.354305595914568, "learning_rate": 5.82199605947259e-06, "loss": 0.211761474609375, "step": 25490 }, { "epoch": 0.22044772634910204, "grad_norm": 34.797183189902626, "learning_rate": 5.821926919034401e-06, "loss": 0.32127685546875, "step": 25495 }, { "epoch": 0.2204909598706453, "grad_norm": 39.47222576418826, "learning_rate": 5.821857765581735e-06, "loss": 0.19878158569335938, "step": 25500 }, { "epoch": 0.22053419339218858, "grad_norm": 2.6222425293244087, "learning_rate": 5.821788599114909e-06, "loss": 0.060845947265625, "step": 25505 }, { "epoch": 0.22057742691373183, "grad_norm": 11.458178296730457, "learning_rate": 5.821719419634245e-06, "loss": 0.1412994384765625, "step": 25510 }, { "epoch": 0.2206206604352751, "grad_norm": 33.02104573462116, "learning_rate": 5.821650227140059e-06, "loss": 0.16356353759765624, "step": 25515 }, { "epoch": 0.22066389395681835, "grad_norm": 4.632432734569488, "learning_rate": 5.821581021632672e-06, "loss": 0.1551116943359375, "step": 25520 }, { "epoch": 0.22070712747836163, "grad_norm": 3.12188533887498, "learning_rate": 5.821511803112403e-06, "loss": 0.45707550048828127, "step": 25525 }, { "epoch": 0.2207503609999049, "grad_norm": 14.761570788372218, "learning_rate": 5.82144257157957e-06, "loss": 0.690728759765625, "step": 25530 }, { "epoch": 0.22079359452144814, "grad_norm": 20.538891612095593, "learning_rate": 5.821373327034494e-06, "loss": 0.34072265625, "step": 25535 }, { "epoch": 0.2208368280429914, "grad_norm": 22.735963991966308, "learning_rate": 5.821304069477493e-06, "loss": 0.15746307373046875, "step": 25540 }, { "epoch": 0.22088006156453469, "grad_norm": 3.1994288018942223, "learning_rate": 5.821234798908887e-06, "loss": 0.3909393310546875, "step": 25545 }, { "epoch": 0.22092329508607794, "grad_norm": 0.48763088736900884, "learning_rate": 5.821165515328996e-06, "loss": 0.12425308227539063, "step": 25550 }, { "epoch": 0.2209665286076212, "grad_norm": 2.4288334131621974, "learning_rate": 5.821096218738138e-06, "loss": 0.09930763244628907, "step": 25555 }, { "epoch": 0.22100976212916446, "grad_norm": 0.4562340042255274, "learning_rate": 5.821026909136634e-06, "loss": 0.23492355346679689, "step": 25560 }, { "epoch": 0.22105299565070774, "grad_norm": 6.1040054946236575, "learning_rate": 5.820957586524803e-06, "loss": 0.4743988037109375, "step": 25565 }, { "epoch": 0.221096229172251, "grad_norm": 1.1185637778821338, "learning_rate": 5.820888250902965e-06, "loss": 0.1766021728515625, "step": 25570 }, { "epoch": 0.22113946269379425, "grad_norm": 2.775651837832494, "learning_rate": 5.82081890227144e-06, "loss": 0.06116218566894531, "step": 25575 }, { "epoch": 0.22118269621533754, "grad_norm": 8.494950964471691, "learning_rate": 5.820749540630546e-06, "loss": 0.225128173828125, "step": 25580 }, { "epoch": 0.2212259297368808, "grad_norm": 51.03417867861632, "learning_rate": 5.820680165980607e-06, "loss": 0.21938552856445312, "step": 25585 }, { "epoch": 0.22126916325842405, "grad_norm": 2.680253317146913, "learning_rate": 5.820610778321938e-06, "loss": 0.06427383422851562, "step": 25590 }, { "epoch": 0.2213123967799673, "grad_norm": 9.412458392435235, "learning_rate": 5.820541377654862e-06, "loss": 0.2517364501953125, "step": 25595 }, { "epoch": 0.2213556303015106, "grad_norm": 3.9996449181631877, "learning_rate": 5.820471963979698e-06, "loss": 0.10043869018554688, "step": 25600 }, { "epoch": 0.22139886382305385, "grad_norm": 1.096845134074595, "learning_rate": 5.820402537296767e-06, "loss": 0.028433990478515626, "step": 25605 }, { "epoch": 0.2214420973445971, "grad_norm": 32.04012528428138, "learning_rate": 5.820333097606389e-06, "loss": 0.3325164794921875, "step": 25610 }, { "epoch": 0.22148533086614036, "grad_norm": 4.167832775025494, "learning_rate": 5.820263644908885e-06, "loss": 0.11171321868896485, "step": 25615 }, { "epoch": 0.22152856438768365, "grad_norm": 2.2778191629174835, "learning_rate": 5.820194179204572e-06, "loss": 0.08509674072265624, "step": 25620 }, { "epoch": 0.2215717979092269, "grad_norm": 5.498013151228639, "learning_rate": 5.820124700493773e-06, "loss": 0.18227691650390626, "step": 25625 }, { "epoch": 0.22161503143077016, "grad_norm": 30.094114625160266, "learning_rate": 5.820055208776809e-06, "loss": 0.4230255126953125, "step": 25630 }, { "epoch": 0.22165826495231342, "grad_norm": 4.519768560246945, "learning_rate": 5.819985704054e-06, "loss": 0.110125732421875, "step": 25635 }, { "epoch": 0.2217014984738567, "grad_norm": 12.492100940964262, "learning_rate": 5.8199161863256656e-06, "loss": 0.38677139282226564, "step": 25640 }, { "epoch": 0.22174473199539996, "grad_norm": 4.290868925114096, "learning_rate": 5.819846655592126e-06, "loss": 0.1357707977294922, "step": 25645 }, { "epoch": 0.22178796551694321, "grad_norm": 11.440172284960756, "learning_rate": 5.819777111853704e-06, "loss": 0.09881134033203125, "step": 25650 }, { "epoch": 0.22183119903848647, "grad_norm": 7.448085615490204, "learning_rate": 5.819707555110718e-06, "loss": 0.06728668212890625, "step": 25655 }, { "epoch": 0.22187443256002976, "grad_norm": 14.220096903943352, "learning_rate": 5.819637985363491e-06, "loss": 0.31525459289550783, "step": 25660 }, { "epoch": 0.221917666081573, "grad_norm": 13.984014271234534, "learning_rate": 5.819568402612342e-06, "loss": 0.110235595703125, "step": 25665 }, { "epoch": 0.22196089960311627, "grad_norm": 52.093265322452055, "learning_rate": 5.819498806857593e-06, "loss": 0.5665817260742188, "step": 25670 }, { "epoch": 0.22200413312465953, "grad_norm": 12.777533169004027, "learning_rate": 5.8194291980995644e-06, "loss": 0.09911956787109374, "step": 25675 }, { "epoch": 0.2220473666462028, "grad_norm": 1.6943189631426578, "learning_rate": 5.819359576338578e-06, "loss": 0.14105224609375, "step": 25680 }, { "epoch": 0.22209060016774607, "grad_norm": 32.75117139487882, "learning_rate": 5.819289941574954e-06, "loss": 0.32725982666015624, "step": 25685 }, { "epoch": 0.22213383368928932, "grad_norm": 55.84720975325476, "learning_rate": 5.819220293809013e-06, "loss": 0.23580780029296874, "step": 25690 }, { "epoch": 0.22217706721083258, "grad_norm": 47.89613452703918, "learning_rate": 5.819150633041079e-06, "loss": 0.245965576171875, "step": 25695 }, { "epoch": 0.22222030073237586, "grad_norm": 12.313983215518602, "learning_rate": 5.81908095927147e-06, "loss": 0.189239501953125, "step": 25700 }, { "epoch": 0.22226353425391912, "grad_norm": 6.048909713568842, "learning_rate": 5.819011272500509e-06, "loss": 0.1217559814453125, "step": 25705 }, { "epoch": 0.22230676777546238, "grad_norm": 1.9824932569593599, "learning_rate": 5.818941572728518e-06, "loss": 0.1638120651245117, "step": 25710 }, { "epoch": 0.22235000129700563, "grad_norm": 2.2795767937064366, "learning_rate": 5.8188718599558165e-06, "loss": 0.2288970947265625, "step": 25715 }, { "epoch": 0.22239323481854892, "grad_norm": 9.699295586726702, "learning_rate": 5.818802134182727e-06, "loss": 0.178985595703125, "step": 25720 }, { "epoch": 0.22243646834009217, "grad_norm": 3.18579552971348, "learning_rate": 5.8187323954095715e-06, "loss": 0.11405715942382813, "step": 25725 }, { "epoch": 0.22247970186163543, "grad_norm": 15.727331371379304, "learning_rate": 5.8186626436366715e-06, "loss": 0.11202392578125, "step": 25730 }, { "epoch": 0.2225229353831787, "grad_norm": 33.094911980782896, "learning_rate": 5.818592878864348e-06, "loss": 0.290692138671875, "step": 25735 }, { "epoch": 0.22256616890472197, "grad_norm": 8.132323468520164, "learning_rate": 5.818523101092923e-06, "loss": 0.14851226806640624, "step": 25740 }, { "epoch": 0.22260940242626523, "grad_norm": 0.9102303757361114, "learning_rate": 5.818453310322719e-06, "loss": 0.0315093994140625, "step": 25745 }, { "epoch": 0.22265263594780849, "grad_norm": 26.82093048972473, "learning_rate": 5.818383506554058e-06, "loss": 0.34085540771484374, "step": 25750 }, { "epoch": 0.22269586946935177, "grad_norm": 0.6500002086242429, "learning_rate": 5.81831368978726e-06, "loss": 0.1109466552734375, "step": 25755 }, { "epoch": 0.22273910299089503, "grad_norm": 3.4229962822319595, "learning_rate": 5.818243860022649e-06, "loss": 0.24886474609375, "step": 25760 }, { "epoch": 0.22278233651243828, "grad_norm": 0.8914194305444733, "learning_rate": 5.818174017260545e-06, "loss": 0.04273300170898438, "step": 25765 }, { "epoch": 0.22282557003398154, "grad_norm": 4.434460882258766, "learning_rate": 5.818104161501274e-06, "loss": 0.06710357666015625, "step": 25770 }, { "epoch": 0.22286880355552482, "grad_norm": 1.1926066453801976, "learning_rate": 5.818034292745154e-06, "loss": 0.08885040283203124, "step": 25775 }, { "epoch": 0.22291203707706808, "grad_norm": 4.4863562428338675, "learning_rate": 5.817964410992509e-06, "loss": 0.1502716064453125, "step": 25780 }, { "epoch": 0.22295527059861134, "grad_norm": 11.30573938822315, "learning_rate": 5.8178945162436616e-06, "loss": 0.1083749771118164, "step": 25785 }, { "epoch": 0.2229985041201546, "grad_norm": 6.163594681252614, "learning_rate": 5.817824608498933e-06, "loss": 0.06976184844970704, "step": 25790 }, { "epoch": 0.22304173764169788, "grad_norm": 0.9257092856829112, "learning_rate": 5.817754687758647e-06, "loss": 0.11743392944335937, "step": 25795 }, { "epoch": 0.22308497116324114, "grad_norm": 53.3888551693154, "learning_rate": 5.817684754023124e-06, "loss": 0.40719146728515626, "step": 25800 }, { "epoch": 0.2231282046847844, "grad_norm": 27.41813958595524, "learning_rate": 5.81761480729269e-06, "loss": 0.31671142578125, "step": 25805 }, { "epoch": 0.22317143820632765, "grad_norm": 7.905451746265885, "learning_rate": 5.817544847567663e-06, "loss": 0.09303131103515624, "step": 25810 }, { "epoch": 0.22321467172787093, "grad_norm": 0.6329248839852332, "learning_rate": 5.8174748748483695e-06, "loss": 0.25113601684570314, "step": 25815 }, { "epoch": 0.2232579052494142, "grad_norm": 6.9135440084949, "learning_rate": 5.817404889135132e-06, "loss": 0.34400634765625, "step": 25820 }, { "epoch": 0.22330113877095745, "grad_norm": 23.83103129911678, "learning_rate": 5.81733489042827e-06, "loss": 0.4794708251953125, "step": 25825 }, { "epoch": 0.2233443722925007, "grad_norm": 45.74691438137485, "learning_rate": 5.8172648787281096e-06, "loss": 0.22309112548828125, "step": 25830 }, { "epoch": 0.223387605814044, "grad_norm": 2.515322409956157, "learning_rate": 5.8171948540349724e-06, "loss": 0.027776336669921874, "step": 25835 }, { "epoch": 0.22343083933558724, "grad_norm": 0.4531160741866392, "learning_rate": 5.817124816349181e-06, "loss": 0.1198089599609375, "step": 25840 }, { "epoch": 0.2234740728571305, "grad_norm": 7.063913950631645, "learning_rate": 5.81705476567106e-06, "loss": 0.05534820556640625, "step": 25845 }, { "epoch": 0.22351730637867376, "grad_norm": 6.851977155172563, "learning_rate": 5.81698470200093e-06, "loss": 0.118353271484375, "step": 25850 }, { "epoch": 0.22356053990021704, "grad_norm": 27.32928859131286, "learning_rate": 5.816914625339116e-06, "loss": 0.06677322387695313, "step": 25855 }, { "epoch": 0.2236037734217603, "grad_norm": 21.24097738784261, "learning_rate": 5.816844535685941e-06, "loss": 0.240924072265625, "step": 25860 }, { "epoch": 0.22364700694330356, "grad_norm": 5.031520500856941, "learning_rate": 5.816774433041727e-06, "loss": 0.21784210205078125, "step": 25865 }, { "epoch": 0.2236902404648468, "grad_norm": 0.1792073524572208, "learning_rate": 5.816704317406799e-06, "loss": 0.1857757568359375, "step": 25870 }, { "epoch": 0.2237334739863901, "grad_norm": 9.491121720377338, "learning_rate": 5.816634188781479e-06, "loss": 0.070050048828125, "step": 25875 }, { "epoch": 0.22377670750793335, "grad_norm": 17.92423150315839, "learning_rate": 5.816564047166091e-06, "loss": 0.23959426879882811, "step": 25880 }, { "epoch": 0.2238199410294766, "grad_norm": 11.18380127289862, "learning_rate": 5.816493892560959e-06, "loss": 0.08887958526611328, "step": 25885 }, { "epoch": 0.22386317455101987, "grad_norm": 4.418972132000027, "learning_rate": 5.816423724966406e-06, "loss": 0.1209259033203125, "step": 25890 }, { "epoch": 0.22390640807256315, "grad_norm": 21.006934199288615, "learning_rate": 5.8163535443827555e-06, "loss": 0.21713485717773437, "step": 25895 }, { "epoch": 0.2239496415941064, "grad_norm": 3.2885295110145916, "learning_rate": 5.8162833508103315e-06, "loss": 0.2422210693359375, "step": 25900 }, { "epoch": 0.22399287511564966, "grad_norm": 1.2282567287754893, "learning_rate": 5.816213144249457e-06, "loss": 0.07576980590820312, "step": 25905 }, { "epoch": 0.22403610863719292, "grad_norm": 1.630802149298134, "learning_rate": 5.816142924700457e-06, "loss": 0.1951568603515625, "step": 25910 }, { "epoch": 0.2240793421587362, "grad_norm": 4.501156632526596, "learning_rate": 5.816072692163654e-06, "loss": 0.15830574035644532, "step": 25915 }, { "epoch": 0.22412257568027946, "grad_norm": 12.05749372677869, "learning_rate": 5.816002446639373e-06, "loss": 0.20311279296875, "step": 25920 }, { "epoch": 0.22416580920182272, "grad_norm": 8.475660820228738, "learning_rate": 5.8159321881279375e-06, "loss": 0.22588653564453126, "step": 25925 }, { "epoch": 0.22420904272336598, "grad_norm": 4.586026044099138, "learning_rate": 5.815861916629672e-06, "loss": 0.10192413330078125, "step": 25930 }, { "epoch": 0.22425227624490926, "grad_norm": 3.0254529827261565, "learning_rate": 5.8157916321449e-06, "loss": 0.15615310668945312, "step": 25935 }, { "epoch": 0.22429550976645252, "grad_norm": 20.823704316669662, "learning_rate": 5.815721334673945e-06, "loss": 0.10272789001464844, "step": 25940 }, { "epoch": 0.22433874328799577, "grad_norm": 0.06684049238170213, "learning_rate": 5.815651024217132e-06, "loss": 0.10966873168945312, "step": 25945 }, { "epoch": 0.22438197680953906, "grad_norm": 1.527936533033905, "learning_rate": 5.815580700774786e-06, "loss": 0.07327785491943359, "step": 25950 }, { "epoch": 0.22442521033108231, "grad_norm": 11.001681873998857, "learning_rate": 5.8155103643472305e-06, "loss": 0.5255950927734375, "step": 25955 }, { "epoch": 0.22446844385262557, "grad_norm": 17.42544961215378, "learning_rate": 5.8154400149347906e-06, "loss": 0.21298904418945314, "step": 25960 }, { "epoch": 0.22451167737416883, "grad_norm": 74.65055408600425, "learning_rate": 5.815369652537789e-06, "loss": 0.6133228302001953, "step": 25965 }, { "epoch": 0.2245549108957121, "grad_norm": 4.704759476283139, "learning_rate": 5.815299277156551e-06, "loss": 0.03263378143310547, "step": 25970 }, { "epoch": 0.22459814441725537, "grad_norm": 4.013728445967779, "learning_rate": 5.815228888791402e-06, "loss": 0.0640228271484375, "step": 25975 }, { "epoch": 0.22464137793879863, "grad_norm": 19.910411060820962, "learning_rate": 5.815158487442667e-06, "loss": 0.17318115234375, "step": 25980 }, { "epoch": 0.22468461146034188, "grad_norm": 10.513496721878411, "learning_rate": 5.815088073110669e-06, "loss": 0.0929718017578125, "step": 25985 }, { "epoch": 0.22472784498188517, "grad_norm": 0.2032204487625674, "learning_rate": 5.815017645795733e-06, "loss": 0.34031028747558595, "step": 25990 }, { "epoch": 0.22477107850342842, "grad_norm": 10.694735670382666, "learning_rate": 5.814947205498186e-06, "loss": 0.16570968627929689, "step": 25995 }, { "epoch": 0.22481431202497168, "grad_norm": 41.57473412486229, "learning_rate": 5.814876752218349e-06, "loss": 0.17954788208007813, "step": 26000 }, { "epoch": 0.22485754554651494, "grad_norm": 35.55652833108414, "learning_rate": 5.814806285956551e-06, "loss": 0.5811607360839843, "step": 26005 }, { "epoch": 0.22490077906805822, "grad_norm": 28.387766646471704, "learning_rate": 5.8147358067131146e-06, "loss": 0.371295166015625, "step": 26010 }, { "epoch": 0.22494401258960148, "grad_norm": 2.1357570076899215, "learning_rate": 5.814665314488365e-06, "loss": 0.2024658203125, "step": 26015 }, { "epoch": 0.22498724611114473, "grad_norm": 91.11224384360638, "learning_rate": 5.814594809282629e-06, "loss": 0.15553131103515624, "step": 26020 }, { "epoch": 0.225030479632688, "grad_norm": 0.9715050859238215, "learning_rate": 5.814524291096229e-06, "loss": 0.2846771240234375, "step": 26025 }, { "epoch": 0.22507371315423128, "grad_norm": 6.868017387719313, "learning_rate": 5.814453759929492e-06, "loss": 0.19093170166015624, "step": 26030 }, { "epoch": 0.22511694667577453, "grad_norm": 8.824453719338047, "learning_rate": 5.8143832157827435e-06, "loss": 0.2754100799560547, "step": 26035 }, { "epoch": 0.2251601801973178, "grad_norm": 5.448413562013261, "learning_rate": 5.814312658656307e-06, "loss": 0.3206596374511719, "step": 26040 }, { "epoch": 0.22520341371886105, "grad_norm": 11.410767935291332, "learning_rate": 5.814242088550511e-06, "loss": 0.12416534423828125, "step": 26045 }, { "epoch": 0.22524664724040433, "grad_norm": 7.629773395624566, "learning_rate": 5.814171505465678e-06, "loss": 0.08182449340820312, "step": 26050 }, { "epoch": 0.2252898807619476, "grad_norm": 0.5033521550653207, "learning_rate": 5.8141009094021346e-06, "loss": 0.297601318359375, "step": 26055 }, { "epoch": 0.22533311428349084, "grad_norm": 2.7061438989013187, "learning_rate": 5.814030300360207e-06, "loss": 0.22818603515625, "step": 26060 }, { "epoch": 0.2253763478050341, "grad_norm": 0.397297649389728, "learning_rate": 5.8139596783402195e-06, "loss": 0.202301025390625, "step": 26065 }, { "epoch": 0.22541958132657738, "grad_norm": 63.83386720367278, "learning_rate": 5.813889043342498e-06, "loss": 0.17428970336914062, "step": 26070 }, { "epoch": 0.22546281484812064, "grad_norm": 24.303029366674707, "learning_rate": 5.813818395367371e-06, "loss": 0.3463165283203125, "step": 26075 }, { "epoch": 0.2255060483696639, "grad_norm": 7.737536813876754, "learning_rate": 5.813747734415161e-06, "loss": 0.527728271484375, "step": 26080 }, { "epoch": 0.22554928189120715, "grad_norm": 7.630592762489024, "learning_rate": 5.813677060486195e-06, "loss": 0.1810527801513672, "step": 26085 }, { "epoch": 0.22559251541275044, "grad_norm": 7.5116787287842355, "learning_rate": 5.8136063735808e-06, "loss": 0.07487030029296875, "step": 26090 }, { "epoch": 0.2256357489342937, "grad_norm": 26.008270244717995, "learning_rate": 5.8135356736993e-06, "loss": 0.14433364868164061, "step": 26095 }, { "epoch": 0.22567898245583695, "grad_norm": 1.5190842910943119, "learning_rate": 5.813464960842022e-06, "loss": 0.1260528564453125, "step": 26100 }, { "epoch": 0.2257222159773802, "grad_norm": 38.03894962113632, "learning_rate": 5.813394235009293e-06, "loss": 0.29230194091796874, "step": 26105 }, { "epoch": 0.2257654494989235, "grad_norm": 2.3874022804801704, "learning_rate": 5.8133234962014376e-06, "loss": 0.06909942626953125, "step": 26110 }, { "epoch": 0.22580868302046675, "grad_norm": 6.180371317209079, "learning_rate": 5.813252744418784e-06, "loss": 0.2550384521484375, "step": 26115 }, { "epoch": 0.22585191654201, "grad_norm": 14.68436602297059, "learning_rate": 5.813181979661657e-06, "loss": 0.10644073486328125, "step": 26120 }, { "epoch": 0.2258951500635533, "grad_norm": 11.946010402429554, "learning_rate": 5.8131112019303824e-06, "loss": 0.0659576416015625, "step": 26125 }, { "epoch": 0.22593838358509655, "grad_norm": 0.7158439686276798, "learning_rate": 5.813040411225287e-06, "loss": 0.0959503173828125, "step": 26130 }, { "epoch": 0.2259816171066398, "grad_norm": 11.177638246275684, "learning_rate": 5.812969607546699e-06, "loss": 0.11246261596679688, "step": 26135 }, { "epoch": 0.22602485062818306, "grad_norm": 38.74231065946183, "learning_rate": 5.8128987908949444e-06, "loss": 0.30489501953125, "step": 26140 }, { "epoch": 0.22606808414972634, "grad_norm": 1.9794234836730003, "learning_rate": 5.812827961270348e-06, "loss": 0.0990966796875, "step": 26145 }, { "epoch": 0.2261113176712696, "grad_norm": 6.191424209746124, "learning_rate": 5.812757118673237e-06, "loss": 0.3074455261230469, "step": 26150 }, { "epoch": 0.22615455119281286, "grad_norm": 18.81984094871375, "learning_rate": 5.8126862631039395e-06, "loss": 0.36279754638671874, "step": 26155 }, { "epoch": 0.22619778471435611, "grad_norm": 11.880189460429438, "learning_rate": 5.812615394562781e-06, "loss": 0.1857208251953125, "step": 26160 }, { "epoch": 0.2262410182358994, "grad_norm": 17.011446615073723, "learning_rate": 5.81254451305009e-06, "loss": 0.22405548095703126, "step": 26165 }, { "epoch": 0.22628425175744266, "grad_norm": 38.908733990704235, "learning_rate": 5.812473618566191e-06, "loss": 0.19219026565551758, "step": 26170 }, { "epoch": 0.2263274852789859, "grad_norm": 12.032591662025974, "learning_rate": 5.8124027111114134e-06, "loss": 0.37847900390625, "step": 26175 }, { "epoch": 0.22637071880052917, "grad_norm": 11.012730005202968, "learning_rate": 5.812331790686083e-06, "loss": 0.178515625, "step": 26180 }, { "epoch": 0.22641395232207245, "grad_norm": 16.18499112990027, "learning_rate": 5.8122608572905255e-06, "loss": 0.4275848388671875, "step": 26185 }, { "epoch": 0.2264571858436157, "grad_norm": 16.20455867992363, "learning_rate": 5.812189910925069e-06, "loss": 0.11394805908203125, "step": 26190 }, { "epoch": 0.22650041936515897, "grad_norm": 8.066351632390004, "learning_rate": 5.812118951590043e-06, "loss": 0.1696075439453125, "step": 26195 }, { "epoch": 0.22654365288670222, "grad_norm": 23.12586257060927, "learning_rate": 5.812047979285772e-06, "loss": 0.129571533203125, "step": 26200 }, { "epoch": 0.2265868864082455, "grad_norm": 17.911074125858445, "learning_rate": 5.811976994012584e-06, "loss": 0.10953369140625, "step": 26205 }, { "epoch": 0.22663011992978876, "grad_norm": 17.57067748289049, "learning_rate": 5.811905995770808e-06, "loss": 0.20863800048828124, "step": 26210 }, { "epoch": 0.22667335345133202, "grad_norm": 4.379329246191548, "learning_rate": 5.811834984560768e-06, "loss": 0.22965774536132813, "step": 26215 }, { "epoch": 0.22671658697287528, "grad_norm": 11.307648570702892, "learning_rate": 5.811763960382794e-06, "loss": 0.21675262451171876, "step": 26220 }, { "epoch": 0.22675982049441856, "grad_norm": 45.259250795274376, "learning_rate": 5.811692923237214e-06, "loss": 0.332373046875, "step": 26225 }, { "epoch": 0.22680305401596182, "grad_norm": 4.9586596087419235, "learning_rate": 5.811621873124354e-06, "loss": 0.179193115234375, "step": 26230 }, { "epoch": 0.22684628753750508, "grad_norm": 9.905924849518794, "learning_rate": 5.8115508100445425e-06, "loss": 0.27417755126953125, "step": 26235 }, { "epoch": 0.22688952105904833, "grad_norm": 10.714909333522181, "learning_rate": 5.8114797339981074e-06, "loss": 0.176416015625, "step": 26240 }, { "epoch": 0.22693275458059162, "grad_norm": 8.130227443848506, "learning_rate": 5.811408644985376e-06, "loss": 0.06959514617919922, "step": 26245 }, { "epoch": 0.22697598810213487, "grad_norm": 0.7997662212402546, "learning_rate": 5.8113375430066756e-06, "loss": 0.5541046142578125, "step": 26250 }, { "epoch": 0.22701922162367813, "grad_norm": 6.088861790047627, "learning_rate": 5.811266428062336e-06, "loss": 0.227337646484375, "step": 26255 }, { "epoch": 0.2270624551452214, "grad_norm": 2.800480264090222, "learning_rate": 5.811195300152683e-06, "loss": 0.06504974365234376, "step": 26260 }, { "epoch": 0.22710568866676467, "grad_norm": 6.165753725374706, "learning_rate": 5.811124159278046e-06, "loss": 0.09599075317382813, "step": 26265 }, { "epoch": 0.22714892218830793, "grad_norm": 5.255681206523729, "learning_rate": 5.8110530054387536e-06, "loss": 0.11220703125, "step": 26270 }, { "epoch": 0.22719215570985118, "grad_norm": 30.012539187609715, "learning_rate": 5.810981838635133e-06, "loss": 0.16846923828125, "step": 26275 }, { "epoch": 0.22723538923139444, "grad_norm": 0.6731743159447247, "learning_rate": 5.810910658867512e-06, "loss": 0.03770904541015625, "step": 26280 }, { "epoch": 0.22727862275293773, "grad_norm": 8.361909508709562, "learning_rate": 5.810839466136219e-06, "loss": 0.205035400390625, "step": 26285 }, { "epoch": 0.22732185627448098, "grad_norm": 4.603702043898383, "learning_rate": 5.810768260441584e-06, "loss": 0.164599609375, "step": 26290 }, { "epoch": 0.22736508979602424, "grad_norm": 10.706487486414492, "learning_rate": 5.810697041783932e-06, "loss": 0.137261962890625, "step": 26295 }, { "epoch": 0.2274083233175675, "grad_norm": 22.526427464932492, "learning_rate": 5.810625810163595e-06, "loss": 0.240185546875, "step": 26300 }, { "epoch": 0.22745155683911078, "grad_norm": 26.598663945472868, "learning_rate": 5.8105545655809004e-06, "loss": 0.18484420776367189, "step": 26305 }, { "epoch": 0.22749479036065404, "grad_norm": 7.481282540540808, "learning_rate": 5.8104833080361765e-06, "loss": 0.34638671875, "step": 26310 }, { "epoch": 0.2275380238821973, "grad_norm": 0.5856537363230181, "learning_rate": 5.810412037529751e-06, "loss": 0.20369949340820312, "step": 26315 }, { "epoch": 0.22758125740374058, "grad_norm": 5.55529686614856, "learning_rate": 5.810340754061955e-06, "loss": 0.080560302734375, "step": 26320 }, { "epoch": 0.22762449092528383, "grad_norm": 0.9456102885697579, "learning_rate": 5.8102694576331145e-06, "loss": 0.300506591796875, "step": 26325 }, { "epoch": 0.2276677244468271, "grad_norm": 2.1054551640454244, "learning_rate": 5.81019814824356e-06, "loss": 0.403082275390625, "step": 26330 }, { "epoch": 0.22771095796837035, "grad_norm": 4.590139307234638, "learning_rate": 5.8101268258936205e-06, "loss": 0.265673828125, "step": 26335 }, { "epoch": 0.22775419148991363, "grad_norm": 41.01935125485195, "learning_rate": 5.810055490583623e-06, "loss": 0.37776947021484375, "step": 26340 }, { "epoch": 0.2277974250114569, "grad_norm": 24.18011754227653, "learning_rate": 5.809984142313899e-06, "loss": 0.2127166748046875, "step": 26345 }, { "epoch": 0.22784065853300015, "grad_norm": 3.058662070325427, "learning_rate": 5.809912781084777e-06, "loss": 0.14519195556640624, "step": 26350 }, { "epoch": 0.2278838920545434, "grad_norm": 24.357487308379408, "learning_rate": 5.809841406896583e-06, "loss": 0.238201904296875, "step": 26355 }, { "epoch": 0.2279271255760867, "grad_norm": 3.5434259549277276, "learning_rate": 5.809770019749651e-06, "loss": 0.095849609375, "step": 26360 }, { "epoch": 0.22797035909762994, "grad_norm": 7.470391962143171, "learning_rate": 5.809698619644307e-06, "loss": 0.32177581787109377, "step": 26365 }, { "epoch": 0.2280135926191732, "grad_norm": 1.4464626020268683, "learning_rate": 5.809627206580882e-06, "loss": 0.2712047576904297, "step": 26370 }, { "epoch": 0.22805682614071646, "grad_norm": 3.8664506001674117, "learning_rate": 5.809555780559704e-06, "loss": 0.3270263671875, "step": 26375 }, { "epoch": 0.22810005966225974, "grad_norm": 3.1684753728622197, "learning_rate": 5.809484341581103e-06, "loss": 0.14888916015625, "step": 26380 }, { "epoch": 0.228143293183803, "grad_norm": 3.8131124731932973, "learning_rate": 5.809412889645408e-06, "loss": 0.04997100830078125, "step": 26385 }, { "epoch": 0.22818652670534625, "grad_norm": 2.56960256131103, "learning_rate": 5.80934142475295e-06, "loss": 0.06124858856201172, "step": 26390 }, { "epoch": 0.2282297602268895, "grad_norm": 9.79671578190334, "learning_rate": 5.809269946904057e-06, "loss": 0.411553955078125, "step": 26395 }, { "epoch": 0.2282729937484328, "grad_norm": 25.11913851121071, "learning_rate": 5.809198456099059e-06, "loss": 0.46378936767578127, "step": 26400 }, { "epoch": 0.22831622726997605, "grad_norm": 75.37326249689461, "learning_rate": 5.809126952338287e-06, "loss": 0.7716796875, "step": 26405 }, { "epoch": 0.2283594607915193, "grad_norm": 45.15560395251805, "learning_rate": 5.80905543562207e-06, "loss": 0.37069091796875, "step": 26410 }, { "epoch": 0.22840269431306257, "grad_norm": 6.266904493091642, "learning_rate": 5.808983905950736e-06, "loss": 0.1662994384765625, "step": 26415 }, { "epoch": 0.22844592783460585, "grad_norm": 5.432171377291413, "learning_rate": 5.8089123633246165e-06, "loss": 0.06753273010253906, "step": 26420 }, { "epoch": 0.2284891613561491, "grad_norm": 3.5711634920257946, "learning_rate": 5.808840807744043e-06, "loss": 0.3549102783203125, "step": 26425 }, { "epoch": 0.22853239487769236, "grad_norm": 1.1968335292860006, "learning_rate": 5.808769239209343e-06, "loss": 0.345587158203125, "step": 26430 }, { "epoch": 0.22857562839923562, "grad_norm": 6.2567617755936675, "learning_rate": 5.808697657720846e-06, "loss": 0.1304473876953125, "step": 26435 }, { "epoch": 0.2286188619207789, "grad_norm": 29.952039291944374, "learning_rate": 5.8086260632788856e-06, "loss": 0.15886573791503905, "step": 26440 }, { "epoch": 0.22866209544232216, "grad_norm": 6.312812688205817, "learning_rate": 5.8085544558837885e-06, "loss": 0.18624420166015626, "step": 26445 }, { "epoch": 0.22870532896386542, "grad_norm": 25.13287278551407, "learning_rate": 5.808482835535888e-06, "loss": 0.20149993896484375, "step": 26450 }, { "epoch": 0.22874856248540867, "grad_norm": 5.2751670521020015, "learning_rate": 5.8084112022355115e-06, "loss": 0.15145263671875, "step": 26455 }, { "epoch": 0.22879179600695196, "grad_norm": 0.7095409165183224, "learning_rate": 5.8083395559829914e-06, "loss": 0.08066024780273437, "step": 26460 }, { "epoch": 0.22883502952849522, "grad_norm": 15.844321781169272, "learning_rate": 5.808267896778657e-06, "loss": 0.5864494323730469, "step": 26465 }, { "epoch": 0.22887826305003847, "grad_norm": 4.071040191858094, "learning_rate": 5.80819622462284e-06, "loss": 0.1004364013671875, "step": 26470 }, { "epoch": 0.22892149657158173, "grad_norm": 2.534723686106914, "learning_rate": 5.808124539515869e-06, "loss": 0.3058921813964844, "step": 26475 }, { "epoch": 0.228964730093125, "grad_norm": 18.04430723568588, "learning_rate": 5.808052841458076e-06, "loss": 0.2623046875, "step": 26480 }, { "epoch": 0.22900796361466827, "grad_norm": 5.699098013428692, "learning_rate": 5.8079811304497915e-06, "loss": 0.10789108276367188, "step": 26485 }, { "epoch": 0.22905119713621153, "grad_norm": 47.89140729013285, "learning_rate": 5.807909406491346e-06, "loss": 0.31783294677734375, "step": 26490 }, { "epoch": 0.2290944306577548, "grad_norm": 0.5164301951054029, "learning_rate": 5.807837669583071e-06, "loss": 0.3527069091796875, "step": 26495 }, { "epoch": 0.22913766417929807, "grad_norm": 1.6222695928405593, "learning_rate": 5.807765919725297e-06, "loss": 0.16116485595703126, "step": 26500 }, { "epoch": 0.22918089770084132, "grad_norm": 13.87511699935762, "learning_rate": 5.807694156918354e-06, "loss": 0.06410369873046876, "step": 26505 }, { "epoch": 0.22922413122238458, "grad_norm": 20.62983792457027, "learning_rate": 5.807622381162574e-06, "loss": 0.144134521484375, "step": 26510 }, { "epoch": 0.22926736474392787, "grad_norm": 2.8273782776891268, "learning_rate": 5.807550592458288e-06, "loss": 0.1673675537109375, "step": 26515 }, { "epoch": 0.22931059826547112, "grad_norm": 8.936026838400984, "learning_rate": 5.807478790805826e-06, "loss": 0.111163330078125, "step": 26520 }, { "epoch": 0.22935383178701438, "grad_norm": 1.859386890402599, "learning_rate": 5.80740697620552e-06, "loss": 0.18479537963867188, "step": 26525 }, { "epoch": 0.22939706530855763, "grad_norm": 5.236169392851561, "learning_rate": 5.807335148657701e-06, "loss": 0.3483551025390625, "step": 26530 }, { "epoch": 0.22944029883010092, "grad_norm": 2.046640255889789, "learning_rate": 5.8072633081627e-06, "loss": 0.28349151611328127, "step": 26535 }, { "epoch": 0.22948353235164418, "grad_norm": 8.163905175201558, "learning_rate": 5.807191454720849e-06, "loss": 0.1284637451171875, "step": 26540 }, { "epoch": 0.22952676587318743, "grad_norm": 2.383519481717226, "learning_rate": 5.80711958833248e-06, "loss": 0.0484527587890625, "step": 26545 }, { "epoch": 0.2295699993947307, "grad_norm": 36.14061023953904, "learning_rate": 5.807047708997923e-06, "loss": 0.279498291015625, "step": 26550 }, { "epoch": 0.22961323291627397, "grad_norm": 8.99591068702847, "learning_rate": 5.806975816717511e-06, "loss": 0.2334930419921875, "step": 26555 }, { "epoch": 0.22965646643781723, "grad_norm": 8.601202661891618, "learning_rate": 5.806903911491573e-06, "loss": 0.20201416015625, "step": 26560 }, { "epoch": 0.2296996999593605, "grad_norm": 45.12500039190703, "learning_rate": 5.806831993320441e-06, "loss": 0.26514663696289065, "step": 26565 }, { "epoch": 0.22974293348090374, "grad_norm": 15.907616079386392, "learning_rate": 5.806760062204451e-06, "loss": 0.06954307556152343, "step": 26570 }, { "epoch": 0.22978616700244703, "grad_norm": 1.3159223396050792, "learning_rate": 5.80668811814393e-06, "loss": 0.08641700744628907, "step": 26575 }, { "epoch": 0.22982940052399028, "grad_norm": 0.4815453368045794, "learning_rate": 5.806616161139211e-06, "loss": 0.35244140625, "step": 26580 }, { "epoch": 0.22987263404553354, "grad_norm": 7.9220965463542425, "learning_rate": 5.806544191190627e-06, "loss": 0.17517776489257814, "step": 26585 }, { "epoch": 0.2299158675670768, "grad_norm": 16.751599537746877, "learning_rate": 5.806472208298509e-06, "loss": 0.25699005126953123, "step": 26590 }, { "epoch": 0.22995910108862008, "grad_norm": 47.08619237201414, "learning_rate": 5.8064002124631885e-06, "loss": 0.3747589111328125, "step": 26595 }, { "epoch": 0.23000233461016334, "grad_norm": 26.723294667896262, "learning_rate": 5.806328203684999e-06, "loss": 0.327008056640625, "step": 26600 }, { "epoch": 0.2300455681317066, "grad_norm": 0.5798465174325695, "learning_rate": 5.806256181964271e-06, "loss": 0.113250732421875, "step": 26605 }, { "epoch": 0.23008880165324985, "grad_norm": 0.7042237713995043, "learning_rate": 5.8061841473013385e-06, "loss": 0.203167724609375, "step": 26610 }, { "epoch": 0.23013203517479314, "grad_norm": 1.831778799065678, "learning_rate": 5.806112099696532e-06, "loss": 0.14128570556640624, "step": 26615 }, { "epoch": 0.2301752686963364, "grad_norm": 3.860253179775625, "learning_rate": 5.806040039150184e-06, "loss": 0.150543212890625, "step": 26620 }, { "epoch": 0.23021850221787965, "grad_norm": 5.30956479617345, "learning_rate": 5.8059679656626285e-06, "loss": 0.3985107421875, "step": 26625 }, { "epoch": 0.2302617357394229, "grad_norm": 20.62795955624525, "learning_rate": 5.805895879234196e-06, "loss": 0.19256744384765626, "step": 26630 }, { "epoch": 0.2303049692609662, "grad_norm": 4.714857008825083, "learning_rate": 5.80582377986522e-06, "loss": 0.10848426818847656, "step": 26635 }, { "epoch": 0.23034820278250945, "grad_norm": 59.42565260493981, "learning_rate": 5.805751667556032e-06, "loss": 0.2794486999511719, "step": 26640 }, { "epoch": 0.2303914363040527, "grad_norm": 52.004391512312615, "learning_rate": 5.8056795423069654e-06, "loss": 0.2824138641357422, "step": 26645 }, { "epoch": 0.23043466982559596, "grad_norm": 0.24167551811679444, "learning_rate": 5.8056074041183535e-06, "loss": 0.16840858459472657, "step": 26650 }, { "epoch": 0.23047790334713925, "grad_norm": 2.577354238852904, "learning_rate": 5.805535252990527e-06, "loss": 0.3075916290283203, "step": 26655 }, { "epoch": 0.2305211368686825, "grad_norm": 1.39189716243132, "learning_rate": 5.80546308892382e-06, "loss": 0.22451934814453126, "step": 26660 }, { "epoch": 0.23056437039022576, "grad_norm": 21.34309365465928, "learning_rate": 5.805390911918566e-06, "loss": 0.306298828125, "step": 26665 }, { "epoch": 0.23060760391176902, "grad_norm": 35.13041182173666, "learning_rate": 5.8053187219750965e-06, "loss": 0.36636962890625, "step": 26670 }, { "epoch": 0.2306508374333123, "grad_norm": 30.685577470237106, "learning_rate": 5.805246519093744e-06, "loss": 0.33953857421875, "step": 26675 }, { "epoch": 0.23069407095485556, "grad_norm": 9.397143023528955, "learning_rate": 5.805174303274844e-06, "loss": 0.122442626953125, "step": 26680 }, { "epoch": 0.2307373044763988, "grad_norm": 10.053209001061445, "learning_rate": 5.8051020745187274e-06, "loss": 0.262860107421875, "step": 26685 }, { "epoch": 0.2307805379979421, "grad_norm": 2.52645307039583, "learning_rate": 5.805029832825728e-06, "loss": 0.0912109375, "step": 26690 }, { "epoch": 0.23082377151948535, "grad_norm": 14.369387340498129, "learning_rate": 5.804957578196178e-06, "loss": 0.12191162109375, "step": 26695 }, { "epoch": 0.2308670050410286, "grad_norm": 1.643834109105857, "learning_rate": 5.804885310630412e-06, "loss": 0.23928985595703126, "step": 26700 }, { "epoch": 0.23091023856257187, "grad_norm": 17.484706754259076, "learning_rate": 5.804813030128763e-06, "loss": 0.19804763793945312, "step": 26705 }, { "epoch": 0.23095347208411515, "grad_norm": 16.758767252621205, "learning_rate": 5.804740736691565e-06, "loss": 0.1201751708984375, "step": 26710 }, { "epoch": 0.2309967056056584, "grad_norm": 10.835794307733147, "learning_rate": 5.804668430319149e-06, "loss": 0.3552459716796875, "step": 26715 }, { "epoch": 0.23103993912720167, "grad_norm": 37.314027719828374, "learning_rate": 5.804596111011851e-06, "loss": 0.41512908935546877, "step": 26720 }, { "epoch": 0.23108317264874492, "grad_norm": 1.549283371917004, "learning_rate": 5.8045237787700035e-06, "loss": 0.11293411254882812, "step": 26725 }, { "epoch": 0.2311264061702882, "grad_norm": 18.035621576380752, "learning_rate": 5.80445143359394e-06, "loss": 0.35369415283203126, "step": 26730 }, { "epoch": 0.23116963969183146, "grad_norm": 38.743687629958195, "learning_rate": 5.804379075483994e-06, "loss": 0.19017333984375, "step": 26735 }, { "epoch": 0.23121287321337472, "grad_norm": 0.41395752738426034, "learning_rate": 5.8043067044405e-06, "loss": 0.01697959899902344, "step": 26740 }, { "epoch": 0.23125610673491798, "grad_norm": 51.59791663315031, "learning_rate": 5.80423432046379e-06, "loss": 0.20947723388671874, "step": 26745 }, { "epoch": 0.23129934025646126, "grad_norm": 6.399171463612919, "learning_rate": 5.8041619235542e-06, "loss": 0.07149734497070312, "step": 26750 }, { "epoch": 0.23134257377800452, "grad_norm": 11.327484026589705, "learning_rate": 5.804089513712063e-06, "loss": 0.1744251251220703, "step": 26755 }, { "epoch": 0.23138580729954777, "grad_norm": 9.644616328647995, "learning_rate": 5.8040170909377135e-06, "loss": 0.16832237243652343, "step": 26760 }, { "epoch": 0.23142904082109103, "grad_norm": 23.795883167635772, "learning_rate": 5.803944655231484e-06, "loss": 0.29196624755859374, "step": 26765 }, { "epoch": 0.23147227434263432, "grad_norm": 2.1375113332306444, "learning_rate": 5.80387220659371e-06, "loss": 0.083074951171875, "step": 26770 }, { "epoch": 0.23151550786417757, "grad_norm": 16.579152248329166, "learning_rate": 5.8037997450247245e-06, "loss": 0.1514373779296875, "step": 26775 }, { "epoch": 0.23155874138572083, "grad_norm": 14.742762699338446, "learning_rate": 5.803727270524863e-06, "loss": 0.09342212677001953, "step": 26780 }, { "epoch": 0.23160197490726409, "grad_norm": 2.6246879509777723, "learning_rate": 5.80365478309446e-06, "loss": 0.176910400390625, "step": 26785 }, { "epoch": 0.23164520842880737, "grad_norm": 0.17888032740258913, "learning_rate": 5.803582282733848e-06, "loss": 0.14323883056640624, "step": 26790 }, { "epoch": 0.23168844195035063, "grad_norm": 1.4375934548661817, "learning_rate": 5.803509769443361e-06, "loss": 0.04434814453125, "step": 26795 }, { "epoch": 0.23173167547189388, "grad_norm": 3.953786283931232, "learning_rate": 5.803437243223336e-06, "loss": 0.12030029296875, "step": 26800 }, { "epoch": 0.23177490899343714, "grad_norm": 43.48176813082643, "learning_rate": 5.803364704074106e-06, "loss": 0.45283203125, "step": 26805 }, { "epoch": 0.23181814251498042, "grad_norm": 15.137482198179628, "learning_rate": 5.803292151996006e-06, "loss": 0.19140625, "step": 26810 }, { "epoch": 0.23186137603652368, "grad_norm": 7.965728795954377, "learning_rate": 5.8032195869893695e-06, "loss": 0.08154373168945313, "step": 26815 }, { "epoch": 0.23190460955806694, "grad_norm": 0.14245797088043272, "learning_rate": 5.803147009054533e-06, "loss": 0.3901054382324219, "step": 26820 }, { "epoch": 0.2319478430796102, "grad_norm": 0.8474690736398697, "learning_rate": 5.803074418191829e-06, "loss": 0.13083343505859374, "step": 26825 }, { "epoch": 0.23199107660115348, "grad_norm": 0.8721878032379633, "learning_rate": 5.803001814401594e-06, "loss": 0.2969198226928711, "step": 26830 }, { "epoch": 0.23203431012269674, "grad_norm": 6.984558245891264, "learning_rate": 5.802929197684162e-06, "loss": 0.1110565185546875, "step": 26835 }, { "epoch": 0.23207754364424, "grad_norm": 2.053159401057871, "learning_rate": 5.802856568039869e-06, "loss": 0.339727783203125, "step": 26840 }, { "epoch": 0.23212077716578325, "grad_norm": 15.8487647596724, "learning_rate": 5.8027839254690485e-06, "loss": 0.35166015625, "step": 26845 }, { "epoch": 0.23216401068732653, "grad_norm": 2.159658499086385, "learning_rate": 5.802711269972037e-06, "loss": 0.3352481842041016, "step": 26850 }, { "epoch": 0.2322072442088698, "grad_norm": 32.93522949838993, "learning_rate": 5.802638601549168e-06, "loss": 0.1185272216796875, "step": 26855 }, { "epoch": 0.23225047773041305, "grad_norm": 14.577479626565324, "learning_rate": 5.8025659202007775e-06, "loss": 0.08087387084960937, "step": 26860 }, { "epoch": 0.2322937112519563, "grad_norm": 17.46857282177834, "learning_rate": 5.8024932259272e-06, "loss": 0.1602569580078125, "step": 26865 }, { "epoch": 0.2323369447734996, "grad_norm": 9.164720465337853, "learning_rate": 5.8024205187287726e-06, "loss": 0.051904296875, "step": 26870 }, { "epoch": 0.23238017829504284, "grad_norm": 29.874141205927998, "learning_rate": 5.802347798605829e-06, "loss": 0.37047348022460935, "step": 26875 }, { "epoch": 0.2324234118165861, "grad_norm": 40.22222694151068, "learning_rate": 5.802275065558705e-06, "loss": 0.4748291015625, "step": 26880 }, { "epoch": 0.23246664533812939, "grad_norm": 1.5502529501225313, "learning_rate": 5.8022023195877356e-06, "loss": 0.07659912109375, "step": 26885 }, { "epoch": 0.23250987885967264, "grad_norm": 37.24568290147914, "learning_rate": 5.802129560693256e-06, "loss": 0.45885009765625, "step": 26890 }, { "epoch": 0.2325531123812159, "grad_norm": 0.11906831440834671, "learning_rate": 5.802056788875604e-06, "loss": 0.2506889343261719, "step": 26895 }, { "epoch": 0.23259634590275916, "grad_norm": 3.666040632250846, "learning_rate": 5.801984004135113e-06, "loss": 0.17156524658203126, "step": 26900 }, { "epoch": 0.23263957942430244, "grad_norm": 21.045144568430466, "learning_rate": 5.80191120647212e-06, "loss": 0.2689544677734375, "step": 26905 }, { "epoch": 0.2326828129458457, "grad_norm": 3.165091837809561, "learning_rate": 5.801838395886959e-06, "loss": 0.088995361328125, "step": 26910 }, { "epoch": 0.23272604646738895, "grad_norm": 25.035814898365537, "learning_rate": 5.801765572379967e-06, "loss": 0.2277801513671875, "step": 26915 }, { "epoch": 0.2327692799889322, "grad_norm": 1.0366344790293252, "learning_rate": 5.801692735951481e-06, "loss": 0.1112091064453125, "step": 26920 }, { "epoch": 0.2328125135104755, "grad_norm": 0.2235416567446518, "learning_rate": 5.801619886601835e-06, "loss": 0.13759765625, "step": 26925 }, { "epoch": 0.23285574703201875, "grad_norm": 20.388690431560885, "learning_rate": 5.801547024331365e-06, "loss": 0.2793704986572266, "step": 26930 }, { "epoch": 0.232898980553562, "grad_norm": 0.1735965549934319, "learning_rate": 5.801474149140409e-06, "loss": 0.10464630126953126, "step": 26935 }, { "epoch": 0.23294221407510526, "grad_norm": 14.606830898414689, "learning_rate": 5.8014012610293e-06, "loss": 0.10087432861328124, "step": 26940 }, { "epoch": 0.23298544759664855, "grad_norm": 9.730623123752661, "learning_rate": 5.801328359998377e-06, "loss": 0.09963607788085938, "step": 26945 }, { "epoch": 0.2330286811181918, "grad_norm": 0.345010139038883, "learning_rate": 5.801255446047975e-06, "loss": 0.09613494873046875, "step": 26950 }, { "epoch": 0.23307191463973506, "grad_norm": 5.245978987337721, "learning_rate": 5.80118251917843e-06, "loss": 0.06133842468261719, "step": 26955 }, { "epoch": 0.23311514816127832, "grad_norm": 14.688187340461733, "learning_rate": 5.80110957939008e-06, "loss": 0.21287841796875, "step": 26960 }, { "epoch": 0.2331583816828216, "grad_norm": 8.567826631759361, "learning_rate": 5.801036626683259e-06, "loss": 0.207470703125, "step": 26965 }, { "epoch": 0.23320161520436486, "grad_norm": 9.834762050904587, "learning_rate": 5.800963661058305e-06, "loss": 0.136480712890625, "step": 26970 }, { "epoch": 0.23324484872590812, "grad_norm": 3.4964435614976956, "learning_rate": 5.800890682515553e-06, "loss": 0.17018280029296876, "step": 26975 }, { "epoch": 0.23328808224745137, "grad_norm": 2.863159594429064, "learning_rate": 5.800817691055342e-06, "loss": 0.54620361328125, "step": 26980 }, { "epoch": 0.23333131576899466, "grad_norm": 2.2862243974764147, "learning_rate": 5.800744686678007e-06, "loss": 0.143359375, "step": 26985 }, { "epoch": 0.2333745492905379, "grad_norm": 3.0817856893970177, "learning_rate": 5.8006716693838845e-06, "loss": 0.1521728515625, "step": 26990 }, { "epoch": 0.23341778281208117, "grad_norm": 46.70984449295377, "learning_rate": 5.800598639173312e-06, "loss": 0.36827850341796875, "step": 26995 }, { "epoch": 0.23346101633362443, "grad_norm": 15.108178345771003, "learning_rate": 5.8005255960466265e-06, "loss": 0.20019989013671874, "step": 27000 }, { "epoch": 0.2335042498551677, "grad_norm": 0.5601701869521354, "learning_rate": 5.800452540004164e-06, "loss": 0.05831451416015625, "step": 27005 }, { "epoch": 0.23354748337671097, "grad_norm": 12.430542966412908, "learning_rate": 5.800379471046262e-06, "loss": 0.1432220458984375, "step": 27010 }, { "epoch": 0.23359071689825422, "grad_norm": 31.23311781501841, "learning_rate": 5.800306389173258e-06, "loss": 0.1742034912109375, "step": 27015 }, { "epoch": 0.23363395041979748, "grad_norm": 38.48386602611384, "learning_rate": 5.800233294385487e-06, "loss": 0.40581512451171875, "step": 27020 }, { "epoch": 0.23367718394134077, "grad_norm": 1.5347705866851735, "learning_rate": 5.800160186683288e-06, "loss": 0.23231201171875, "step": 27025 }, { "epoch": 0.23372041746288402, "grad_norm": 26.80313834485471, "learning_rate": 5.800087066066998e-06, "loss": 0.2446044921875, "step": 27030 }, { "epoch": 0.23376365098442728, "grad_norm": 3.2462910730003025, "learning_rate": 5.800013932536953e-06, "loss": 0.1378814697265625, "step": 27035 }, { "epoch": 0.23380688450597054, "grad_norm": 7.698096522179099, "learning_rate": 5.799940786093492e-06, "loss": 0.19968719482421876, "step": 27040 }, { "epoch": 0.23385011802751382, "grad_norm": 7.115103273777437, "learning_rate": 5.799867626736951e-06, "loss": 0.19820556640625, "step": 27045 }, { "epoch": 0.23389335154905708, "grad_norm": 1.506125134535628, "learning_rate": 5.799794454467668e-06, "loss": 0.06421890258789062, "step": 27050 }, { "epoch": 0.23393658507060033, "grad_norm": 6.943569912387333, "learning_rate": 5.799721269285981e-06, "loss": 0.27356948852539065, "step": 27055 }, { "epoch": 0.23397981859214362, "grad_norm": 27.121574021360882, "learning_rate": 5.799648071192226e-06, "loss": 0.40732269287109374, "step": 27060 }, { "epoch": 0.23402305211368687, "grad_norm": 1.3978882344938004, "learning_rate": 5.799574860186742e-06, "loss": 0.1954986572265625, "step": 27065 }, { "epoch": 0.23406628563523013, "grad_norm": 1.8537855398776213, "learning_rate": 5.799501636269866e-06, "loss": 0.0878265380859375, "step": 27070 }, { "epoch": 0.2341095191567734, "grad_norm": 0.9227655806305571, "learning_rate": 5.799428399441936e-06, "loss": 0.3048248291015625, "step": 27075 }, { "epoch": 0.23415275267831667, "grad_norm": 0.8766077040085006, "learning_rate": 5.799355149703289e-06, "loss": 0.12191619873046874, "step": 27080 }, { "epoch": 0.23419598619985993, "grad_norm": 33.51539208468516, "learning_rate": 5.799281887054264e-06, "loss": 0.18135337829589843, "step": 27085 }, { "epoch": 0.23423921972140319, "grad_norm": 0.8733842893265069, "learning_rate": 5.799208611495198e-06, "loss": 0.2880523681640625, "step": 27090 }, { "epoch": 0.23428245324294644, "grad_norm": 42.68099319140069, "learning_rate": 5.7991353230264296e-06, "loss": 0.58939208984375, "step": 27095 }, { "epoch": 0.23432568676448973, "grad_norm": 13.231678278492488, "learning_rate": 5.7990620216482964e-06, "loss": 0.15171661376953124, "step": 27100 }, { "epoch": 0.23436892028603298, "grad_norm": 7.496386755828232, "learning_rate": 5.7989887073611356e-06, "loss": 0.074493408203125, "step": 27105 }, { "epoch": 0.23441215380757624, "grad_norm": 5.605083416465063, "learning_rate": 5.798915380165287e-06, "loss": 0.5461761474609375, "step": 27110 }, { "epoch": 0.2344553873291195, "grad_norm": 1.0857148291847527, "learning_rate": 5.798842040061088e-06, "loss": 0.20149459838867187, "step": 27115 }, { "epoch": 0.23449862085066278, "grad_norm": 3.583094496552174, "learning_rate": 5.798768687048877e-06, "loss": 0.04519729614257813, "step": 27120 }, { "epoch": 0.23454185437220604, "grad_norm": 1.7700232944049414, "learning_rate": 5.798695321128991e-06, "loss": 0.19761810302734376, "step": 27125 }, { "epoch": 0.2345850878937493, "grad_norm": 9.200266237849048, "learning_rate": 5.798621942301771e-06, "loss": 0.069598388671875, "step": 27130 }, { "epoch": 0.23462832141529255, "grad_norm": 49.40459737362597, "learning_rate": 5.7985485505675525e-06, "loss": 0.3207275390625, "step": 27135 }, { "epoch": 0.23467155493683584, "grad_norm": 1.0590757324778735, "learning_rate": 5.7984751459266765e-06, "loss": 0.06735916137695312, "step": 27140 }, { "epoch": 0.2347147884583791, "grad_norm": 0.4667230765848944, "learning_rate": 5.798401728379479e-06, "loss": 0.1169189453125, "step": 27145 }, { "epoch": 0.23475802197992235, "grad_norm": 39.70126551829187, "learning_rate": 5.798328297926301e-06, "loss": 0.38967132568359375, "step": 27150 }, { "epoch": 0.2348012555014656, "grad_norm": 13.53411084103601, "learning_rate": 5.79825485456748e-06, "loss": 0.16085433959960938, "step": 27155 }, { "epoch": 0.2348444890230089, "grad_norm": 41.98373137263648, "learning_rate": 5.798181398303355e-06, "loss": 0.335723876953125, "step": 27160 }, { "epoch": 0.23488772254455215, "grad_norm": 3.3658943318540704, "learning_rate": 5.798107929134265e-06, "loss": 0.40557403564453126, "step": 27165 }, { "epoch": 0.2349309560660954, "grad_norm": 8.271813737890074, "learning_rate": 5.798034447060548e-06, "loss": 0.050537109375, "step": 27170 }, { "epoch": 0.23497418958763866, "grad_norm": 2.3705294002318418, "learning_rate": 5.7979609520825425e-06, "loss": 0.060181045532226564, "step": 27175 }, { "epoch": 0.23501742310918194, "grad_norm": 49.08617887816338, "learning_rate": 5.79788744420059e-06, "loss": 0.20013427734375, "step": 27180 }, { "epoch": 0.2350606566307252, "grad_norm": 17.59170976962075, "learning_rate": 5.797813923415027e-06, "loss": 0.39136962890625, "step": 27185 }, { "epoch": 0.23510389015226846, "grad_norm": 8.556475416234807, "learning_rate": 5.797740389726193e-06, "loss": 0.150323486328125, "step": 27190 }, { "epoch": 0.23514712367381171, "grad_norm": 10.307112087177016, "learning_rate": 5.797666843134429e-06, "loss": 0.139532470703125, "step": 27195 }, { "epoch": 0.235190357195355, "grad_norm": 6.204055565486545, "learning_rate": 5.797593283640072e-06, "loss": 0.0810089111328125, "step": 27200 }, { "epoch": 0.23523359071689826, "grad_norm": 51.33441436393407, "learning_rate": 5.797519711243461e-06, "loss": 0.1138916015625, "step": 27205 }, { "epoch": 0.2352768242384415, "grad_norm": 18.413618157579307, "learning_rate": 5.7974461259449365e-06, "loss": 0.18305206298828125, "step": 27210 }, { "epoch": 0.23532005775998477, "grad_norm": 1.4865028658115056, "learning_rate": 5.797372527744838e-06, "loss": 0.07625617980957031, "step": 27215 }, { "epoch": 0.23536329128152805, "grad_norm": 22.257666840392222, "learning_rate": 5.797298916643506e-06, "loss": 0.12912979125976562, "step": 27220 }, { "epoch": 0.2354065248030713, "grad_norm": 6.34394772115398, "learning_rate": 5.797225292641277e-06, "loss": 0.0852203369140625, "step": 27225 }, { "epoch": 0.23544975832461457, "grad_norm": 23.999589968075732, "learning_rate": 5.797151655738492e-06, "loss": 0.50579833984375, "step": 27230 }, { "epoch": 0.23549299184615782, "grad_norm": 6.917630969880278, "learning_rate": 5.797078005935491e-06, "loss": 0.04025344848632813, "step": 27235 }, { "epoch": 0.2355362253677011, "grad_norm": 12.679218800070938, "learning_rate": 5.797004343232614e-06, "loss": 0.09723663330078125, "step": 27240 }, { "epoch": 0.23557945888924436, "grad_norm": 4.449631959060471, "learning_rate": 5.796930667630199e-06, "loss": 0.377667236328125, "step": 27245 }, { "epoch": 0.23562269241078762, "grad_norm": 16.743762663418106, "learning_rate": 5.796856979128588e-06, "loss": 0.097308349609375, "step": 27250 }, { "epoch": 0.2356659259323309, "grad_norm": 2.442512383622368, "learning_rate": 5.796783277728119e-06, "loss": 0.12164878845214844, "step": 27255 }, { "epoch": 0.23570915945387416, "grad_norm": 3.0430584665418854, "learning_rate": 5.796709563429133e-06, "loss": 0.1955474853515625, "step": 27260 }, { "epoch": 0.23575239297541742, "grad_norm": 2.795824177036621, "learning_rate": 5.79663583623197e-06, "loss": 0.04157562255859375, "step": 27265 }, { "epoch": 0.23579562649696068, "grad_norm": 2.0678099711175344, "learning_rate": 5.796562096136969e-06, "loss": 0.1579833984375, "step": 27270 }, { "epoch": 0.23583886001850396, "grad_norm": 15.09105499852598, "learning_rate": 5.79648834314447e-06, "loss": 0.08837661743164063, "step": 27275 }, { "epoch": 0.23588209354004722, "grad_norm": 3.9559627786875975, "learning_rate": 5.796414577254815e-06, "loss": 0.3535888671875, "step": 27280 }, { "epoch": 0.23592532706159047, "grad_norm": 16.076496280743516, "learning_rate": 5.796340798468343e-06, "loss": 0.20304718017578124, "step": 27285 }, { "epoch": 0.23596856058313373, "grad_norm": 2.2297192644809822, "learning_rate": 5.796267006785395e-06, "loss": 0.1465158462524414, "step": 27290 }, { "epoch": 0.23601179410467701, "grad_norm": 36.306157380730056, "learning_rate": 5.7961932022063095e-06, "loss": 0.21427154541015625, "step": 27295 }, { "epoch": 0.23605502762622027, "grad_norm": 44.37327522070572, "learning_rate": 5.796119384731428e-06, "loss": 0.413470458984375, "step": 27300 }, { "epoch": 0.23609826114776353, "grad_norm": 1.4787495629733478, "learning_rate": 5.796045554361091e-06, "loss": 0.13981781005859376, "step": 27305 }, { "epoch": 0.23614149466930678, "grad_norm": 2.94151314726803, "learning_rate": 5.79597171109564e-06, "loss": 0.160845947265625, "step": 27310 }, { "epoch": 0.23618472819085007, "grad_norm": 16.803262581310165, "learning_rate": 5.795897854935413e-06, "loss": 0.35254974365234376, "step": 27315 }, { "epoch": 0.23622796171239333, "grad_norm": 6.19081155979669, "learning_rate": 5.795823985880754e-06, "loss": 0.13428573608398436, "step": 27320 }, { "epoch": 0.23627119523393658, "grad_norm": 32.799418828351534, "learning_rate": 5.795750103932e-06, "loss": 0.260528564453125, "step": 27325 }, { "epoch": 0.23631442875547984, "grad_norm": 5.770328750089305, "learning_rate": 5.795676209089494e-06, "loss": 0.2587165832519531, "step": 27330 }, { "epoch": 0.23635766227702312, "grad_norm": 38.58836581716408, "learning_rate": 5.795602301353577e-06, "loss": 0.63912353515625, "step": 27335 }, { "epoch": 0.23640089579856638, "grad_norm": 20.130829243047287, "learning_rate": 5.795528380724588e-06, "loss": 0.2598388671875, "step": 27340 }, { "epoch": 0.23644412932010964, "grad_norm": 9.51112139698787, "learning_rate": 5.795454447202871e-06, "loss": 0.22078781127929686, "step": 27345 }, { "epoch": 0.2364873628416529, "grad_norm": 0.8399179364037195, "learning_rate": 5.7953805007887635e-06, "loss": 0.77889404296875, "step": 27350 }, { "epoch": 0.23653059636319618, "grad_norm": 33.61345058485683, "learning_rate": 5.79530654148261e-06, "loss": 0.2940681457519531, "step": 27355 }, { "epoch": 0.23657382988473943, "grad_norm": 5.576345827035105, "learning_rate": 5.795232569284748e-06, "loss": 0.0970550537109375, "step": 27360 }, { "epoch": 0.2366170634062827, "grad_norm": 28.494758648825318, "learning_rate": 5.795158584195521e-06, "loss": 0.16560440063476561, "step": 27365 }, { "epoch": 0.23666029692782595, "grad_norm": 12.818640593861627, "learning_rate": 5.79508458621527e-06, "loss": 0.47562446594238283, "step": 27370 }, { "epoch": 0.23670353044936923, "grad_norm": 28.354229582470275, "learning_rate": 5.7950105753443345e-06, "loss": 0.225518798828125, "step": 27375 }, { "epoch": 0.2367467639709125, "grad_norm": 1.389039544281789, "learning_rate": 5.794936551583058e-06, "loss": 0.11701278686523438, "step": 27380 }, { "epoch": 0.23678999749245574, "grad_norm": 4.219323550519198, "learning_rate": 5.794862514931781e-06, "loss": 0.1369476318359375, "step": 27385 }, { "epoch": 0.236833231013999, "grad_norm": 19.756943715581464, "learning_rate": 5.794788465390845e-06, "loss": 0.2743316650390625, "step": 27390 }, { "epoch": 0.23687646453554229, "grad_norm": 11.985142030914176, "learning_rate": 5.794714402960591e-06, "loss": 0.072503662109375, "step": 27395 }, { "epoch": 0.23691969805708554, "grad_norm": 11.983495432788633, "learning_rate": 5.794640327641362e-06, "loss": 0.14893646240234376, "step": 27400 }, { "epoch": 0.2369629315786288, "grad_norm": 6.977642524612531, "learning_rate": 5.794566239433499e-06, "loss": 0.20125274658203124, "step": 27405 }, { "epoch": 0.23700616510017206, "grad_norm": 18.785858357763313, "learning_rate": 5.794492138337343e-06, "loss": 0.1669586181640625, "step": 27410 }, { "epoch": 0.23704939862171534, "grad_norm": 3.1633970839619217, "learning_rate": 5.794418024353236e-06, "loss": 0.08565444946289062, "step": 27415 }, { "epoch": 0.2370926321432586, "grad_norm": 0.8391013155719648, "learning_rate": 5.79434389748152e-06, "loss": 0.22702789306640625, "step": 27420 }, { "epoch": 0.23713586566480185, "grad_norm": 11.126230420414313, "learning_rate": 5.794269757722537e-06, "loss": 0.19439697265625, "step": 27425 }, { "epoch": 0.23717909918634514, "grad_norm": 5.511553353490478, "learning_rate": 5.794195605076629e-06, "loss": 0.1157958984375, "step": 27430 }, { "epoch": 0.2372223327078884, "grad_norm": 11.957804955095037, "learning_rate": 5.794121439544138e-06, "loss": 0.6569313049316406, "step": 27435 }, { "epoch": 0.23726556622943165, "grad_norm": 36.38836277096856, "learning_rate": 5.794047261125405e-06, "loss": 0.15310125350952147, "step": 27440 }, { "epoch": 0.2373087997509749, "grad_norm": 2.2750022254341116, "learning_rate": 5.793973069820774e-06, "loss": 0.3186065673828125, "step": 27445 }, { "epoch": 0.2373520332725182, "grad_norm": 27.11903840266083, "learning_rate": 5.793898865630585e-06, "loss": 0.4097412109375, "step": 27450 }, { "epoch": 0.23739526679406145, "grad_norm": 3.3797807059601017, "learning_rate": 5.793824648555181e-06, "loss": 0.087298583984375, "step": 27455 }, { "epoch": 0.2374385003156047, "grad_norm": 7.821546236213164, "learning_rate": 5.793750418594905e-06, "loss": 0.1476726531982422, "step": 27460 }, { "epoch": 0.23748173383714796, "grad_norm": 11.420851330825371, "learning_rate": 5.7936761757501e-06, "loss": 0.4818023681640625, "step": 27465 }, { "epoch": 0.23752496735869125, "grad_norm": 4.879794015416048, "learning_rate": 5.7936019200211064e-06, "loss": 0.38110885620117185, "step": 27470 }, { "epoch": 0.2375682008802345, "grad_norm": 20.755594684666278, "learning_rate": 5.793527651408268e-06, "loss": 0.08828659057617187, "step": 27475 }, { "epoch": 0.23761143440177776, "grad_norm": 2.0318154711010656, "learning_rate": 5.793453369911926e-06, "loss": 0.156201171875, "step": 27480 }, { "epoch": 0.23765466792332102, "grad_norm": 25.041561007458085, "learning_rate": 5.7933790755324245e-06, "loss": 0.1489410400390625, "step": 27485 }, { "epoch": 0.2376979014448643, "grad_norm": 15.274278257039777, "learning_rate": 5.793304768270106e-06, "loss": 0.33431396484375, "step": 27490 }, { "epoch": 0.23774113496640756, "grad_norm": 0.12572069253895085, "learning_rate": 5.793230448125312e-06, "loss": 0.321661376953125, "step": 27495 }, { "epoch": 0.23778436848795081, "grad_norm": 1.3695849692796236, "learning_rate": 5.793156115098386e-06, "loss": 0.42546768188476564, "step": 27500 }, { "epoch": 0.23782760200949407, "grad_norm": 4.741925692028939, "learning_rate": 5.793081769189672e-06, "loss": 0.066387939453125, "step": 27505 }, { "epoch": 0.23787083553103736, "grad_norm": 13.114310330889534, "learning_rate": 5.7930074103995105e-06, "loss": 0.318267822265625, "step": 27510 }, { "epoch": 0.2379140690525806, "grad_norm": 14.559393655615613, "learning_rate": 5.792933038728246e-06, "loss": 0.081524658203125, "step": 27515 }, { "epoch": 0.23795730257412387, "grad_norm": 0.47190414084102444, "learning_rate": 5.79285865417622e-06, "loss": 0.14945831298828124, "step": 27520 }, { "epoch": 0.23800053609566713, "grad_norm": 43.928524894829025, "learning_rate": 5.792784256743777e-06, "loss": 0.257464599609375, "step": 27525 }, { "epoch": 0.2380437696172104, "grad_norm": 2.1493497669597867, "learning_rate": 5.792709846431262e-06, "loss": 0.187872314453125, "step": 27530 }, { "epoch": 0.23808700313875367, "grad_norm": 6.082328295365382, "learning_rate": 5.792635423239014e-06, "loss": 0.21659698486328124, "step": 27535 }, { "epoch": 0.23813023666029692, "grad_norm": 29.20225908789484, "learning_rate": 5.792560987167378e-06, "loss": 0.5309326171875, "step": 27540 }, { "epoch": 0.23817347018184018, "grad_norm": 45.29519526067391, "learning_rate": 5.792486538216698e-06, "loss": 0.1304841995239258, "step": 27545 }, { "epoch": 0.23821670370338346, "grad_norm": 9.297146767508389, "learning_rate": 5.792412076387317e-06, "loss": 0.3725555419921875, "step": 27550 }, { "epoch": 0.23825993722492672, "grad_norm": 11.077047843922756, "learning_rate": 5.792337601679579e-06, "loss": 0.1405517578125, "step": 27555 }, { "epoch": 0.23830317074646998, "grad_norm": 14.69217997220465, "learning_rate": 5.792263114093825e-06, "loss": 0.3445556640625, "step": 27560 }, { "epoch": 0.23834640426801323, "grad_norm": 0.9932224282025441, "learning_rate": 5.792188613630401e-06, "loss": 0.071484375, "step": 27565 }, { "epoch": 0.23838963778955652, "grad_norm": 4.404548844918125, "learning_rate": 5.7921141002896504e-06, "loss": 0.10904693603515625, "step": 27570 }, { "epoch": 0.23843287131109978, "grad_norm": 0.4527893343705313, "learning_rate": 5.792039574071916e-06, "loss": 0.21516075134277343, "step": 27575 }, { "epoch": 0.23847610483264303, "grad_norm": 4.5306223180522, "learning_rate": 5.791965034977542e-06, "loss": 0.07050704956054688, "step": 27580 }, { "epoch": 0.2385193383541863, "grad_norm": 5.661839844569883, "learning_rate": 5.791890483006871e-06, "loss": 0.138397216796875, "step": 27585 }, { "epoch": 0.23856257187572957, "grad_norm": 61.540036127199514, "learning_rate": 5.791815918160248e-06, "loss": 0.5327926635742187, "step": 27590 }, { "epoch": 0.23860580539727283, "grad_norm": 16.54081711526809, "learning_rate": 5.791741340438017e-06, "loss": 0.272601318359375, "step": 27595 }, { "epoch": 0.2386490389188161, "grad_norm": 10.62922673726957, "learning_rate": 5.791666749840522e-06, "loss": 0.29172210693359374, "step": 27600 }, { "epoch": 0.23869227244035934, "grad_norm": 26.68320220482701, "learning_rate": 5.791592146368106e-06, "loss": 0.22366943359375, "step": 27605 }, { "epoch": 0.23873550596190263, "grad_norm": 4.4273700273324, "learning_rate": 5.791517530021114e-06, "loss": 0.11707763671875, "step": 27610 }, { "epoch": 0.23877873948344588, "grad_norm": 9.277961484326823, "learning_rate": 5.79144290079989e-06, "loss": 0.09337272644042968, "step": 27615 }, { "epoch": 0.23882197300498914, "grad_norm": 17.963896450853778, "learning_rate": 5.791368258704778e-06, "loss": 0.27783203125, "step": 27620 }, { "epoch": 0.23886520652653243, "grad_norm": 12.162281313746183, "learning_rate": 5.791293603736122e-06, "loss": 0.1291107177734375, "step": 27625 }, { "epoch": 0.23890844004807568, "grad_norm": 5.205140295811351, "learning_rate": 5.791218935894266e-06, "loss": 0.100018310546875, "step": 27630 }, { "epoch": 0.23895167356961894, "grad_norm": 28.996550598337844, "learning_rate": 5.791144255179555e-06, "loss": 0.11572532653808594, "step": 27635 }, { "epoch": 0.2389949070911622, "grad_norm": 25.128610217764123, "learning_rate": 5.791069561592335e-06, "loss": 0.2008270263671875, "step": 27640 }, { "epoch": 0.23903814061270548, "grad_norm": 2.593985805862072, "learning_rate": 5.790994855132947e-06, "loss": 0.0434783935546875, "step": 27645 }, { "epoch": 0.23908137413424874, "grad_norm": 26.783035676218535, "learning_rate": 5.790920135801738e-06, "loss": 0.2851959228515625, "step": 27650 }, { "epoch": 0.239124607655792, "grad_norm": 1.307137479535294, "learning_rate": 5.7908454035990515e-06, "loss": 0.17623291015625, "step": 27655 }, { "epoch": 0.23916784117733525, "grad_norm": 7.484001733376172, "learning_rate": 5.790770658525233e-06, "loss": 0.06389007568359376, "step": 27660 }, { "epoch": 0.23921107469887853, "grad_norm": 13.426167014899393, "learning_rate": 5.790695900580627e-06, "loss": 0.13908729553222657, "step": 27665 }, { "epoch": 0.2392543082204218, "grad_norm": 7.101301425251592, "learning_rate": 5.790621129765578e-06, "loss": 0.134661865234375, "step": 27670 }, { "epoch": 0.23929754174196505, "grad_norm": 36.67081788931647, "learning_rate": 5.790546346080431e-06, "loss": 0.221575927734375, "step": 27675 }, { "epoch": 0.2393407752635083, "grad_norm": 44.370328911741495, "learning_rate": 5.7904715495255305e-06, "loss": 0.5073715209960937, "step": 27680 }, { "epoch": 0.2393840087850516, "grad_norm": 18.07857084191915, "learning_rate": 5.790396740101221e-06, "loss": 0.1486175537109375, "step": 27685 }, { "epoch": 0.23942724230659485, "grad_norm": 67.97677411948564, "learning_rate": 5.79032191780785e-06, "loss": 0.198681640625, "step": 27690 }, { "epoch": 0.2394704758281381, "grad_norm": 2.5370166659274025, "learning_rate": 5.79024708264576e-06, "loss": 0.146661376953125, "step": 27695 }, { "epoch": 0.23951370934968136, "grad_norm": 10.712435911813618, "learning_rate": 5.790172234615297e-06, "loss": 0.1270904541015625, "step": 27700 }, { "epoch": 0.23955694287122464, "grad_norm": 1.0695429838776715, "learning_rate": 5.790097373716806e-06, "loss": 0.19345550537109374, "step": 27705 }, { "epoch": 0.2396001763927679, "grad_norm": 5.224096498542562, "learning_rate": 5.7900224999506336e-06, "loss": 0.39610748291015624, "step": 27710 }, { "epoch": 0.23964340991431116, "grad_norm": 42.6851148615918, "learning_rate": 5.789947613317123e-06, "loss": 0.200054931640625, "step": 27715 }, { "epoch": 0.2396866434358544, "grad_norm": 0.5559092606574686, "learning_rate": 5.789872713816621e-06, "loss": 0.050071334838867186, "step": 27720 }, { "epoch": 0.2397298769573977, "grad_norm": 12.435411033231565, "learning_rate": 5.789797801449472e-06, "loss": 0.14142532348632814, "step": 27725 }, { "epoch": 0.23977311047894095, "grad_norm": 0.42826515118354364, "learning_rate": 5.789722876216022e-06, "loss": 0.101531982421875, "step": 27730 }, { "epoch": 0.2398163440004842, "grad_norm": 9.242273007046276, "learning_rate": 5.789647938116617e-06, "loss": 0.26283950805664064, "step": 27735 }, { "epoch": 0.23985957752202747, "grad_norm": 6.175479832811218, "learning_rate": 5.789572987151603e-06, "loss": 0.08931884765625, "step": 27740 }, { "epoch": 0.23990281104357075, "grad_norm": 6.049073696134327, "learning_rate": 5.789498023321323e-06, "loss": 0.08547134399414062, "step": 27745 }, { "epoch": 0.239946044565114, "grad_norm": 0.7075348411778318, "learning_rate": 5.789423046626126e-06, "loss": 0.11838035583496094, "step": 27750 }, { "epoch": 0.23998927808665727, "grad_norm": 0.5125917417129183, "learning_rate": 5.789348057066356e-06, "loss": 0.2038848876953125, "step": 27755 }, { "epoch": 0.24003251160820052, "grad_norm": 0.7847258187189897, "learning_rate": 5.789273054642359e-06, "loss": 0.118670654296875, "step": 27760 }, { "epoch": 0.2400757451297438, "grad_norm": 6.395485751725717, "learning_rate": 5.789198039354481e-06, "loss": 0.1298248291015625, "step": 27765 }, { "epoch": 0.24011897865128706, "grad_norm": 6.4791776467407995, "learning_rate": 5.7891230112030684e-06, "loss": 0.07385787963867188, "step": 27770 }, { "epoch": 0.24016221217283032, "grad_norm": 51.68441964892459, "learning_rate": 5.789047970188467e-06, "loss": 0.196728515625, "step": 27775 }, { "epoch": 0.24020544569437358, "grad_norm": 7.219247701670627, "learning_rate": 5.7889729163110224e-06, "loss": 0.13472681045532225, "step": 27780 }, { "epoch": 0.24024867921591686, "grad_norm": 2.5933108155787967, "learning_rate": 5.788897849571082e-06, "loss": 0.09821548461914062, "step": 27785 }, { "epoch": 0.24029191273746012, "grad_norm": 6.521820418658088, "learning_rate": 5.788822769968991e-06, "loss": 0.4197113037109375, "step": 27790 }, { "epoch": 0.24033514625900337, "grad_norm": 24.103092015171637, "learning_rate": 5.788747677505094e-06, "loss": 0.1905181884765625, "step": 27795 }, { "epoch": 0.24037837978054666, "grad_norm": 0.37771850643446475, "learning_rate": 5.7886725721797414e-06, "loss": 0.08507347106933594, "step": 27800 }, { "epoch": 0.24042161330208991, "grad_norm": 33.84770552928257, "learning_rate": 5.7885974539932765e-06, "loss": 0.252581787109375, "step": 27805 }, { "epoch": 0.24046484682363317, "grad_norm": 2.9327853378477093, "learning_rate": 5.788522322946046e-06, "loss": 0.0443878173828125, "step": 27810 }, { "epoch": 0.24050808034517643, "grad_norm": 8.389591075167365, "learning_rate": 5.788447179038398e-06, "loss": 0.10768070220947265, "step": 27815 }, { "epoch": 0.2405513138667197, "grad_norm": 6.484242946876001, "learning_rate": 5.788372022270677e-06, "loss": 0.2850456237792969, "step": 27820 }, { "epoch": 0.24059454738826297, "grad_norm": 30.19589946733064, "learning_rate": 5.788296852643232e-06, "loss": 0.2813323974609375, "step": 27825 }, { "epoch": 0.24063778090980623, "grad_norm": 76.41448644385858, "learning_rate": 5.788221670156407e-06, "loss": 0.24568328857421876, "step": 27830 }, { "epoch": 0.24068101443134948, "grad_norm": 0.7126909295288918, "learning_rate": 5.78814647481055e-06, "loss": 0.11068954467773437, "step": 27835 }, { "epoch": 0.24072424795289277, "grad_norm": 2.9719922103870067, "learning_rate": 5.788071266606008e-06, "loss": 0.0215606689453125, "step": 27840 }, { "epoch": 0.24076748147443602, "grad_norm": 9.397286513866852, "learning_rate": 5.787996045543128e-06, "loss": 0.3946525573730469, "step": 27845 }, { "epoch": 0.24081071499597928, "grad_norm": 32.26057564343752, "learning_rate": 5.787920811622256e-06, "loss": 0.20422897338867188, "step": 27850 }, { "epoch": 0.24085394851752254, "grad_norm": 10.056129205296978, "learning_rate": 5.787845564843741e-06, "loss": 0.11298828125, "step": 27855 }, { "epoch": 0.24089718203906582, "grad_norm": 9.66989543935112, "learning_rate": 5.787770305207928e-06, "loss": 0.21044921875, "step": 27860 }, { "epoch": 0.24094041556060908, "grad_norm": 39.76812211994726, "learning_rate": 5.787695032715164e-06, "loss": 0.33517303466796877, "step": 27865 }, { "epoch": 0.24098364908215233, "grad_norm": 9.638926377163953, "learning_rate": 5.787619747365797e-06, "loss": 0.10783424377441406, "step": 27870 }, { "epoch": 0.2410268826036956, "grad_norm": 6.751347730918564, "learning_rate": 5.787544449160174e-06, "loss": 0.3219429016113281, "step": 27875 }, { "epoch": 0.24107011612523888, "grad_norm": 1.8043601408137528, "learning_rate": 5.7874691380986435e-06, "loss": 0.07328147888183593, "step": 27880 }, { "epoch": 0.24111334964678213, "grad_norm": 15.156194422742999, "learning_rate": 5.78739381418155e-06, "loss": 0.08429012298583985, "step": 27885 }, { "epoch": 0.2411565831683254, "grad_norm": 4.211901547873107, "learning_rate": 5.787318477409243e-06, "loss": 0.22133026123046876, "step": 27890 }, { "epoch": 0.24119981668986865, "grad_norm": 13.319141300627873, "learning_rate": 5.7872431277820694e-06, "loss": 0.1714996337890625, "step": 27895 }, { "epoch": 0.24124305021141193, "grad_norm": 6.569955101000726, "learning_rate": 5.7871677653003775e-06, "loss": 0.24916610717773438, "step": 27900 }, { "epoch": 0.2412862837329552, "grad_norm": 5.636664623829065, "learning_rate": 5.787092389964513e-06, "loss": 0.1895233154296875, "step": 27905 }, { "epoch": 0.24132951725449844, "grad_norm": 0.04023520737530255, "learning_rate": 5.787017001774826e-06, "loss": 0.21677474975585936, "step": 27910 }, { "epoch": 0.2413727507760417, "grad_norm": 2.3187651377553724, "learning_rate": 5.7869416007316626e-06, "loss": 0.2613193511962891, "step": 27915 }, { "epoch": 0.24141598429758498, "grad_norm": 2.358676006649074, "learning_rate": 5.78686618683537e-06, "loss": 0.09232215881347657, "step": 27920 }, { "epoch": 0.24145921781912824, "grad_norm": 77.1075867915545, "learning_rate": 5.786790760086297e-06, "loss": 0.53814697265625, "step": 27925 }, { "epoch": 0.2415024513406715, "grad_norm": 17.948227801969068, "learning_rate": 5.786715320484792e-06, "loss": 0.165228271484375, "step": 27930 }, { "epoch": 0.24154568486221475, "grad_norm": 2.1317422884282085, "learning_rate": 5.786639868031201e-06, "loss": 0.2148876190185547, "step": 27935 }, { "epoch": 0.24158891838375804, "grad_norm": 2.3500570193290726, "learning_rate": 5.786564402725874e-06, "loss": 0.12383880615234374, "step": 27940 }, { "epoch": 0.2416321519053013, "grad_norm": 10.701379122660827, "learning_rate": 5.7864889245691575e-06, "loss": 0.19169921875, "step": 27945 }, { "epoch": 0.24167538542684455, "grad_norm": 3.8559036653776224, "learning_rate": 5.786413433561402e-06, "loss": 0.28248291015625, "step": 27950 }, { "epoch": 0.2417186189483878, "grad_norm": 8.8970072123379, "learning_rate": 5.7863379297029525e-06, "loss": 0.441455078125, "step": 27955 }, { "epoch": 0.2417618524699311, "grad_norm": 6.328668293977184, "learning_rate": 5.786262412994158e-06, "loss": 0.2012725830078125, "step": 27960 }, { "epoch": 0.24180508599147435, "grad_norm": 6.370118238530931, "learning_rate": 5.786186883435369e-06, "loss": 0.08256759643554687, "step": 27965 }, { "epoch": 0.2418483195130176, "grad_norm": 14.011438260613614, "learning_rate": 5.786111341026931e-06, "loss": 0.11995124816894531, "step": 27970 }, { "epoch": 0.24189155303456086, "grad_norm": 8.723916340094476, "learning_rate": 5.786035785769195e-06, "loss": 0.1389373779296875, "step": 27975 }, { "epoch": 0.24193478655610415, "grad_norm": 6.7870040855249965, "learning_rate": 5.785960217662508e-06, "loss": 0.1033172607421875, "step": 27980 }, { "epoch": 0.2419780200776474, "grad_norm": 1.305531699922596, "learning_rate": 5.785884636707217e-06, "loss": 0.17755355834960937, "step": 27985 }, { "epoch": 0.24202125359919066, "grad_norm": 31.968626225933004, "learning_rate": 5.785809042903673e-06, "loss": 0.22043914794921876, "step": 27990 }, { "epoch": 0.24206448712073395, "grad_norm": 2.316330385632129, "learning_rate": 5.7857334362522245e-06, "loss": 0.17838134765625, "step": 27995 }, { "epoch": 0.2421077206422772, "grad_norm": 5.767614802390153, "learning_rate": 5.78565781675322e-06, "loss": 0.5821533203125, "step": 28000 }, { "epoch": 0.24215095416382046, "grad_norm": 20.78469136655347, "learning_rate": 5.785582184407007e-06, "loss": 0.08027496337890624, "step": 28005 }, { "epoch": 0.24219418768536372, "grad_norm": 13.735244236328946, "learning_rate": 5.785506539213935e-06, "loss": 0.1500885009765625, "step": 28010 }, { "epoch": 0.242237421206907, "grad_norm": 1.624971483156952, "learning_rate": 5.785430881174352e-06, "loss": 0.2360504150390625, "step": 28015 }, { "epoch": 0.24228065472845026, "grad_norm": 1.568066633853273, "learning_rate": 5.785355210288609e-06, "loss": 0.1697784423828125, "step": 28020 }, { "epoch": 0.2423238882499935, "grad_norm": 55.288800543511165, "learning_rate": 5.7852795265570546e-06, "loss": 0.20137176513671876, "step": 28025 }, { "epoch": 0.24236712177153677, "grad_norm": 1.9654016941091363, "learning_rate": 5.785203829980036e-06, "loss": 0.2187408447265625, "step": 28030 }, { "epoch": 0.24241035529308005, "grad_norm": 15.763631032110176, "learning_rate": 5.7851281205579044e-06, "loss": 0.22455253601074218, "step": 28035 }, { "epoch": 0.2424535888146233, "grad_norm": 0.7747701047316692, "learning_rate": 5.785052398291008e-06, "loss": 0.1952840805053711, "step": 28040 }, { "epoch": 0.24249682233616657, "grad_norm": 45.833862652990575, "learning_rate": 5.784976663179694e-06, "loss": 0.589862060546875, "step": 28045 }, { "epoch": 0.24254005585770982, "grad_norm": 4.820171207055633, "learning_rate": 5.784900915224316e-06, "loss": 0.10682373046875, "step": 28050 }, { "epoch": 0.2425832893792531, "grad_norm": 6.721030690719297, "learning_rate": 5.78482515442522e-06, "loss": 0.21986083984375, "step": 28055 }, { "epoch": 0.24262652290079637, "grad_norm": 4.184624464110314, "learning_rate": 5.7847493807827565e-06, "loss": 0.10008087158203124, "step": 28060 }, { "epoch": 0.24266975642233962, "grad_norm": 35.2245858278329, "learning_rate": 5.784673594297275e-06, "loss": 0.394097900390625, "step": 28065 }, { "epoch": 0.24271298994388288, "grad_norm": 12.744627353794096, "learning_rate": 5.784597794969126e-06, "loss": 0.115283203125, "step": 28070 }, { "epoch": 0.24275622346542616, "grad_norm": 0.06950905526147474, "learning_rate": 5.784521982798657e-06, "loss": 0.3044921875, "step": 28075 }, { "epoch": 0.24279945698696942, "grad_norm": 21.483457898611817, "learning_rate": 5.784446157786218e-06, "loss": 0.31693458557128906, "step": 28080 }, { "epoch": 0.24284269050851268, "grad_norm": 6.525436009924464, "learning_rate": 5.78437031993216e-06, "loss": 0.150140380859375, "step": 28085 }, { "epoch": 0.24288592403005593, "grad_norm": 4.824245733788252, "learning_rate": 5.784294469236833e-06, "loss": 0.6782638549804687, "step": 28090 }, { "epoch": 0.24292915755159922, "grad_norm": 23.254978774670445, "learning_rate": 5.784218605700585e-06, "loss": 0.10826339721679687, "step": 28095 }, { "epoch": 0.24297239107314247, "grad_norm": 0.9876341876506995, "learning_rate": 5.784142729323767e-06, "loss": 0.15198974609375, "step": 28100 }, { "epoch": 0.24301562459468573, "grad_norm": 4.9117449028547995, "learning_rate": 5.78406684010673e-06, "loss": 0.129925537109375, "step": 28105 }, { "epoch": 0.243058858116229, "grad_norm": 5.363444045567891, "learning_rate": 5.783990938049821e-06, "loss": 0.12001953125, "step": 28110 }, { "epoch": 0.24310209163777227, "grad_norm": 1.0334426308048474, "learning_rate": 5.7839150231533915e-06, "loss": 0.113726806640625, "step": 28115 }, { "epoch": 0.24314532515931553, "grad_norm": 2.256917143063956, "learning_rate": 5.783839095417793e-06, "loss": 0.24288082122802734, "step": 28120 }, { "epoch": 0.24318855868085879, "grad_norm": 0.49782322634216175, "learning_rate": 5.783763154843374e-06, "loss": 0.405731201171875, "step": 28125 }, { "epoch": 0.24323179220240204, "grad_norm": 34.43899208112142, "learning_rate": 5.783687201430486e-06, "loss": 0.3185089111328125, "step": 28130 }, { "epoch": 0.24327502572394533, "grad_norm": 25.61643506130396, "learning_rate": 5.783611235179477e-06, "loss": 0.252880859375, "step": 28135 }, { "epoch": 0.24331825924548858, "grad_norm": 8.647794591588061, "learning_rate": 5.783535256090701e-06, "loss": 0.09014549255371093, "step": 28140 }, { "epoch": 0.24336149276703184, "grad_norm": 27.489678704131016, "learning_rate": 5.783459264164505e-06, "loss": 0.48563232421875, "step": 28145 }, { "epoch": 0.2434047262885751, "grad_norm": 2.214871766825055, "learning_rate": 5.783383259401241e-06, "loss": 0.20881729125976561, "step": 28150 }, { "epoch": 0.24344795981011838, "grad_norm": 57.65926533239018, "learning_rate": 5.78330724180126e-06, "loss": 0.4527557373046875, "step": 28155 }, { "epoch": 0.24349119333166164, "grad_norm": 29.08935252545856, "learning_rate": 5.783231211364911e-06, "loss": 0.205511474609375, "step": 28160 }, { "epoch": 0.2435344268532049, "grad_norm": 5.43325008025698, "learning_rate": 5.783155168092547e-06, "loss": 0.11812744140625, "step": 28165 }, { "epoch": 0.24357766037474818, "grad_norm": 0.4913127227946357, "learning_rate": 5.7830791119845164e-06, "loss": 0.2281768798828125, "step": 28170 }, { "epoch": 0.24362089389629143, "grad_norm": 37.43799789147564, "learning_rate": 5.7830030430411705e-06, "loss": 0.4773403167724609, "step": 28175 }, { "epoch": 0.2436641274178347, "grad_norm": 1.3174328446746635, "learning_rate": 5.7829269612628614e-06, "loss": 0.04790802001953125, "step": 28180 }, { "epoch": 0.24370736093937795, "grad_norm": 2.3158837650785102, "learning_rate": 5.782850866649937e-06, "loss": 0.272686767578125, "step": 28185 }, { "epoch": 0.24375059446092123, "grad_norm": 4.271567319793334, "learning_rate": 5.782774759202753e-06, "loss": 0.21544189453125, "step": 28190 }, { "epoch": 0.2437938279824645, "grad_norm": 1.4236135470886695, "learning_rate": 5.782698638921656e-06, "loss": 0.3369476318359375, "step": 28195 }, { "epoch": 0.24383706150400775, "grad_norm": 1.14556034518813, "learning_rate": 5.782622505807e-06, "loss": 0.10245819091796875, "step": 28200 }, { "epoch": 0.243880295025551, "grad_norm": 17.06687213153166, "learning_rate": 5.782546359859134e-06, "loss": 0.14058265686035157, "step": 28205 }, { "epoch": 0.2439235285470943, "grad_norm": 34.18117099131114, "learning_rate": 5.78247020107841e-06, "loss": 0.4722419738769531, "step": 28210 }, { "epoch": 0.24396676206863754, "grad_norm": 2.076851610763787, "learning_rate": 5.78239402946518e-06, "loss": 0.07077102661132813, "step": 28215 }, { "epoch": 0.2440099955901808, "grad_norm": 0.5230302318934055, "learning_rate": 5.782317845019793e-06, "loss": 0.09009170532226562, "step": 28220 }, { "epoch": 0.24405322911172406, "grad_norm": 4.724530251557034, "learning_rate": 5.782241647742604e-06, "loss": 0.18688087463378905, "step": 28225 }, { "epoch": 0.24409646263326734, "grad_norm": 28.653120493814452, "learning_rate": 5.782165437633961e-06, "loss": 0.1386444091796875, "step": 28230 }, { "epoch": 0.2441396961548106, "grad_norm": 28.58447461707604, "learning_rate": 5.782089214694217e-06, "loss": 0.17660865783691407, "step": 28235 }, { "epoch": 0.24418292967635385, "grad_norm": 5.533369060906264, "learning_rate": 5.782012978923724e-06, "loss": 0.089752197265625, "step": 28240 }, { "epoch": 0.2442261631978971, "grad_norm": 0.586282121529776, "learning_rate": 5.781936730322832e-06, "loss": 0.0748016357421875, "step": 28245 }, { "epoch": 0.2442693967194404, "grad_norm": 100.94370739731782, "learning_rate": 5.781860468891894e-06, "loss": 0.327880859375, "step": 28250 }, { "epoch": 0.24431263024098365, "grad_norm": 12.45217105418419, "learning_rate": 5.781784194631263e-06, "loss": 0.1414783477783203, "step": 28255 }, { "epoch": 0.2443558637625269, "grad_norm": 7.983001744528381, "learning_rate": 5.781707907541286e-06, "loss": 0.1477750778198242, "step": 28260 }, { "epoch": 0.24439909728407017, "grad_norm": 39.88496651291761, "learning_rate": 5.78163160762232e-06, "loss": 0.48578720092773436, "step": 28265 }, { "epoch": 0.24444233080561345, "grad_norm": 22.348737490090237, "learning_rate": 5.7815552948747145e-06, "loss": 0.198858642578125, "step": 28270 }, { "epoch": 0.2444855643271567, "grad_norm": 0.34533208759568296, "learning_rate": 5.781478969298822e-06, "loss": 0.07511749267578124, "step": 28275 }, { "epoch": 0.24452879784869996, "grad_norm": 6.116994826138441, "learning_rate": 5.781402630894994e-06, "loss": 0.0425323486328125, "step": 28280 }, { "epoch": 0.24457203137024322, "grad_norm": 1.8151655995905054, "learning_rate": 5.781326279663582e-06, "loss": 0.1862091064453125, "step": 28285 }, { "epoch": 0.2446152648917865, "grad_norm": 0.2036397941809441, "learning_rate": 5.7812499156049405e-06, "loss": 0.04553298950195313, "step": 28290 }, { "epoch": 0.24465849841332976, "grad_norm": 13.519594462946033, "learning_rate": 5.781173538719419e-06, "loss": 0.126165771484375, "step": 28295 }, { "epoch": 0.24470173193487302, "grad_norm": 41.700874899453666, "learning_rate": 5.781097149007371e-06, "loss": 0.4360630035400391, "step": 28300 }, { "epoch": 0.24474496545641627, "grad_norm": 1.3915411292511666, "learning_rate": 5.7810207464691486e-06, "loss": 0.1824310302734375, "step": 28305 }, { "epoch": 0.24478819897795956, "grad_norm": 3.7765562960238377, "learning_rate": 5.780944331105105e-06, "loss": 0.17388954162597656, "step": 28310 }, { "epoch": 0.24483143249950282, "grad_norm": 8.740118753496391, "learning_rate": 5.780867902915592e-06, "loss": 0.19028244018554688, "step": 28315 }, { "epoch": 0.24487466602104607, "grad_norm": 8.334113835514135, "learning_rate": 5.78079146190096e-06, "loss": 0.10658187866210937, "step": 28320 }, { "epoch": 0.24491789954258933, "grad_norm": 4.448741220305298, "learning_rate": 5.7807150080615655e-06, "loss": 0.164996337890625, "step": 28325 }, { "epoch": 0.2449611330641326, "grad_norm": 9.916101417235405, "learning_rate": 5.780638541397759e-06, "loss": 0.1502349853515625, "step": 28330 }, { "epoch": 0.24500436658567587, "grad_norm": 0.3126911056771371, "learning_rate": 5.780562061909893e-06, "loss": 0.03165512084960938, "step": 28335 }, { "epoch": 0.24504760010721913, "grad_norm": 4.3627072097429105, "learning_rate": 5.780485569598319e-06, "loss": 0.25863494873046877, "step": 28340 }, { "epoch": 0.24509083362876238, "grad_norm": 29.16274792307327, "learning_rate": 5.780409064463393e-06, "loss": 0.3729888916015625, "step": 28345 }, { "epoch": 0.24513406715030567, "grad_norm": 1.7379807064322552, "learning_rate": 5.780332546505465e-06, "loss": 0.0879425048828125, "step": 28350 }, { "epoch": 0.24517730067184892, "grad_norm": 50.81687544461309, "learning_rate": 5.78025601572489e-06, "loss": 0.387841796875, "step": 28355 }, { "epoch": 0.24522053419339218, "grad_norm": 7.345255582018083, "learning_rate": 5.780179472122019e-06, "loss": 0.5519622802734375, "step": 28360 }, { "epoch": 0.24526376771493547, "grad_norm": 3.039574578301338, "learning_rate": 5.780102915697207e-06, "loss": 0.0930755615234375, "step": 28365 }, { "epoch": 0.24530700123647872, "grad_norm": 2.165217415130714, "learning_rate": 5.7800263464508055e-06, "loss": 0.4692413330078125, "step": 28370 }, { "epoch": 0.24535023475802198, "grad_norm": 5.7975143091398165, "learning_rate": 5.779949764383167e-06, "loss": 0.0691192626953125, "step": 28375 }, { "epoch": 0.24539346827956524, "grad_norm": 4.065558891163086, "learning_rate": 5.779873169494648e-06, "loss": 0.09301376342773438, "step": 28380 }, { "epoch": 0.24543670180110852, "grad_norm": 1.0857544861021402, "learning_rate": 5.779796561785598e-06, "loss": 0.12666015625, "step": 28385 }, { "epoch": 0.24547993532265178, "grad_norm": 5.375752718967346, "learning_rate": 5.779719941256372e-06, "loss": 0.06722908020019532, "step": 28390 }, { "epoch": 0.24552316884419503, "grad_norm": 6.584204623245593, "learning_rate": 5.779643307907323e-06, "loss": 0.16836013793945312, "step": 28395 }, { "epoch": 0.2455664023657383, "grad_norm": 64.55948536136859, "learning_rate": 5.779566661738806e-06, "loss": 0.11494903564453125, "step": 28400 }, { "epoch": 0.24560963588728157, "grad_norm": 16.772973169651, "learning_rate": 5.779490002751172e-06, "loss": 0.32027587890625, "step": 28405 }, { "epoch": 0.24565286940882483, "grad_norm": 29.293233510991413, "learning_rate": 5.779413330944776e-06, "loss": 0.198992919921875, "step": 28410 }, { "epoch": 0.2456961029303681, "grad_norm": 2.859636185945771, "learning_rate": 5.779336646319972e-06, "loss": 0.4352508544921875, "step": 28415 }, { "epoch": 0.24573933645191134, "grad_norm": 7.118951238441063, "learning_rate": 5.779259948877112e-06, "loss": 0.19281005859375, "step": 28420 }, { "epoch": 0.24578256997345463, "grad_norm": 21.172655234842907, "learning_rate": 5.77918323861655e-06, "loss": 0.1128875732421875, "step": 28425 }, { "epoch": 0.24582580349499789, "grad_norm": 13.90617117128998, "learning_rate": 5.779106515538642e-06, "loss": 0.15898876190185546, "step": 28430 }, { "epoch": 0.24586903701654114, "grad_norm": 11.089721470906746, "learning_rate": 5.77902977964374e-06, "loss": 0.15809326171875, "step": 28435 }, { "epoch": 0.2459122705380844, "grad_norm": 2.5180334845588015, "learning_rate": 5.778953030932198e-06, "loss": 0.124169921875, "step": 28440 }, { "epoch": 0.24595550405962768, "grad_norm": 17.018767066458974, "learning_rate": 5.77887626940437e-06, "loss": 0.148211669921875, "step": 28445 }, { "epoch": 0.24599873758117094, "grad_norm": 22.48494441091361, "learning_rate": 5.77879949506061e-06, "loss": 0.25959320068359376, "step": 28450 }, { "epoch": 0.2460419711027142, "grad_norm": 2.643978371740476, "learning_rate": 5.778722707901273e-06, "loss": 0.0648651123046875, "step": 28455 }, { "epoch": 0.24608520462425745, "grad_norm": 0.3949696108443356, "learning_rate": 5.778645907926712e-06, "loss": 0.1116485595703125, "step": 28460 }, { "epoch": 0.24612843814580074, "grad_norm": 1.4787043249988396, "learning_rate": 5.778569095137282e-06, "loss": 0.32642059326171874, "step": 28465 }, { "epoch": 0.246171671667344, "grad_norm": 6.067829413029844, "learning_rate": 5.7784922695333365e-06, "loss": 0.13691864013671876, "step": 28470 }, { "epoch": 0.24621490518888725, "grad_norm": 1.0794153956552757, "learning_rate": 5.7784154311152306e-06, "loss": 0.11591949462890624, "step": 28475 }, { "epoch": 0.2462581387104305, "grad_norm": 3.89923966203732, "learning_rate": 5.778338579883317e-06, "loss": 0.46646728515625, "step": 28480 }, { "epoch": 0.2463013722319738, "grad_norm": 29.125057462326016, "learning_rate": 5.778261715837953e-06, "loss": 0.144683837890625, "step": 28485 }, { "epoch": 0.24634460575351705, "grad_norm": 33.892272687794964, "learning_rate": 5.7781848389794905e-06, "loss": 0.14799766540527343, "step": 28490 }, { "epoch": 0.2463878392750603, "grad_norm": 0.3632091812062531, "learning_rate": 5.778107949308285e-06, "loss": 0.18647842407226561, "step": 28495 }, { "epoch": 0.24643107279660356, "grad_norm": 10.037345389818139, "learning_rate": 5.778031046824691e-06, "loss": 0.0700469970703125, "step": 28500 }, { "epoch": 0.24647430631814685, "grad_norm": 1.1188248234541633, "learning_rate": 5.777954131529064e-06, "loss": 0.06975173950195312, "step": 28505 }, { "epoch": 0.2465175398396901, "grad_norm": 4.384356918356591, "learning_rate": 5.777877203421758e-06, "loss": 0.07465286254882812, "step": 28510 }, { "epoch": 0.24656077336123336, "grad_norm": 1.036449293671985, "learning_rate": 5.777800262503127e-06, "loss": 0.31625213623046877, "step": 28515 }, { "epoch": 0.24660400688277662, "grad_norm": 7.909757507622112, "learning_rate": 5.777723308773527e-06, "loss": 0.43544921875, "step": 28520 }, { "epoch": 0.2466472404043199, "grad_norm": 4.819354411916319, "learning_rate": 5.777646342233312e-06, "loss": 0.24057769775390625, "step": 28525 }, { "epoch": 0.24669047392586316, "grad_norm": 27.59949919725324, "learning_rate": 5.777569362882838e-06, "loss": 0.30075531005859374, "step": 28530 }, { "epoch": 0.24673370744740641, "grad_norm": 6.484683677546474, "learning_rate": 5.77749237072246e-06, "loss": 0.1165771484375, "step": 28535 }, { "epoch": 0.2467769409689497, "grad_norm": 11.510873640374177, "learning_rate": 5.7774153657525325e-06, "loss": 0.1683929443359375, "step": 28540 }, { "epoch": 0.24682017449049296, "grad_norm": 6.86789178714051, "learning_rate": 5.77733834797341e-06, "loss": 0.0891143798828125, "step": 28545 }, { "epoch": 0.2468634080120362, "grad_norm": 13.48966691083467, "learning_rate": 5.777261317385448e-06, "loss": 0.350347900390625, "step": 28550 }, { "epoch": 0.24690664153357947, "grad_norm": 32.75805351138957, "learning_rate": 5.777184273989004e-06, "loss": 0.44470977783203125, "step": 28555 }, { "epoch": 0.24694987505512275, "grad_norm": 12.970804336145777, "learning_rate": 5.77710721778443e-06, "loss": 0.2716419219970703, "step": 28560 }, { "epoch": 0.246993108576666, "grad_norm": 1.2578194839412142, "learning_rate": 5.777030148772084e-06, "loss": 0.08487319946289062, "step": 28565 }, { "epoch": 0.24703634209820927, "grad_norm": 1.0164391381518554, "learning_rate": 5.77695306695232e-06, "loss": 0.451806640625, "step": 28570 }, { "epoch": 0.24707957561975252, "grad_norm": 11.95497429692006, "learning_rate": 5.776875972325494e-06, "loss": 0.31009063720703123, "step": 28575 }, { "epoch": 0.2471228091412958, "grad_norm": 1.0888948945571923, "learning_rate": 5.776798864891961e-06, "loss": 0.1205841064453125, "step": 28580 }, { "epoch": 0.24716604266283906, "grad_norm": 8.198049760057042, "learning_rate": 5.776721744652077e-06, "loss": 0.17703704833984374, "step": 28585 }, { "epoch": 0.24720927618438232, "grad_norm": 3.645345971220577, "learning_rate": 5.776644611606197e-06, "loss": 0.41193389892578125, "step": 28590 }, { "epoch": 0.24725250970592558, "grad_norm": 1.4697134312282916, "learning_rate": 5.776567465754679e-06, "loss": 0.404559326171875, "step": 28595 }, { "epoch": 0.24729574322746886, "grad_norm": 15.19488737078448, "learning_rate": 5.776490307097876e-06, "loss": 0.04844589233398437, "step": 28600 }, { "epoch": 0.24733897674901212, "grad_norm": 7.6713051745246235, "learning_rate": 5.776413135636145e-06, "loss": 0.2663749694824219, "step": 28605 }, { "epoch": 0.24738221027055537, "grad_norm": 23.84203115202725, "learning_rate": 5.776335951369842e-06, "loss": 0.37112274169921877, "step": 28610 }, { "epoch": 0.24742544379209863, "grad_norm": 6.677710631435228, "learning_rate": 5.776258754299324e-06, "loss": 0.11274871826171876, "step": 28615 }, { "epoch": 0.24746867731364192, "grad_norm": 4.434378955063698, "learning_rate": 5.776181544424944e-06, "loss": 0.11595001220703124, "step": 28620 }, { "epoch": 0.24751191083518517, "grad_norm": 8.525990830980989, "learning_rate": 5.776104321747061e-06, "loss": 0.240350341796875, "step": 28625 }, { "epoch": 0.24755514435672843, "grad_norm": 2.0506666250712393, "learning_rate": 5.776027086266031e-06, "loss": 0.14521484375, "step": 28630 }, { "epoch": 0.24759837787827169, "grad_norm": 0.8931409987666419, "learning_rate": 5.7759498379822095e-06, "loss": 0.15347900390625, "step": 28635 }, { "epoch": 0.24764161139981497, "grad_norm": 11.149661247792853, "learning_rate": 5.775872576895951e-06, "loss": 0.2724029541015625, "step": 28640 }, { "epoch": 0.24768484492135823, "grad_norm": 34.57026602756, "learning_rate": 5.775795303007615e-06, "loss": 0.2666534423828125, "step": 28645 }, { "epoch": 0.24772807844290148, "grad_norm": 49.46385544464519, "learning_rate": 5.775718016317556e-06, "loss": 0.33467254638671873, "step": 28650 }, { "epoch": 0.24777131196444474, "grad_norm": 2.462392890129905, "learning_rate": 5.77564071682613e-06, "loss": 0.25136375427246094, "step": 28655 }, { "epoch": 0.24781454548598802, "grad_norm": 38.37158818841127, "learning_rate": 5.775563404533694e-06, "loss": 0.28252315521240234, "step": 28660 }, { "epoch": 0.24785777900753128, "grad_norm": 7.8282800744442325, "learning_rate": 5.775486079440605e-06, "loss": 0.1300689697265625, "step": 28665 }, { "epoch": 0.24790101252907454, "grad_norm": 56.10547081691838, "learning_rate": 5.775408741547221e-06, "loss": 0.382928466796875, "step": 28670 }, { "epoch": 0.2479442460506178, "grad_norm": 5.414099344263033, "learning_rate": 5.775331390853897e-06, "loss": 0.127142333984375, "step": 28675 }, { "epoch": 0.24798747957216108, "grad_norm": 8.710591146482269, "learning_rate": 5.775254027360988e-06, "loss": 0.0833251953125, "step": 28680 }, { "epoch": 0.24803071309370434, "grad_norm": 4.622592083150609, "learning_rate": 5.775176651068854e-06, "loss": 0.1057525634765625, "step": 28685 }, { "epoch": 0.2480739466152476, "grad_norm": 8.10303917902229, "learning_rate": 5.77509926197785e-06, "loss": 0.240185546875, "step": 28690 }, { "epoch": 0.24811718013679085, "grad_norm": 22.900936075133362, "learning_rate": 5.775021860088333e-06, "loss": 0.142840576171875, "step": 28695 }, { "epoch": 0.24816041365833413, "grad_norm": 2.7161864127933137, "learning_rate": 5.7749444454006616e-06, "loss": 0.0702545166015625, "step": 28700 }, { "epoch": 0.2482036471798774, "grad_norm": 20.00004447331399, "learning_rate": 5.77486701791519e-06, "loss": 0.1563751220703125, "step": 28705 }, { "epoch": 0.24824688070142065, "grad_norm": 34.647674755936805, "learning_rate": 5.774789577632279e-06, "loss": 0.15787353515625, "step": 28710 }, { "epoch": 0.2482901142229639, "grad_norm": 18.271308516741243, "learning_rate": 5.774712124552282e-06, "loss": 0.3666404724121094, "step": 28715 }, { "epoch": 0.2483333477445072, "grad_norm": 3.251234637057768, "learning_rate": 5.774634658675559e-06, "loss": 0.4139411926269531, "step": 28720 }, { "epoch": 0.24837658126605044, "grad_norm": 14.061366726935182, "learning_rate": 5.774557180002465e-06, "loss": 0.1583019256591797, "step": 28725 }, { "epoch": 0.2484198147875937, "grad_norm": 50.52958303401237, "learning_rate": 5.774479688533358e-06, "loss": 0.27613983154296873, "step": 28730 }, { "epoch": 0.24846304830913699, "grad_norm": 25.24656805462761, "learning_rate": 5.774402184268598e-06, "loss": 0.25099716186523435, "step": 28735 }, { "epoch": 0.24850628183068024, "grad_norm": 1.6671412858789563, "learning_rate": 5.774324667208538e-06, "loss": 0.18304443359375, "step": 28740 }, { "epoch": 0.2485495153522235, "grad_norm": 3.9538459881707633, "learning_rate": 5.774247137353539e-06, "loss": 0.0952301025390625, "step": 28745 }, { "epoch": 0.24859274887376676, "grad_norm": 11.124279224568212, "learning_rate": 5.774169594703957e-06, "loss": 0.481640625, "step": 28750 }, { "epoch": 0.24863598239531004, "grad_norm": 1.9216855679301506, "learning_rate": 5.77409203926015e-06, "loss": 0.150347900390625, "step": 28755 }, { "epoch": 0.2486792159168533, "grad_norm": 1.245725884850337, "learning_rate": 5.774014471022476e-06, "loss": 0.1674041748046875, "step": 28760 }, { "epoch": 0.24872244943839655, "grad_norm": 5.24269855352438, "learning_rate": 5.773936889991292e-06, "loss": 0.09747161865234374, "step": 28765 }, { "epoch": 0.2487656829599398, "grad_norm": 20.18918752237728, "learning_rate": 5.773859296166957e-06, "loss": 0.27490921020507814, "step": 28770 }, { "epoch": 0.2488089164814831, "grad_norm": 3.53486832399456, "learning_rate": 5.7737816895498265e-06, "loss": 0.1900146484375, "step": 28775 }, { "epoch": 0.24885215000302635, "grad_norm": 1.5419819462184072, "learning_rate": 5.773704070140261e-06, "loss": 0.246697998046875, "step": 28780 }, { "epoch": 0.2488953835245696, "grad_norm": 3.073245809922823, "learning_rate": 5.773626437938617e-06, "loss": 0.1238189697265625, "step": 28785 }, { "epoch": 0.24893861704611286, "grad_norm": 25.425974529626323, "learning_rate": 5.773548792945253e-06, "loss": 0.086358642578125, "step": 28790 }, { "epoch": 0.24898185056765615, "grad_norm": 1.5165653038063938, "learning_rate": 5.773471135160527e-06, "loss": 0.24235076904296876, "step": 28795 }, { "epoch": 0.2490250840891994, "grad_norm": 8.921840140488936, "learning_rate": 5.773393464584797e-06, "loss": 0.37371826171875, "step": 28800 }, { "epoch": 0.24906831761074266, "grad_norm": 2.895051065420842, "learning_rate": 5.7733157812184225e-06, "loss": 0.1131591796875, "step": 28805 }, { "epoch": 0.24911155113228592, "grad_norm": 17.48091106692099, "learning_rate": 5.77323808506176e-06, "loss": 0.16700286865234376, "step": 28810 }, { "epoch": 0.2491547846538292, "grad_norm": 0.6663847320663734, "learning_rate": 5.773160376115168e-06, "loss": 0.09363288879394531, "step": 28815 }, { "epoch": 0.24919801817537246, "grad_norm": 22.059481761634657, "learning_rate": 5.773082654379006e-06, "loss": 0.22235107421875, "step": 28820 }, { "epoch": 0.24924125169691572, "grad_norm": 2.8739970034978173, "learning_rate": 5.7730049198536315e-06, "loss": 0.2867401123046875, "step": 28825 }, { "epoch": 0.24928448521845897, "grad_norm": 5.037513723751982, "learning_rate": 5.772927172539403e-06, "loss": 0.258502197265625, "step": 28830 }, { "epoch": 0.24932771874000226, "grad_norm": 37.654918195550835, "learning_rate": 5.772849412436681e-06, "loss": 0.25113677978515625, "step": 28835 }, { "epoch": 0.24937095226154551, "grad_norm": 4.792917963137055, "learning_rate": 5.772771639545821e-06, "loss": 0.3919677734375, "step": 28840 }, { "epoch": 0.24941418578308877, "grad_norm": 12.213310714083258, "learning_rate": 5.772693853867184e-06, "loss": 0.08634185791015625, "step": 28845 }, { "epoch": 0.24945741930463203, "grad_norm": 9.324993698542436, "learning_rate": 5.772616055401127e-06, "loss": 0.3060150146484375, "step": 28850 }, { "epoch": 0.2495006528261753, "grad_norm": 4.52002073898681, "learning_rate": 5.77253824414801e-06, "loss": 0.151434326171875, "step": 28855 }, { "epoch": 0.24954388634771857, "grad_norm": 7.847990068106426, "learning_rate": 5.7724604201081926e-06, "loss": 0.4531585693359375, "step": 28860 }, { "epoch": 0.24958711986926183, "grad_norm": 14.62117154217121, "learning_rate": 5.772382583282032e-06, "loss": 0.0723358154296875, "step": 28865 }, { "epoch": 0.24963035339080508, "grad_norm": 19.41391683904441, "learning_rate": 5.772304733669887e-06, "loss": 0.36609725952148436, "step": 28870 }, { "epoch": 0.24967358691234837, "grad_norm": 0.3766979354188318, "learning_rate": 5.77222687127212e-06, "loss": 0.10859832763671876, "step": 28875 }, { "epoch": 0.24971682043389162, "grad_norm": 11.819920533908325, "learning_rate": 5.7721489960890855e-06, "loss": 0.1244140625, "step": 28880 }, { "epoch": 0.24976005395543488, "grad_norm": 21.976872872329007, "learning_rate": 5.772071108121145e-06, "loss": 0.26454887390136717, "step": 28885 }, { "epoch": 0.24980328747697814, "grad_norm": 1.4768103331210847, "learning_rate": 5.771993207368658e-06, "loss": 0.07836151123046875, "step": 28890 }, { "epoch": 0.24984652099852142, "grad_norm": 10.134416620802462, "learning_rate": 5.771915293831983e-06, "loss": 0.2500465393066406, "step": 28895 }, { "epoch": 0.24988975452006468, "grad_norm": 4.351879758536533, "learning_rate": 5.77183736751148e-06, "loss": 0.19702606201171874, "step": 28900 }, { "epoch": 0.24993298804160793, "grad_norm": 25.657482446614672, "learning_rate": 5.771759428407508e-06, "loss": 0.1402069091796875, "step": 28905 }, { "epoch": 0.24997622156315122, "grad_norm": 5.262186441135424, "learning_rate": 5.771681476520426e-06, "loss": 0.16983909606933595, "step": 28910 }, { "epoch": 0.2500194550846945, "grad_norm": 8.725441775170625, "learning_rate": 5.7716035118505935e-06, "loss": 0.3147186279296875, "step": 28915 }, { "epoch": 0.25006268860623776, "grad_norm": 3.9847019883978967, "learning_rate": 5.771525534398371e-06, "loss": 0.416156005859375, "step": 28920 }, { "epoch": 0.250105922127781, "grad_norm": 9.372657388357101, "learning_rate": 5.771447544164118e-06, "loss": 0.1259307861328125, "step": 28925 }, { "epoch": 0.2501491556493243, "grad_norm": 18.903037196983142, "learning_rate": 5.771369541148194e-06, "loss": 0.14823760986328124, "step": 28930 }, { "epoch": 0.2501923891708675, "grad_norm": 7.473271916176461, "learning_rate": 5.7712915253509586e-06, "loss": 0.062297630310058597, "step": 28935 }, { "epoch": 0.2502356226924108, "grad_norm": 12.980162403882947, "learning_rate": 5.77121349677277e-06, "loss": 0.1471609115600586, "step": 28940 }, { "epoch": 0.25027885621395407, "grad_norm": 32.43109435180673, "learning_rate": 5.771135455413991e-06, "loss": 0.21134567260742188, "step": 28945 }, { "epoch": 0.2503220897354973, "grad_norm": 5.368000264298575, "learning_rate": 5.7710574012749796e-06, "loss": 0.06659698486328125, "step": 28950 }, { "epoch": 0.2503653232570406, "grad_norm": 4.689137610597109, "learning_rate": 5.770979334356097e-06, "loss": 0.1654388427734375, "step": 28955 }, { "epoch": 0.25040855677858387, "grad_norm": 30.27192826437717, "learning_rate": 5.770901254657701e-06, "loss": 0.17940521240234375, "step": 28960 }, { "epoch": 0.2504517903001271, "grad_norm": 2.8694562914081887, "learning_rate": 5.770823162180155e-06, "loss": 0.050201416015625, "step": 28965 }, { "epoch": 0.2504950238216704, "grad_norm": 10.99075969285774, "learning_rate": 5.770745056923817e-06, "loss": 0.2941619873046875, "step": 28970 }, { "epoch": 0.2505382573432136, "grad_norm": 1.1344005866469853, "learning_rate": 5.770666938889046e-06, "loss": 0.3014640808105469, "step": 28975 }, { "epoch": 0.2505814908647569, "grad_norm": 16.040860079319284, "learning_rate": 5.770588808076206e-06, "loss": 0.22244796752929688, "step": 28980 }, { "epoch": 0.2506247243863002, "grad_norm": 1.1262693775062962, "learning_rate": 5.770510664485655e-06, "loss": 0.1084381103515625, "step": 28985 }, { "epoch": 0.2506679579078434, "grad_norm": 12.416994443070731, "learning_rate": 5.7704325081177524e-06, "loss": 0.2761260986328125, "step": 28990 }, { "epoch": 0.2507111914293867, "grad_norm": 0.9550279211401246, "learning_rate": 5.7703543389728605e-06, "loss": 0.18339691162109376, "step": 28995 }, { "epoch": 0.25075442495093, "grad_norm": 2.932586586248154, "learning_rate": 5.77027615705134e-06, "loss": 0.131878662109375, "step": 29000 }, { "epoch": 0.2507976584724732, "grad_norm": 6.558656014082539, "learning_rate": 5.77019796235355e-06, "loss": 0.13450698852539061, "step": 29005 }, { "epoch": 0.2508408919940165, "grad_norm": 1.580381710319267, "learning_rate": 5.7701197548798516e-06, "loss": 0.1766681671142578, "step": 29010 }, { "epoch": 0.2508841255155597, "grad_norm": 4.730294179103588, "learning_rate": 5.770041534630606e-06, "loss": 0.44138336181640625, "step": 29015 }, { "epoch": 0.250927359037103, "grad_norm": 16.549990314629706, "learning_rate": 5.769963301606173e-06, "loss": 0.1343658447265625, "step": 29020 }, { "epoch": 0.2509705925586463, "grad_norm": 12.991925022101295, "learning_rate": 5.769885055806914e-06, "loss": 0.7170440673828125, "step": 29025 }, { "epoch": 0.2510138260801895, "grad_norm": 14.2434308706553, "learning_rate": 5.76980679723319e-06, "loss": 0.2124237060546875, "step": 29030 }, { "epoch": 0.2510570596017328, "grad_norm": 3.126372571951673, "learning_rate": 5.769728525885363e-06, "loss": 0.0826812744140625, "step": 29035 }, { "epoch": 0.2511002931232761, "grad_norm": 6.704532635581851, "learning_rate": 5.769650241763792e-06, "loss": 0.15489959716796875, "step": 29040 }, { "epoch": 0.2511435266448193, "grad_norm": 23.892168028652303, "learning_rate": 5.769571944868838e-06, "loss": 0.23927459716796876, "step": 29045 }, { "epoch": 0.2511867601663626, "grad_norm": 15.26380408883283, "learning_rate": 5.769493635200864e-06, "loss": 0.17598114013671876, "step": 29050 }, { "epoch": 0.25122999368790583, "grad_norm": 1.3173452325667914, "learning_rate": 5.76941531276023e-06, "loss": 0.244622802734375, "step": 29055 }, { "epoch": 0.2512732272094491, "grad_norm": 41.09940315937749, "learning_rate": 5.769336977547296e-06, "loss": 0.5412143707275391, "step": 29060 }, { "epoch": 0.2513164607309924, "grad_norm": 8.151408083804943, "learning_rate": 5.769258629562425e-06, "loss": 0.342364501953125, "step": 29065 }, { "epoch": 0.2513596942525356, "grad_norm": 13.816299119645779, "learning_rate": 5.769180268805979e-06, "loss": 0.3051734924316406, "step": 29070 }, { "epoch": 0.2514029277740789, "grad_norm": 15.590321353600897, "learning_rate": 5.769101895278318e-06, "loss": 0.2516021728515625, "step": 29075 }, { "epoch": 0.2514461612956222, "grad_norm": 4.429381579408949, "learning_rate": 5.769023508979803e-06, "loss": 0.2335235595703125, "step": 29080 }, { "epoch": 0.2514893948171654, "grad_norm": 40.96979977467336, "learning_rate": 5.768945109910797e-06, "loss": 0.48580169677734375, "step": 29085 }, { "epoch": 0.2515326283387087, "grad_norm": 15.97664513444982, "learning_rate": 5.76886669807166e-06, "loss": 0.157830810546875, "step": 29090 }, { "epoch": 0.251575861860252, "grad_norm": 0.3743770754120103, "learning_rate": 5.768788273462755e-06, "loss": 0.158721923828125, "step": 29095 }, { "epoch": 0.2516190953817952, "grad_norm": 44.788213195918466, "learning_rate": 5.7687098360844424e-06, "loss": 0.46688385009765626, "step": 29100 }, { "epoch": 0.2516623289033385, "grad_norm": 3.913820519180674, "learning_rate": 5.768631385937085e-06, "loss": 0.11032180786132813, "step": 29105 }, { "epoch": 0.25170556242488173, "grad_norm": 16.623909644211366, "learning_rate": 5.768552923021045e-06, "loss": 0.17072906494140624, "step": 29110 }, { "epoch": 0.251748795946425, "grad_norm": 5.283367215960307, "learning_rate": 5.768474447336684e-06, "loss": 0.21349258422851564, "step": 29115 }, { "epoch": 0.2517920294679683, "grad_norm": 78.0067279511577, "learning_rate": 5.768395958884362e-06, "loss": 0.31197052001953124, "step": 29120 }, { "epoch": 0.25183526298951153, "grad_norm": 38.521212982961075, "learning_rate": 5.7683174576644436e-06, "loss": 0.23261337280273436, "step": 29125 }, { "epoch": 0.2518784965110548, "grad_norm": 10.423257481355819, "learning_rate": 5.768238943677289e-06, "loss": 0.107305908203125, "step": 29130 }, { "epoch": 0.2519217300325981, "grad_norm": 52.26006500304185, "learning_rate": 5.768160416923261e-06, "loss": 0.6380279541015625, "step": 29135 }, { "epoch": 0.25196496355414133, "grad_norm": 4.0857819890949525, "learning_rate": 5.768081877402722e-06, "loss": 0.08697509765625, "step": 29140 }, { "epoch": 0.2520081970756846, "grad_norm": 21.090552026487206, "learning_rate": 5.768003325116034e-06, "loss": 0.14054183959960936, "step": 29145 }, { "epoch": 0.25205143059722784, "grad_norm": 6.809673875654713, "learning_rate": 5.767924760063559e-06, "loss": 0.09228363037109374, "step": 29150 }, { "epoch": 0.25209466411877113, "grad_norm": 5.61963311229075, "learning_rate": 5.76784618224566e-06, "loss": 0.1233367919921875, "step": 29155 }, { "epoch": 0.2521378976403144, "grad_norm": 0.8889276730817209, "learning_rate": 5.767767591662699e-06, "loss": 0.03386688232421875, "step": 29160 }, { "epoch": 0.25218113116185764, "grad_norm": 11.81909244789638, "learning_rate": 5.767688988315039e-06, "loss": 0.4025416374206543, "step": 29165 }, { "epoch": 0.2522243646834009, "grad_norm": 15.606940564118442, "learning_rate": 5.76761037220304e-06, "loss": 0.166436767578125, "step": 29170 }, { "epoch": 0.2522675982049442, "grad_norm": 2.3777118671782325, "learning_rate": 5.767531743327068e-06, "loss": 0.0454437255859375, "step": 29175 }, { "epoch": 0.25231083172648744, "grad_norm": 0.3664310755271946, "learning_rate": 5.7674531016874836e-06, "loss": 0.203863525390625, "step": 29180 }, { "epoch": 0.2523540652480307, "grad_norm": 5.406441774284333, "learning_rate": 5.76737444728465e-06, "loss": 0.4898651123046875, "step": 29185 }, { "epoch": 0.25239729876957395, "grad_norm": 56.75536012739885, "learning_rate": 5.76729578011893e-06, "loss": 0.3194366455078125, "step": 29190 }, { "epoch": 0.25244053229111724, "grad_norm": 2.6046652361670697, "learning_rate": 5.767217100190687e-06, "loss": 0.2139892578125, "step": 29195 }, { "epoch": 0.2524837658126605, "grad_norm": 3.479079238077066, "learning_rate": 5.7671384075002825e-06, "loss": 0.1193115234375, "step": 29200 }, { "epoch": 0.25252699933420375, "grad_norm": 30.989941414087987, "learning_rate": 5.76705970204808e-06, "loss": 0.3130340576171875, "step": 29205 }, { "epoch": 0.25257023285574703, "grad_norm": 6.809222461779854, "learning_rate": 5.766980983834444e-06, "loss": 0.10194091796875, "step": 29210 }, { "epoch": 0.2526134663772903, "grad_norm": 10.856252760597206, "learning_rate": 5.766902252859735e-06, "loss": 0.2112579345703125, "step": 29215 }, { "epoch": 0.25265669989883355, "grad_norm": 25.547093427874888, "learning_rate": 5.766823509124317e-06, "loss": 0.18765182495117189, "step": 29220 }, { "epoch": 0.25269993342037683, "grad_norm": 17.919263426144262, "learning_rate": 5.766744752628555e-06, "loss": 0.21938629150390626, "step": 29225 }, { "epoch": 0.25274316694192006, "grad_norm": 9.2537873881635, "learning_rate": 5.76666598337281e-06, "loss": 0.05463409423828125, "step": 29230 }, { "epoch": 0.25278640046346335, "grad_norm": 26.445043575388624, "learning_rate": 5.766587201357446e-06, "loss": 0.1118896484375, "step": 29235 }, { "epoch": 0.25282963398500663, "grad_norm": 0.8156267340565544, "learning_rate": 5.7665084065828256e-06, "loss": 0.09242401123046876, "step": 29240 }, { "epoch": 0.25287286750654986, "grad_norm": 0.6404504802433448, "learning_rate": 5.766429599049313e-06, "loss": 0.052239990234375, "step": 29245 }, { "epoch": 0.25291610102809314, "grad_norm": 36.9107968296665, "learning_rate": 5.766350778757272e-06, "loss": 0.512042236328125, "step": 29250 }, { "epoch": 0.2529593345496364, "grad_norm": 17.799516676486693, "learning_rate": 5.766271945707066e-06, "loss": 0.1800750732421875, "step": 29255 }, { "epoch": 0.25300256807117966, "grad_norm": 19.18976337929278, "learning_rate": 5.766193099899057e-06, "loss": 0.140386962890625, "step": 29260 }, { "epoch": 0.25304580159272294, "grad_norm": 18.663743669252412, "learning_rate": 5.766114241333611e-06, "loss": 0.3343502044677734, "step": 29265 }, { "epoch": 0.25308903511426617, "grad_norm": 6.247660316471607, "learning_rate": 5.76603537001109e-06, "loss": 0.09810409545898438, "step": 29270 }, { "epoch": 0.25313226863580945, "grad_norm": 2.9628753718381424, "learning_rate": 5.765956485931858e-06, "loss": 0.11565170288085938, "step": 29275 }, { "epoch": 0.25317550215735274, "grad_norm": 8.674533513233136, "learning_rate": 5.765877589096279e-06, "loss": 0.2253387451171875, "step": 29280 }, { "epoch": 0.25321873567889597, "grad_norm": 20.447635869847083, "learning_rate": 5.765798679504718e-06, "loss": 0.13531036376953126, "step": 29285 }, { "epoch": 0.25326196920043925, "grad_norm": 11.021642334060832, "learning_rate": 5.765719757157537e-06, "loss": 0.22132415771484376, "step": 29290 }, { "epoch": 0.25330520272198254, "grad_norm": 13.901717347704658, "learning_rate": 5.765640822055101e-06, "loss": 0.281658935546875, "step": 29295 }, { "epoch": 0.25334843624352577, "grad_norm": 52.15464684452104, "learning_rate": 5.7655618741977745e-06, "loss": 0.3351654052734375, "step": 29300 }, { "epoch": 0.25339166976506905, "grad_norm": 41.242602100041005, "learning_rate": 5.76548291358592e-06, "loss": 0.11052703857421875, "step": 29305 }, { "epoch": 0.25343490328661233, "grad_norm": 0.7445338994475009, "learning_rate": 5.7654039402199035e-06, "loss": 0.114697265625, "step": 29310 }, { "epoch": 0.25347813680815556, "grad_norm": 7.297178651765237, "learning_rate": 5.765324954100088e-06, "loss": 0.2009490966796875, "step": 29315 }, { "epoch": 0.25352137032969885, "grad_norm": 25.73162264110032, "learning_rate": 5.765245955226838e-06, "loss": 0.23175506591796874, "step": 29320 }, { "epoch": 0.2535646038512421, "grad_norm": 49.35418421178691, "learning_rate": 5.765166943600519e-06, "loss": 0.34062042236328127, "step": 29325 }, { "epoch": 0.25360783737278536, "grad_norm": 40.1093373978513, "learning_rate": 5.765087919221493e-06, "loss": 0.326318359375, "step": 29330 }, { "epoch": 0.25365107089432865, "grad_norm": 17.28283617777959, "learning_rate": 5.7650088820901265e-06, "loss": 0.19503021240234375, "step": 29335 }, { "epoch": 0.2536943044158719, "grad_norm": 18.353746546257412, "learning_rate": 5.764929832206784e-06, "loss": 0.45242919921875, "step": 29340 }, { "epoch": 0.25373753793741516, "grad_norm": 0.6307024621047778, "learning_rate": 5.764850769571829e-06, "loss": 0.157537841796875, "step": 29345 }, { "epoch": 0.25378077145895844, "grad_norm": 0.6513657271218952, "learning_rate": 5.764771694185626e-06, "loss": 0.1105682373046875, "step": 29350 }, { "epoch": 0.25382400498050167, "grad_norm": 9.327160779968882, "learning_rate": 5.764692606048541e-06, "loss": 0.33861827850341797, "step": 29355 }, { "epoch": 0.25386723850204496, "grad_norm": 5.883291709632755, "learning_rate": 5.764613505160937e-06, "loss": 0.6610742568969726, "step": 29360 }, { "epoch": 0.2539104720235882, "grad_norm": 23.759624238186998, "learning_rate": 5.7645343915231804e-06, "loss": 0.352099609375, "step": 29365 }, { "epoch": 0.25395370554513147, "grad_norm": 40.20693763234125, "learning_rate": 5.764455265135636e-06, "loss": 0.2838584899902344, "step": 29370 }, { "epoch": 0.25399693906667475, "grad_norm": 12.567024259227864, "learning_rate": 5.764376125998667e-06, "loss": 0.09740524291992188, "step": 29375 }, { "epoch": 0.254040172588218, "grad_norm": 1.4603459460043642, "learning_rate": 5.76429697411264e-06, "loss": 0.129705810546875, "step": 29380 }, { "epoch": 0.25408340610976127, "grad_norm": 19.169032342160317, "learning_rate": 5.76421780947792e-06, "loss": 0.13547210693359374, "step": 29385 }, { "epoch": 0.25412663963130455, "grad_norm": 7.771301887039145, "learning_rate": 5.764138632094871e-06, "loss": 0.1396728515625, "step": 29390 }, { "epoch": 0.2541698731528478, "grad_norm": 4.836215902103136, "learning_rate": 5.76405944196386e-06, "loss": 0.12916717529296876, "step": 29395 }, { "epoch": 0.25421310667439107, "grad_norm": 7.851181636246193, "learning_rate": 5.763980239085251e-06, "loss": 0.2064697265625, "step": 29400 }, { "epoch": 0.2542563401959343, "grad_norm": 3.337862721805027, "learning_rate": 5.763901023459408e-06, "loss": 0.10931396484375, "step": 29405 }, { "epoch": 0.2542995737174776, "grad_norm": 41.3528271304279, "learning_rate": 5.763821795086699e-06, "loss": 0.22542572021484375, "step": 29410 }, { "epoch": 0.25434280723902086, "grad_norm": 3.475286843004868, "learning_rate": 5.763742553967487e-06, "loss": 0.35099945068359373, "step": 29415 }, { "epoch": 0.2543860407605641, "grad_norm": 2.3384886478468045, "learning_rate": 5.763663300102139e-06, "loss": 0.1105926513671875, "step": 29420 }, { "epoch": 0.2544292742821074, "grad_norm": 13.022318020426795, "learning_rate": 5.763584033491021e-06, "loss": 0.4802490234375, "step": 29425 }, { "epoch": 0.25447250780365066, "grad_norm": 6.9364576877458, "learning_rate": 5.763504754134497e-06, "loss": 0.076959228515625, "step": 29430 }, { "epoch": 0.2545157413251939, "grad_norm": 26.38898720476315, "learning_rate": 5.763425462032933e-06, "loss": 0.2361083984375, "step": 29435 }, { "epoch": 0.2545589748467372, "grad_norm": 0.82276626858143, "learning_rate": 5.763346157186695e-06, "loss": 0.08742828369140625, "step": 29440 }, { "epoch": 0.2546022083682804, "grad_norm": 18.97312210360565, "learning_rate": 5.763266839596149e-06, "loss": 0.3041343688964844, "step": 29445 }, { "epoch": 0.2546454418898237, "grad_norm": 41.860431544332855, "learning_rate": 5.76318750926166e-06, "loss": 0.2980949401855469, "step": 29450 }, { "epoch": 0.25468867541136697, "grad_norm": 9.957896922890592, "learning_rate": 5.7631081661835945e-06, "loss": 0.15316162109375, "step": 29455 }, { "epoch": 0.2547319089329102, "grad_norm": 16.961856224161668, "learning_rate": 5.763028810362319e-06, "loss": 0.12127838134765626, "step": 29460 }, { "epoch": 0.2547751424544535, "grad_norm": 25.189435802936995, "learning_rate": 5.762949441798198e-06, "loss": 0.18568191528320313, "step": 29465 }, { "epoch": 0.25481837597599677, "grad_norm": 29.142222525811096, "learning_rate": 5.762870060491598e-06, "loss": 0.40216064453125, "step": 29470 }, { "epoch": 0.25486160949754, "grad_norm": 2.8708623496717003, "learning_rate": 5.762790666442886e-06, "loss": 0.14187393188476563, "step": 29475 }, { "epoch": 0.2549048430190833, "grad_norm": 17.52378887109832, "learning_rate": 5.762711259652428e-06, "loss": 0.14496688842773436, "step": 29480 }, { "epoch": 0.25494807654062657, "grad_norm": 1.8365575777762326, "learning_rate": 5.762631840120589e-06, "loss": 0.02051239013671875, "step": 29485 }, { "epoch": 0.2549913100621698, "grad_norm": 3.7347877286310185, "learning_rate": 5.762552407847736e-06, "loss": 0.03965415954589844, "step": 29490 }, { "epoch": 0.2550345435837131, "grad_norm": 20.611797825250576, "learning_rate": 5.762472962834237e-06, "loss": 0.22274322509765626, "step": 29495 }, { "epoch": 0.2550777771052563, "grad_norm": 3.4862554824991214, "learning_rate": 5.762393505080455e-06, "loss": 0.16583709716796874, "step": 29500 }, { "epoch": 0.2551210106267996, "grad_norm": 75.52112620104398, "learning_rate": 5.762314034586758e-06, "loss": 0.254327392578125, "step": 29505 }, { "epoch": 0.2551642441483429, "grad_norm": 13.971601733500123, "learning_rate": 5.762234551353514e-06, "loss": 0.17677154541015624, "step": 29510 }, { "epoch": 0.2552074776698861, "grad_norm": 9.776958276839682, "learning_rate": 5.7621550553810875e-06, "loss": 0.09286651611328126, "step": 29515 }, { "epoch": 0.2552507111914294, "grad_norm": 32.24210731876242, "learning_rate": 5.762075546669847e-06, "loss": 0.14575653076171874, "step": 29520 }, { "epoch": 0.2552939447129727, "grad_norm": 0.04661087505046665, "learning_rate": 5.761996025220157e-06, "loss": 0.08164043426513672, "step": 29525 }, { "epoch": 0.2553371782345159, "grad_norm": 2.31558876865855, "learning_rate": 5.761916491032387e-06, "loss": 0.1740753173828125, "step": 29530 }, { "epoch": 0.2553804117560592, "grad_norm": 69.93158099070753, "learning_rate": 5.7618369441069004e-06, "loss": 0.2901630401611328, "step": 29535 }, { "epoch": 0.2554236452776024, "grad_norm": 11.389557823613464, "learning_rate": 5.761757384444066e-06, "loss": 0.38522186279296877, "step": 29540 }, { "epoch": 0.2554668787991457, "grad_norm": 16.853660857414987, "learning_rate": 5.761677812044251e-06, "loss": 0.34514694213867186, "step": 29545 }, { "epoch": 0.255510112320689, "grad_norm": 4.1175685386975935, "learning_rate": 5.761598226907823e-06, "loss": 0.1644775390625, "step": 29550 }, { "epoch": 0.2555533458422322, "grad_norm": 16.793426723569706, "learning_rate": 5.761518629035147e-06, "loss": 0.21012420654296876, "step": 29555 }, { "epoch": 0.2555965793637755, "grad_norm": 1.7696751291203605, "learning_rate": 5.761439018426591e-06, "loss": 0.5536859512329102, "step": 29560 }, { "epoch": 0.2556398128853188, "grad_norm": 4.437682660906226, "learning_rate": 5.761359395082522e-06, "loss": 0.6608604431152344, "step": 29565 }, { "epoch": 0.255683046406862, "grad_norm": 28.177824382427485, "learning_rate": 5.761279759003309e-06, "loss": 0.6071159362792968, "step": 29570 }, { "epoch": 0.2557262799284053, "grad_norm": 7.713436298903985, "learning_rate": 5.761200110189316e-06, "loss": 0.1502655029296875, "step": 29575 }, { "epoch": 0.2557695134499485, "grad_norm": 6.617569033931722, "learning_rate": 5.761120448640912e-06, "loss": 0.1188201904296875, "step": 29580 }, { "epoch": 0.2558127469714918, "grad_norm": 16.61046363968155, "learning_rate": 5.7610407743584655e-06, "loss": 0.2644622802734375, "step": 29585 }, { "epoch": 0.2558559804930351, "grad_norm": 5.014699945914891, "learning_rate": 5.760961087342342e-06, "loss": 0.103326416015625, "step": 29590 }, { "epoch": 0.2558992140145783, "grad_norm": 6.415307819303781, "learning_rate": 5.760881387592911e-06, "loss": 0.07023468017578124, "step": 29595 }, { "epoch": 0.2559424475361216, "grad_norm": 0.1897901683985417, "learning_rate": 5.760801675110538e-06, "loss": 0.11849784851074219, "step": 29600 }, { "epoch": 0.2559856810576649, "grad_norm": 1.7229840672131418, "learning_rate": 5.760721949895592e-06, "loss": 0.0599700927734375, "step": 29605 }, { "epoch": 0.2560289145792081, "grad_norm": 27.07807249630645, "learning_rate": 5.760642211948441e-06, "loss": 0.51617431640625, "step": 29610 }, { "epoch": 0.2560721481007514, "grad_norm": 6.9312152455290565, "learning_rate": 5.760562461269451e-06, "loss": 0.39151611328125, "step": 29615 }, { "epoch": 0.25611538162229464, "grad_norm": 2.6726479114292823, "learning_rate": 5.760482697858991e-06, "loss": 0.21251983642578126, "step": 29620 }, { "epoch": 0.2561586151438379, "grad_norm": 3.1048807299365286, "learning_rate": 5.760402921717429e-06, "loss": 0.34662704467773436, "step": 29625 }, { "epoch": 0.2562018486653812, "grad_norm": 9.333158621076267, "learning_rate": 5.760323132845133e-06, "loss": 0.3453094482421875, "step": 29630 }, { "epoch": 0.25624508218692443, "grad_norm": 23.509357461702983, "learning_rate": 5.76024333124247e-06, "loss": 0.11982192993164062, "step": 29635 }, { "epoch": 0.2562883157084677, "grad_norm": 4.44059693030619, "learning_rate": 5.76016351690981e-06, "loss": 0.10403594970703126, "step": 29640 }, { "epoch": 0.256331549230011, "grad_norm": 4.600754295432633, "learning_rate": 5.760083689847518e-06, "loss": 0.0558685302734375, "step": 29645 }, { "epoch": 0.25637478275155423, "grad_norm": 1.318367734940939, "learning_rate": 5.7600038500559644e-06, "loss": 0.4240570068359375, "step": 29650 }, { "epoch": 0.2564180162730975, "grad_norm": 17.341889796344606, "learning_rate": 5.759923997535517e-06, "loss": 0.27141571044921875, "step": 29655 }, { "epoch": 0.2564612497946408, "grad_norm": 0.7967160436139313, "learning_rate": 5.759844132286544e-06, "loss": 0.195050048828125, "step": 29660 }, { "epoch": 0.25650448331618403, "grad_norm": 3.1856175997789586, "learning_rate": 5.759764254309415e-06, "loss": 0.0362457275390625, "step": 29665 }, { "epoch": 0.2565477168377273, "grad_norm": 34.042843924022506, "learning_rate": 5.7596843636044955e-06, "loss": 0.17216949462890624, "step": 29670 }, { "epoch": 0.25659095035927054, "grad_norm": 16.992235049475138, "learning_rate": 5.759604460172156e-06, "loss": 0.6001754760742187, "step": 29675 }, { "epoch": 0.2566341838808138, "grad_norm": 15.685685055224264, "learning_rate": 5.7595245440127645e-06, "loss": 0.2537139892578125, "step": 29680 }, { "epoch": 0.2566774174023571, "grad_norm": 18.004416223027043, "learning_rate": 5.759444615126689e-06, "loss": 0.44864349365234374, "step": 29685 }, { "epoch": 0.25672065092390034, "grad_norm": 8.85174499262916, "learning_rate": 5.7593646735143e-06, "loss": 0.6319244384765625, "step": 29690 }, { "epoch": 0.2567638844454436, "grad_norm": 47.593463467073605, "learning_rate": 5.7592847191759645e-06, "loss": 0.2611907958984375, "step": 29695 }, { "epoch": 0.2568071179669869, "grad_norm": 3.3016881529762188, "learning_rate": 5.759204752112052e-06, "loss": 0.34439697265625, "step": 29700 }, { "epoch": 0.25685035148853014, "grad_norm": 7.494875245832712, "learning_rate": 5.759124772322931e-06, "loss": 0.11764488220214844, "step": 29705 }, { "epoch": 0.2568935850100734, "grad_norm": 2.929402444034915, "learning_rate": 5.759044779808969e-06, "loss": 0.17158203125, "step": 29710 }, { "epoch": 0.25693681853161665, "grad_norm": 17.709943469174682, "learning_rate": 5.758964774570537e-06, "loss": 0.17371063232421874, "step": 29715 }, { "epoch": 0.25698005205315994, "grad_norm": 2.503473466842343, "learning_rate": 5.758884756608004e-06, "loss": 0.2917144775390625, "step": 29720 }, { "epoch": 0.2570232855747032, "grad_norm": 4.232182628331576, "learning_rate": 5.758804725921738e-06, "loss": 0.12505416870117186, "step": 29725 }, { "epoch": 0.25706651909624645, "grad_norm": 0.6707699117218283, "learning_rate": 5.7587246825121085e-06, "loss": 0.05842437744140625, "step": 29730 }, { "epoch": 0.25710975261778973, "grad_norm": 0.6568641949466163, "learning_rate": 5.758644626379484e-06, "loss": 0.10358734130859375, "step": 29735 }, { "epoch": 0.257152986139333, "grad_norm": 0.7561845096055224, "learning_rate": 5.758564557524234e-06, "loss": 0.16720428466796874, "step": 29740 }, { "epoch": 0.25719621966087625, "grad_norm": 0.31866293029578985, "learning_rate": 5.7584844759467284e-06, "loss": 0.04810333251953125, "step": 29745 }, { "epoch": 0.25723945318241953, "grad_norm": 0.9936663701445886, "learning_rate": 5.758404381647336e-06, "loss": 0.2222076416015625, "step": 29750 }, { "epoch": 0.25728268670396276, "grad_norm": 7.391274159117798, "learning_rate": 5.758324274626427e-06, "loss": 0.1760772705078125, "step": 29755 }, { "epoch": 0.25732592022550604, "grad_norm": 2.95980879958975, "learning_rate": 5.7582441548843685e-06, "loss": 0.14092597961425782, "step": 29760 }, { "epoch": 0.25736915374704933, "grad_norm": 22.55179908954351, "learning_rate": 5.758164022421533e-06, "loss": 0.07941741943359375, "step": 29765 }, { "epoch": 0.25741238726859256, "grad_norm": 4.35055657277351, "learning_rate": 5.758083877238289e-06, "loss": 0.10885772705078126, "step": 29770 }, { "epoch": 0.25745562079013584, "grad_norm": 440.95206326107893, "learning_rate": 5.758003719335005e-06, "loss": 0.22684326171875, "step": 29775 }, { "epoch": 0.2574988543116791, "grad_norm": 24.945921513708342, "learning_rate": 5.757923548712052e-06, "loss": 0.2293010711669922, "step": 29780 }, { "epoch": 0.25754208783322236, "grad_norm": 4.768396208742784, "learning_rate": 5.7578433653698e-06, "loss": 0.34122161865234374, "step": 29785 }, { "epoch": 0.25758532135476564, "grad_norm": 35.06110598063697, "learning_rate": 5.757763169308617e-06, "loss": 0.2393280029296875, "step": 29790 }, { "epoch": 0.25762855487630887, "grad_norm": 4.156979260469825, "learning_rate": 5.757682960528875e-06, "loss": 0.19118499755859375, "step": 29795 }, { "epoch": 0.25767178839785215, "grad_norm": 0.04264374048322781, "learning_rate": 5.757602739030942e-06, "loss": 0.27152385711669924, "step": 29800 }, { "epoch": 0.25771502191939544, "grad_norm": 5.855601421822506, "learning_rate": 5.7575225048151886e-06, "loss": 0.12941131591796876, "step": 29805 }, { "epoch": 0.25775825544093867, "grad_norm": 0.3865288134414569, "learning_rate": 5.757442257881986e-06, "loss": 0.3184173583984375, "step": 29810 }, { "epoch": 0.25780148896248195, "grad_norm": 1.0581834702060284, "learning_rate": 5.757361998231703e-06, "loss": 0.0970703125, "step": 29815 }, { "epoch": 0.25784472248402523, "grad_norm": 12.74501701821418, "learning_rate": 5.757281725864709e-06, "loss": 0.1403839111328125, "step": 29820 }, { "epoch": 0.25788795600556846, "grad_norm": 3.5104207992770555, "learning_rate": 5.757201440781377e-06, "loss": 0.03402786254882813, "step": 29825 }, { "epoch": 0.25793118952711175, "grad_norm": 13.45855846091519, "learning_rate": 5.757121142982074e-06, "loss": 0.11767501831054687, "step": 29830 }, { "epoch": 0.25797442304865503, "grad_norm": 0.7911338826184844, "learning_rate": 5.757040832467173e-06, "loss": 0.22488861083984374, "step": 29835 }, { "epoch": 0.25801765657019826, "grad_norm": 2.9591096928033185, "learning_rate": 5.756960509237043e-06, "loss": 0.378533935546875, "step": 29840 }, { "epoch": 0.25806089009174155, "grad_norm": 1.9043192649243936, "learning_rate": 5.756880173292055e-06, "loss": 0.1422821044921875, "step": 29845 }, { "epoch": 0.2581041236132848, "grad_norm": 46.94767003552795, "learning_rate": 5.756799824632579e-06, "loss": 0.186529541015625, "step": 29850 }, { "epoch": 0.25814735713482806, "grad_norm": 36.04220878155575, "learning_rate": 5.756719463258986e-06, "loss": 0.5102798461914062, "step": 29855 }, { "epoch": 0.25819059065637134, "grad_norm": 21.724416633674338, "learning_rate": 5.756639089171647e-06, "loss": 0.20252914428710939, "step": 29860 }, { "epoch": 0.2582338241779146, "grad_norm": 4.214967144373272, "learning_rate": 5.7565587023709305e-06, "loss": 0.12202377319335937, "step": 29865 }, { "epoch": 0.25827705769945786, "grad_norm": 4.826748426519226, "learning_rate": 5.756478302857209e-06, "loss": 0.339697265625, "step": 29870 }, { "epoch": 0.25832029122100114, "grad_norm": 24.9754864112719, "learning_rate": 5.756397890630854e-06, "loss": 0.0592315673828125, "step": 29875 }, { "epoch": 0.25836352474254437, "grad_norm": 0.38463931128030626, "learning_rate": 5.756317465692236e-06, "loss": 0.0347076416015625, "step": 29880 }, { "epoch": 0.25840675826408765, "grad_norm": 1.5691433821742808, "learning_rate": 5.756237028041724e-06, "loss": 0.14308948516845704, "step": 29885 }, { "epoch": 0.2584499917856309, "grad_norm": 10.853307162938158, "learning_rate": 5.756156577679692e-06, "loss": 0.21156883239746094, "step": 29890 }, { "epoch": 0.25849322530717417, "grad_norm": 2.9637036256543987, "learning_rate": 5.756076114606509e-06, "loss": 0.12814254760742189, "step": 29895 }, { "epoch": 0.25853645882871745, "grad_norm": 0.6505591748405969, "learning_rate": 5.755995638822545e-06, "loss": 0.1569915771484375, "step": 29900 }, { "epoch": 0.2585796923502607, "grad_norm": 2.293786606088865, "learning_rate": 5.755915150328174e-06, "loss": 0.09914703369140625, "step": 29905 }, { "epoch": 0.25862292587180397, "grad_norm": 9.818822767006699, "learning_rate": 5.755834649123765e-06, "loss": 0.13575057983398436, "step": 29910 }, { "epoch": 0.25866615939334725, "grad_norm": 1.295016276666932, "learning_rate": 5.755754135209691e-06, "loss": 0.13411502838134765, "step": 29915 }, { "epoch": 0.2587093929148905, "grad_norm": 0.34298728692224467, "learning_rate": 5.755673608586322e-06, "loss": 0.07199325561523437, "step": 29920 }, { "epoch": 0.25875262643643376, "grad_norm": 12.105976828874155, "learning_rate": 5.75559306925403e-06, "loss": 0.12635650634765624, "step": 29925 }, { "epoch": 0.258795859957977, "grad_norm": 15.58479720041487, "learning_rate": 5.755512517213186e-06, "loss": 0.2811004638671875, "step": 29930 }, { "epoch": 0.2588390934795203, "grad_norm": 0.564216703497639, "learning_rate": 5.755431952464162e-06, "loss": 0.3648223876953125, "step": 29935 }, { "epoch": 0.25888232700106356, "grad_norm": 15.174255431842207, "learning_rate": 5.755351375007328e-06, "loss": 0.35789337158203127, "step": 29940 }, { "epoch": 0.2589255605226068, "grad_norm": 1.2757619870969914, "learning_rate": 5.755270784843059e-06, "loss": 0.23293991088867189, "step": 29945 }, { "epoch": 0.2589687940441501, "grad_norm": 5.43700831019197, "learning_rate": 5.755190181971723e-06, "loss": 0.18602447509765624, "step": 29950 }, { "epoch": 0.25901202756569336, "grad_norm": 3.5876465537645674, "learning_rate": 5.7551095663936945e-06, "loss": 0.19518890380859374, "step": 29955 }, { "epoch": 0.2590552610872366, "grad_norm": 0.5651620226746399, "learning_rate": 5.755028938109343e-06, "loss": 0.11735076904296875, "step": 29960 }, { "epoch": 0.2590984946087799, "grad_norm": 7.191108366094181, "learning_rate": 5.754948297119043e-06, "loss": 0.137335205078125, "step": 29965 }, { "epoch": 0.2591417281303231, "grad_norm": 8.116152705652151, "learning_rate": 5.7548676434231635e-06, "loss": 0.132867431640625, "step": 29970 }, { "epoch": 0.2591849616518664, "grad_norm": 16.984779332235973, "learning_rate": 5.754786977022078e-06, "loss": 0.34679412841796875, "step": 29975 }, { "epoch": 0.25922819517340967, "grad_norm": 5.323848770010101, "learning_rate": 5.754706297916159e-06, "loss": 0.08420486450195312, "step": 29980 }, { "epoch": 0.2592714286949529, "grad_norm": 14.676252800026582, "learning_rate": 5.754625606105777e-06, "loss": 0.3102748870849609, "step": 29985 }, { "epoch": 0.2593146622164962, "grad_norm": 42.89511865182231, "learning_rate": 5.754544901591306e-06, "loss": 0.3118408203125, "step": 29990 }, { "epoch": 0.25935789573803947, "grad_norm": 3.4008490967988587, "learning_rate": 5.754464184373117e-06, "loss": 0.07225875854492188, "step": 29995 }, { "epoch": 0.2594011292595827, "grad_norm": 7.306246873794658, "learning_rate": 5.7543834544515836e-06, "loss": 0.037923431396484374, "step": 30000 }, { "epoch": 0.259444362781126, "grad_norm": 2.604598888052297, "learning_rate": 5.754302711827076e-06, "loss": 0.269073486328125, "step": 30005 }, { "epoch": 0.2594875963026692, "grad_norm": 7.531879259862805, "learning_rate": 5.7542219564999675e-06, "loss": 0.12037353515625, "step": 30010 }, { "epoch": 0.2595308298242125, "grad_norm": 4.372346148131458, "learning_rate": 5.754141188470631e-06, "loss": 0.07664642333984376, "step": 30015 }, { "epoch": 0.2595740633457558, "grad_norm": 8.015684957441726, "learning_rate": 5.754060407739439e-06, "loss": 0.1669097900390625, "step": 30020 }, { "epoch": 0.259617296867299, "grad_norm": 0.4244771195273673, "learning_rate": 5.753979614306763e-06, "loss": 0.034641265869140625, "step": 30025 }, { "epoch": 0.2596605303888423, "grad_norm": 9.776400525780574, "learning_rate": 5.753898808172977e-06, "loss": 0.141448974609375, "step": 30030 }, { "epoch": 0.2597037639103856, "grad_norm": 9.532579907003075, "learning_rate": 5.753817989338453e-06, "loss": 0.1674589157104492, "step": 30035 }, { "epoch": 0.2597469974319288, "grad_norm": 1.893011781503566, "learning_rate": 5.753737157803564e-06, "loss": 0.141552734375, "step": 30040 }, { "epoch": 0.2597902309534721, "grad_norm": 31.527492307665984, "learning_rate": 5.753656313568682e-06, "loss": 0.16613998413085937, "step": 30045 }, { "epoch": 0.2598334644750154, "grad_norm": 42.8793140249001, "learning_rate": 5.7535754566341805e-06, "loss": 0.28173828125, "step": 30050 }, { "epoch": 0.2598766979965586, "grad_norm": 16.791385014128885, "learning_rate": 5.753494587000432e-06, "loss": 0.23429412841796876, "step": 30055 }, { "epoch": 0.2599199315181019, "grad_norm": 10.59698448230264, "learning_rate": 5.7534137046678105e-06, "loss": 0.09347457885742187, "step": 30060 }, { "epoch": 0.2599631650396451, "grad_norm": 0.9207298255379748, "learning_rate": 5.753332809636688e-06, "loss": 0.4300956726074219, "step": 30065 }, { "epoch": 0.2600063985611884, "grad_norm": 3.8170526853301165, "learning_rate": 5.753251901907438e-06, "loss": 0.1107757568359375, "step": 30070 }, { "epoch": 0.2600496320827317, "grad_norm": 2.4903758347350236, "learning_rate": 5.753170981480433e-06, "loss": 0.179248046875, "step": 30075 }, { "epoch": 0.2600928656042749, "grad_norm": 1.3076905584855576, "learning_rate": 5.753090048356047e-06, "loss": 0.28118133544921875, "step": 30080 }, { "epoch": 0.2601360991258182, "grad_norm": 15.510709555678384, "learning_rate": 5.753009102534653e-06, "loss": 0.59049072265625, "step": 30085 }, { "epoch": 0.2601793326473615, "grad_norm": 4.162287320593462, "learning_rate": 5.752928144016624e-06, "loss": 0.1039215087890625, "step": 30090 }, { "epoch": 0.2602225661689047, "grad_norm": 1.349939578975803, "learning_rate": 5.752847172802335e-06, "loss": 0.071954345703125, "step": 30095 }, { "epoch": 0.260265799690448, "grad_norm": 5.336238024103412, "learning_rate": 5.752766188892156e-06, "loss": 0.1474365234375, "step": 30100 }, { "epoch": 0.2603090332119912, "grad_norm": 0.6628467216859765, "learning_rate": 5.752685192286464e-06, "loss": 0.23746490478515625, "step": 30105 }, { "epoch": 0.2603522667335345, "grad_norm": 64.66951204590966, "learning_rate": 5.75260418298563e-06, "loss": 0.4545654296875, "step": 30110 }, { "epoch": 0.2603955002550778, "grad_norm": 11.660694925232553, "learning_rate": 5.75252316099003e-06, "loss": 0.09368896484375, "step": 30115 }, { "epoch": 0.260438733776621, "grad_norm": 10.71945676786599, "learning_rate": 5.752442126300036e-06, "loss": 0.053957366943359376, "step": 30120 }, { "epoch": 0.2604819672981643, "grad_norm": 15.57369351388551, "learning_rate": 5.752361078916022e-06, "loss": 0.3252407073974609, "step": 30125 }, { "epoch": 0.2605252008197076, "grad_norm": 0.4619500480300233, "learning_rate": 5.752280018838361e-06, "loss": 0.14481620788574218, "step": 30130 }, { "epoch": 0.2605684343412508, "grad_norm": 12.63599673042512, "learning_rate": 5.752198946067429e-06, "loss": 0.264007568359375, "step": 30135 }, { "epoch": 0.2606116678627941, "grad_norm": 5.117505462942349, "learning_rate": 5.752117860603599e-06, "loss": 0.11352386474609374, "step": 30140 }, { "epoch": 0.26065490138433733, "grad_norm": 16.17568808582178, "learning_rate": 5.752036762447243e-06, "loss": 0.3080963134765625, "step": 30145 }, { "epoch": 0.2606981349058806, "grad_norm": 10.877658256796169, "learning_rate": 5.751955651598737e-06, "loss": 0.07359695434570312, "step": 30150 }, { "epoch": 0.2607413684274239, "grad_norm": 8.238231974933043, "learning_rate": 5.751874528058456e-06, "loss": 0.20023193359375, "step": 30155 }, { "epoch": 0.26078460194896713, "grad_norm": 10.915544283282182, "learning_rate": 5.751793391826772e-06, "loss": 0.0909637451171875, "step": 30160 }, { "epoch": 0.2608278354705104, "grad_norm": 1.2466819609826143, "learning_rate": 5.75171224290406e-06, "loss": 0.02739410400390625, "step": 30165 }, { "epoch": 0.2608710689920537, "grad_norm": 12.434476570719442, "learning_rate": 5.751631081290694e-06, "loss": 0.1626729965209961, "step": 30170 }, { "epoch": 0.26091430251359693, "grad_norm": 14.575077660545348, "learning_rate": 5.7515499069870495e-06, "loss": 0.17551422119140625, "step": 30175 }, { "epoch": 0.2609575360351402, "grad_norm": 15.602068375428177, "learning_rate": 5.751468719993499e-06, "loss": 0.1321990966796875, "step": 30180 }, { "epoch": 0.26100076955668344, "grad_norm": 4.583573638270261, "learning_rate": 5.751387520310418e-06, "loss": 0.299945068359375, "step": 30185 }, { "epoch": 0.2610440030782267, "grad_norm": 8.93537169366545, "learning_rate": 5.751306307938182e-06, "loss": 0.4007545471191406, "step": 30190 }, { "epoch": 0.26108723659977, "grad_norm": 13.819521832277394, "learning_rate": 5.751225082877163e-06, "loss": 0.1173095703125, "step": 30195 }, { "epoch": 0.26113047012131324, "grad_norm": 2.5247370509971816, "learning_rate": 5.751143845127738e-06, "loss": 0.24638671875, "step": 30200 }, { "epoch": 0.2611737036428565, "grad_norm": 0.40179825229070604, "learning_rate": 5.75106259469028e-06, "loss": 0.34019775390625, "step": 30205 }, { "epoch": 0.2612169371643998, "grad_norm": 2.310487644405855, "learning_rate": 5.750981331565164e-06, "loss": 0.2226593017578125, "step": 30210 }, { "epoch": 0.26126017068594304, "grad_norm": 6.086945667817048, "learning_rate": 5.750900055752767e-06, "loss": 0.0350189208984375, "step": 30215 }, { "epoch": 0.2613034042074863, "grad_norm": 0.6082174677585074, "learning_rate": 5.75081876725346e-06, "loss": 0.03491363525390625, "step": 30220 }, { "epoch": 0.2613466377290296, "grad_norm": 23.387868078612588, "learning_rate": 5.75073746606762e-06, "loss": 0.11654052734375, "step": 30225 }, { "epoch": 0.26138987125057284, "grad_norm": 20.25148172826352, "learning_rate": 5.750656152195624e-06, "loss": 0.31194343566894533, "step": 30230 }, { "epoch": 0.2614331047721161, "grad_norm": 4.067066671728714, "learning_rate": 5.750574825637843e-06, "loss": 0.0768707275390625, "step": 30235 }, { "epoch": 0.26147633829365935, "grad_norm": 41.88678431704598, "learning_rate": 5.750493486394654e-06, "loss": 0.46420440673828123, "step": 30240 }, { "epoch": 0.26151957181520263, "grad_norm": 6.497219730959117, "learning_rate": 5.750412134466433e-06, "loss": 0.0747802734375, "step": 30245 }, { "epoch": 0.2615628053367459, "grad_norm": 5.418550542757546, "learning_rate": 5.750330769853554e-06, "loss": 0.23946380615234375, "step": 30250 }, { "epoch": 0.26160603885828915, "grad_norm": 24.732654309698226, "learning_rate": 5.750249392556392e-06, "loss": 0.147198486328125, "step": 30255 }, { "epoch": 0.26164927237983243, "grad_norm": 37.687674273995896, "learning_rate": 5.750168002575323e-06, "loss": 0.1994384765625, "step": 30260 }, { "epoch": 0.2616925059013757, "grad_norm": 37.785859509248695, "learning_rate": 5.750086599910723e-06, "loss": 0.40816650390625, "step": 30265 }, { "epoch": 0.26173573942291894, "grad_norm": 41.54675032806076, "learning_rate": 5.750005184562966e-06, "loss": 0.5261890411376953, "step": 30270 }, { "epoch": 0.26177897294446223, "grad_norm": 13.774854476510095, "learning_rate": 5.749923756532428e-06, "loss": 0.148773193359375, "step": 30275 }, { "epoch": 0.26182220646600546, "grad_norm": 13.488060472969469, "learning_rate": 5.749842315819484e-06, "loss": 0.133563232421875, "step": 30280 }, { "epoch": 0.26186543998754874, "grad_norm": 27.713013597390507, "learning_rate": 5.749760862424511e-06, "loss": 0.2439422607421875, "step": 30285 }, { "epoch": 0.261908673509092, "grad_norm": 1.249172159349194, "learning_rate": 5.749679396347884e-06, "loss": 0.080755615234375, "step": 30290 }, { "epoch": 0.26195190703063526, "grad_norm": 2.6207083158364863, "learning_rate": 5.749597917589978e-06, "loss": 0.15970916748046876, "step": 30295 }, { "epoch": 0.26199514055217854, "grad_norm": 1.944292722817126, "learning_rate": 5.749516426151171e-06, "loss": 0.10115966796875, "step": 30300 }, { "epoch": 0.2620383740737218, "grad_norm": 47.91258500950611, "learning_rate": 5.749434922031835e-06, "loss": 0.3956329345703125, "step": 30305 }, { "epoch": 0.26208160759526505, "grad_norm": 16.554486090231077, "learning_rate": 5.7493534052323486e-06, "loss": 0.392877197265625, "step": 30310 }, { "epoch": 0.26212484111680834, "grad_norm": 3.8874737968844424, "learning_rate": 5.7492718757530876e-06, "loss": 0.1005126953125, "step": 30315 }, { "epoch": 0.26216807463835157, "grad_norm": 45.9622624072379, "learning_rate": 5.749190333594428e-06, "loss": 0.4865833282470703, "step": 30320 }, { "epoch": 0.26221130815989485, "grad_norm": 3.6532382272500032, "learning_rate": 5.749108778756744e-06, "loss": 0.1821807861328125, "step": 30325 }, { "epoch": 0.26225454168143814, "grad_norm": 25.009158683386808, "learning_rate": 5.749027211240415e-06, "loss": 0.3415245056152344, "step": 30330 }, { "epoch": 0.26229777520298136, "grad_norm": 29.73681089281918, "learning_rate": 5.748945631045815e-06, "loss": 0.1181610107421875, "step": 30335 }, { "epoch": 0.26234100872452465, "grad_norm": 22.760920138743252, "learning_rate": 5.74886403817332e-06, "loss": 0.09289970397949218, "step": 30340 }, { "epoch": 0.26238424224606793, "grad_norm": 9.747508588809438, "learning_rate": 5.7487824326233075e-06, "loss": 0.0868408203125, "step": 30345 }, { "epoch": 0.26242747576761116, "grad_norm": 2.606255664539968, "learning_rate": 5.748700814396153e-06, "loss": 0.09660873413085938, "step": 30350 }, { "epoch": 0.26247070928915445, "grad_norm": 1.0565337363920615, "learning_rate": 5.748619183492234e-06, "loss": 0.10028076171875, "step": 30355 }, { "epoch": 0.2625139428106977, "grad_norm": 2.1035599333296955, "learning_rate": 5.748537539911926e-06, "loss": 0.1082000732421875, "step": 30360 }, { "epoch": 0.26255717633224096, "grad_norm": 1.4930529001688806, "learning_rate": 5.748455883655605e-06, "loss": 0.10718154907226562, "step": 30365 }, { "epoch": 0.26260040985378424, "grad_norm": 10.524297374069578, "learning_rate": 5.748374214723649e-06, "loss": 0.1332550048828125, "step": 30370 }, { "epoch": 0.2626436433753275, "grad_norm": 1.9438452900774768, "learning_rate": 5.748292533116433e-06, "loss": 0.092584228515625, "step": 30375 }, { "epoch": 0.26268687689687076, "grad_norm": 18.996381448961465, "learning_rate": 5.748210838834335e-06, "loss": 0.2692291259765625, "step": 30380 }, { "epoch": 0.26273011041841404, "grad_norm": 11.24856008937148, "learning_rate": 5.748129131877732e-06, "loss": 0.1716583251953125, "step": 30385 }, { "epoch": 0.26277334393995727, "grad_norm": 42.77557694419618, "learning_rate": 5.748047412247e-06, "loss": 0.603765869140625, "step": 30390 }, { "epoch": 0.26281657746150056, "grad_norm": 24.611810265466712, "learning_rate": 5.747965679942516e-06, "loss": 0.108160400390625, "step": 30395 }, { "epoch": 0.26285981098304384, "grad_norm": 3.9383168788487555, "learning_rate": 5.747883934964657e-06, "loss": 0.5453475952148438, "step": 30400 }, { "epoch": 0.26290304450458707, "grad_norm": 43.15156095121709, "learning_rate": 5.747802177313801e-06, "loss": 0.26343536376953125, "step": 30405 }, { "epoch": 0.26294627802613035, "grad_norm": 6.656897898815735, "learning_rate": 5.7477204069903236e-06, "loss": 0.30601348876953127, "step": 30410 }, { "epoch": 0.2629895115476736, "grad_norm": 0.9855673572538496, "learning_rate": 5.747638623994603e-06, "loss": 0.261065673828125, "step": 30415 }, { "epoch": 0.26303274506921687, "grad_norm": 16.44580153249226, "learning_rate": 5.747556828327014e-06, "loss": 0.1736480712890625, "step": 30420 }, { "epoch": 0.26307597859076015, "grad_norm": 15.092101661615954, "learning_rate": 5.7474750199879374e-06, "loss": 0.1785003662109375, "step": 30425 }, { "epoch": 0.2631192121123034, "grad_norm": 2.446725512801093, "learning_rate": 5.747393198977748e-06, "loss": 0.23663330078125, "step": 30430 }, { "epoch": 0.26316244563384666, "grad_norm": 17.170692456827044, "learning_rate": 5.7473113652968245e-06, "loss": 0.20866928100585938, "step": 30435 }, { "epoch": 0.26320567915538995, "grad_norm": 27.651522458407044, "learning_rate": 5.747229518945544e-06, "loss": 0.20612945556640624, "step": 30440 }, { "epoch": 0.2632489126769332, "grad_norm": 23.45208133220222, "learning_rate": 5.747147659924282e-06, "loss": 0.2093994140625, "step": 30445 }, { "epoch": 0.26329214619847646, "grad_norm": 1.0230788338128973, "learning_rate": 5.74706578823342e-06, "loss": 0.23432998657226561, "step": 30450 }, { "epoch": 0.2633353797200197, "grad_norm": 5.320185016314561, "learning_rate": 5.746983903873332e-06, "loss": 0.1809967041015625, "step": 30455 }, { "epoch": 0.263378613241563, "grad_norm": 2.273459739719861, "learning_rate": 5.746902006844398e-06, "loss": 0.06806182861328125, "step": 30460 }, { "epoch": 0.26342184676310626, "grad_norm": 0.9621786632349815, "learning_rate": 5.746820097146993e-06, "loss": 0.2089202880859375, "step": 30465 }, { "epoch": 0.2634650802846495, "grad_norm": 28.75689338631207, "learning_rate": 5.746738174781498e-06, "loss": 0.2684165954589844, "step": 30470 }, { "epoch": 0.2635083138061928, "grad_norm": 0.5569859652188214, "learning_rate": 5.746656239748288e-06, "loss": 0.168695068359375, "step": 30475 }, { "epoch": 0.26355154732773606, "grad_norm": 0.33808411482234535, "learning_rate": 5.746574292047743e-06, "loss": 0.2586475372314453, "step": 30480 }, { "epoch": 0.2635947808492793, "grad_norm": 0.13885330574858556, "learning_rate": 5.74649233168024e-06, "loss": 0.15664749145507811, "step": 30485 }, { "epoch": 0.26363801437082257, "grad_norm": 3.46773651357115, "learning_rate": 5.746410358646156e-06, "loss": 0.28536300659179686, "step": 30490 }, { "epoch": 0.2636812478923658, "grad_norm": 5.253408429797069, "learning_rate": 5.746328372945871e-06, "loss": 0.230230712890625, "step": 30495 }, { "epoch": 0.2637244814139091, "grad_norm": 0.6908658219204618, "learning_rate": 5.746246374579762e-06, "loss": 0.4334381103515625, "step": 30500 }, { "epoch": 0.26376771493545237, "grad_norm": 38.61022892549641, "learning_rate": 5.746164363548208e-06, "loss": 0.3236968994140625, "step": 30505 }, { "epoch": 0.2638109484569956, "grad_norm": 31.681505188915498, "learning_rate": 5.746082339851586e-06, "loss": 0.3047882080078125, "step": 30510 }, { "epoch": 0.2638541819785389, "grad_norm": 1.5770727311750719, "learning_rate": 5.746000303490274e-06, "loss": 0.13722381591796876, "step": 30515 }, { "epoch": 0.26389741550008217, "grad_norm": 2.7027372959463274, "learning_rate": 5.7459182544646525e-06, "loss": 0.023770904541015624, "step": 30520 }, { "epoch": 0.2639406490216254, "grad_norm": 47.13013987460967, "learning_rate": 5.745836192775099e-06, "loss": 0.6732421875, "step": 30525 }, { "epoch": 0.2639838825431687, "grad_norm": 58.190990717898764, "learning_rate": 5.745754118421991e-06, "loss": 0.23560791015625, "step": 30530 }, { "epoch": 0.2640271160647119, "grad_norm": 39.46398876633675, "learning_rate": 5.745672031405707e-06, "loss": 0.28219451904296877, "step": 30535 }, { "epoch": 0.2640703495862552, "grad_norm": 4.5163729840257965, "learning_rate": 5.745589931726627e-06, "loss": 0.1858642578125, "step": 30540 }, { "epoch": 0.2641135831077985, "grad_norm": 5.430492210215022, "learning_rate": 5.745507819385128e-06, "loss": 0.1961883544921875, "step": 30545 }, { "epoch": 0.2641568166293417, "grad_norm": 4.754952566805617, "learning_rate": 5.74542569438159e-06, "loss": 0.08917884826660157, "step": 30550 }, { "epoch": 0.264200050150885, "grad_norm": 0.23059919954144387, "learning_rate": 5.7453435567163916e-06, "loss": 0.39433135986328127, "step": 30555 }, { "epoch": 0.2642432836724283, "grad_norm": 5.171491911956582, "learning_rate": 5.745261406389912e-06, "loss": 0.199981689453125, "step": 30560 }, { "epoch": 0.2642865171939715, "grad_norm": 19.755929742861955, "learning_rate": 5.7451792434025285e-06, "loss": 0.4105224609375, "step": 30565 }, { "epoch": 0.2643297507155148, "grad_norm": 7.999416841807281, "learning_rate": 5.745097067754621e-06, "loss": 0.1909820556640625, "step": 30570 }, { "epoch": 0.2643729842370581, "grad_norm": 0.8623430920354125, "learning_rate": 5.745014879446569e-06, "loss": 0.3771816253662109, "step": 30575 }, { "epoch": 0.2644162177586013, "grad_norm": 7.9206390714045085, "learning_rate": 5.744932678478749e-06, "loss": 0.18575515747070312, "step": 30580 }, { "epoch": 0.2644594512801446, "grad_norm": 0.3890104124630792, "learning_rate": 5.744850464851543e-06, "loss": 0.27759780883789065, "step": 30585 }, { "epoch": 0.2645026848016878, "grad_norm": 124.70252115807136, "learning_rate": 5.74476823856533e-06, "loss": 0.10306472778320312, "step": 30590 }, { "epoch": 0.2645459183232311, "grad_norm": 10.67188516999093, "learning_rate": 5.744685999620488e-06, "loss": 0.4142578125, "step": 30595 }, { "epoch": 0.2645891518447744, "grad_norm": 3.9043848561469083, "learning_rate": 5.744603748017397e-06, "loss": 0.390594482421875, "step": 30600 }, { "epoch": 0.2646323853663176, "grad_norm": 1.4215273309864176, "learning_rate": 5.744521483756435e-06, "loss": 0.31139678955078126, "step": 30605 }, { "epoch": 0.2646756188878609, "grad_norm": 2.9215625544903423, "learning_rate": 5.744439206837983e-06, "loss": 0.0300872802734375, "step": 30610 }, { "epoch": 0.2647188524094042, "grad_norm": 23.07701908956356, "learning_rate": 5.74435691726242e-06, "loss": 0.18850860595703126, "step": 30615 }, { "epoch": 0.2647620859309474, "grad_norm": 24.899559243380693, "learning_rate": 5.744274615030126e-06, "loss": 0.15344696044921874, "step": 30620 }, { "epoch": 0.2648053194524907, "grad_norm": 8.617740719944987, "learning_rate": 5.744192300141479e-06, "loss": 0.09106330871582032, "step": 30625 }, { "epoch": 0.2648485529740339, "grad_norm": 0.3225655623374317, "learning_rate": 5.74410997259686e-06, "loss": 0.1894805908203125, "step": 30630 }, { "epoch": 0.2648917864955772, "grad_norm": 3.0686569504095838, "learning_rate": 5.744027632396648e-06, "loss": 0.0929443359375, "step": 30635 }, { "epoch": 0.2649350200171205, "grad_norm": 24.89242384071751, "learning_rate": 5.7439452795412225e-06, "loss": 0.140362548828125, "step": 30640 }, { "epoch": 0.2649782535386637, "grad_norm": 9.663746526385196, "learning_rate": 5.743862914030964e-06, "loss": 0.13154830932617187, "step": 30645 }, { "epoch": 0.265021487060207, "grad_norm": 10.782038265754899, "learning_rate": 5.743780535866254e-06, "loss": 0.4873291015625, "step": 30650 }, { "epoch": 0.2650647205817503, "grad_norm": 9.92815458418164, "learning_rate": 5.743698145047469e-06, "loss": 0.12787094116210937, "step": 30655 }, { "epoch": 0.2651079541032935, "grad_norm": 9.667435613583821, "learning_rate": 5.74361574157499e-06, "loss": 0.12224044799804687, "step": 30660 }, { "epoch": 0.2651511876248368, "grad_norm": 28.466029434618367, "learning_rate": 5.743533325449198e-06, "loss": 0.13540802001953126, "step": 30665 }, { "epoch": 0.26519442114638003, "grad_norm": 23.524910909522966, "learning_rate": 5.743450896670475e-06, "loss": 0.23020095825195314, "step": 30670 }, { "epoch": 0.2652376546679233, "grad_norm": 25.9035243328799, "learning_rate": 5.7433684552391966e-06, "loss": 0.128173828125, "step": 30675 }, { "epoch": 0.2652808881894666, "grad_norm": 1.9094955001931577, "learning_rate": 5.743286001155746e-06, "loss": 0.25857696533203123, "step": 30680 }, { "epoch": 0.26532412171100983, "grad_norm": 9.69419950267812, "learning_rate": 5.743203534420502e-06, "loss": 0.41520843505859373, "step": 30685 }, { "epoch": 0.2653673552325531, "grad_norm": 13.071978244788607, "learning_rate": 5.7431210550338465e-06, "loss": 0.08757476806640625, "step": 30690 }, { "epoch": 0.2654105887540964, "grad_norm": 23.03675347989723, "learning_rate": 5.743038562996159e-06, "loss": 0.08588714599609375, "step": 30695 }, { "epoch": 0.26545382227563963, "grad_norm": 6.639053870456424, "learning_rate": 5.74295605830782e-06, "loss": 0.21708831787109376, "step": 30700 }, { "epoch": 0.2654970557971829, "grad_norm": 3.768602709936817, "learning_rate": 5.74287354096921e-06, "loss": 0.17144393920898438, "step": 30705 }, { "epoch": 0.26554028931872614, "grad_norm": 1.9269190205142104, "learning_rate": 5.742791010980709e-06, "loss": 0.09385528564453124, "step": 30710 }, { "epoch": 0.2655835228402694, "grad_norm": 0.36387897883676473, "learning_rate": 5.742708468342699e-06, "loss": 0.15714263916015625, "step": 30715 }, { "epoch": 0.2656267563618127, "grad_norm": 16.717251537784442, "learning_rate": 5.742625913055559e-06, "loss": 0.2172119140625, "step": 30720 }, { "epoch": 0.26566998988335594, "grad_norm": 4.291401489174598, "learning_rate": 5.742543345119672e-06, "loss": 0.1885162353515625, "step": 30725 }, { "epoch": 0.2657132234048992, "grad_norm": 29.12736797785495, "learning_rate": 5.7424607645354155e-06, "loss": 0.10394973754882812, "step": 30730 }, { "epoch": 0.2657564569264425, "grad_norm": 1.2463907758497976, "learning_rate": 5.742378171303174e-06, "loss": 0.03209228515625, "step": 30735 }, { "epoch": 0.26579969044798574, "grad_norm": 0.3173856986527362, "learning_rate": 5.742295565423327e-06, "loss": 0.27222747802734376, "step": 30740 }, { "epoch": 0.265842923969529, "grad_norm": 0.17017445727614436, "learning_rate": 5.742212946896254e-06, "loss": 0.23506622314453124, "step": 30745 }, { "epoch": 0.26588615749107225, "grad_norm": 5.664732570833638, "learning_rate": 5.742130315722337e-06, "loss": 0.0276702880859375, "step": 30750 }, { "epoch": 0.26592939101261553, "grad_norm": 3.3331572331101915, "learning_rate": 5.742047671901959e-06, "loss": 0.2069549560546875, "step": 30755 }, { "epoch": 0.2659726245341588, "grad_norm": 13.307528768430622, "learning_rate": 5.741965015435497e-06, "loss": 0.0787750244140625, "step": 30760 }, { "epoch": 0.26601585805570205, "grad_norm": 4.8609057014864545, "learning_rate": 5.741882346323336e-06, "loss": 0.03371124267578125, "step": 30765 }, { "epoch": 0.26605909157724533, "grad_norm": 4.353890493208695, "learning_rate": 5.741799664565856e-06, "loss": 0.07994537353515625, "step": 30770 }, { "epoch": 0.2661023250987886, "grad_norm": 9.013848998000746, "learning_rate": 5.7417169701634375e-06, "loss": 0.15694656372070312, "step": 30775 }, { "epoch": 0.26614555862033185, "grad_norm": 0.3663921328596339, "learning_rate": 5.741634263116463e-06, "loss": 0.02852935791015625, "step": 30780 }, { "epoch": 0.26618879214187513, "grad_norm": 0.19502662972034046, "learning_rate": 5.741551543425314e-06, "loss": 0.22038803100585938, "step": 30785 }, { "epoch": 0.2662320256634184, "grad_norm": 5.0917826587789525, "learning_rate": 5.741468811090371e-06, "loss": 0.17068252563476563, "step": 30790 }, { "epoch": 0.26627525918496164, "grad_norm": 2.5984439502732926, "learning_rate": 5.741386066112016e-06, "loss": 0.0907135009765625, "step": 30795 }, { "epoch": 0.26631849270650493, "grad_norm": 1.6333033603717815, "learning_rate": 5.74130330849063e-06, "loss": 0.3612030029296875, "step": 30800 }, { "epoch": 0.26636172622804816, "grad_norm": 2.5794961565468135, "learning_rate": 5.741220538226597e-06, "loss": 0.14111328125, "step": 30805 }, { "epoch": 0.26640495974959144, "grad_norm": 0.4890045115727388, "learning_rate": 5.741137755320297e-06, "loss": 0.16052780151367188, "step": 30810 }, { "epoch": 0.2664481932711347, "grad_norm": 22.135372170953325, "learning_rate": 5.741054959772112e-06, "loss": 0.263262939453125, "step": 30815 }, { "epoch": 0.26649142679267795, "grad_norm": 0.11384241746351914, "learning_rate": 5.7409721515824225e-06, "loss": 0.09681758880615235, "step": 30820 }, { "epoch": 0.26653466031422124, "grad_norm": 0.605759931208478, "learning_rate": 5.740889330751611e-06, "loss": 0.13554115295410157, "step": 30825 }, { "epoch": 0.2665778938357645, "grad_norm": 5.429069242757235, "learning_rate": 5.740806497280062e-06, "loss": 0.076470947265625, "step": 30830 }, { "epoch": 0.26662112735730775, "grad_norm": 56.896756092320224, "learning_rate": 5.740723651168155e-06, "loss": 0.5082305908203125, "step": 30835 }, { "epoch": 0.26666436087885104, "grad_norm": 17.10800582781601, "learning_rate": 5.740640792416272e-06, "loss": 0.1675628662109375, "step": 30840 }, { "epoch": 0.26670759440039427, "grad_norm": 17.215088198943537, "learning_rate": 5.740557921024797e-06, "loss": 0.4441349029541016, "step": 30845 }, { "epoch": 0.26675082792193755, "grad_norm": 4.57655010620311, "learning_rate": 5.74047503699411e-06, "loss": 0.04629707336425781, "step": 30850 }, { "epoch": 0.26679406144348083, "grad_norm": 28.76842484366671, "learning_rate": 5.740392140324594e-06, "loss": 0.213140869140625, "step": 30855 }, { "epoch": 0.26683729496502406, "grad_norm": 5.166780051737187, "learning_rate": 5.740309231016632e-06, "loss": 0.18491668701171876, "step": 30860 }, { "epoch": 0.26688052848656735, "grad_norm": 26.860592433744127, "learning_rate": 5.740226309070607e-06, "loss": 0.229437255859375, "step": 30865 }, { "epoch": 0.26692376200811063, "grad_norm": 0.8404569630158436, "learning_rate": 5.7401433744868994e-06, "loss": 0.12336053848266601, "step": 30870 }, { "epoch": 0.26696699552965386, "grad_norm": 9.47004081348072, "learning_rate": 5.740060427265893e-06, "loss": 0.30980377197265624, "step": 30875 }, { "epoch": 0.26701022905119715, "grad_norm": 31.46912323302063, "learning_rate": 5.73997746740797e-06, "loss": 0.27079925537109373, "step": 30880 }, { "epoch": 0.2670534625727404, "grad_norm": 11.840839550328443, "learning_rate": 5.739894494913513e-06, "loss": 0.3347900390625, "step": 30885 }, { "epoch": 0.26709669609428366, "grad_norm": 42.21411922205161, "learning_rate": 5.739811509782904e-06, "loss": 0.1373077392578125, "step": 30890 }, { "epoch": 0.26713992961582694, "grad_norm": 30.83362938179092, "learning_rate": 5.739728512016528e-06, "loss": 0.11308441162109376, "step": 30895 }, { "epoch": 0.26718316313737017, "grad_norm": 10.872617510726762, "learning_rate": 5.7396455016147645e-06, "loss": 0.10272216796875, "step": 30900 }, { "epoch": 0.26722639665891346, "grad_norm": 5.5015055349516615, "learning_rate": 5.739562478577999e-06, "loss": 0.0803314208984375, "step": 30905 }, { "epoch": 0.26726963018045674, "grad_norm": 3.7096352546406766, "learning_rate": 5.739479442906613e-06, "loss": 0.25795974731445315, "step": 30910 }, { "epoch": 0.26731286370199997, "grad_norm": 22.430691323644684, "learning_rate": 5.73939639460099e-06, "loss": 0.4974273681640625, "step": 30915 }, { "epoch": 0.26735609722354325, "grad_norm": 3.3690376259466346, "learning_rate": 5.7393133336615135e-06, "loss": 0.05025148391723633, "step": 30920 }, { "epoch": 0.2673993307450865, "grad_norm": 7.858340053260505, "learning_rate": 5.739230260088565e-06, "loss": 0.1769989013671875, "step": 30925 }, { "epoch": 0.26744256426662977, "grad_norm": 31.5607534163857, "learning_rate": 5.7391471738825284e-06, "loss": 0.1851806640625, "step": 30930 }, { "epoch": 0.26748579778817305, "grad_norm": 2.6268581060584095, "learning_rate": 5.739064075043787e-06, "loss": 0.1010406494140625, "step": 30935 }, { "epoch": 0.2675290313097163, "grad_norm": 1.1198104123007715, "learning_rate": 5.7389809635727265e-06, "loss": 0.17273941040039062, "step": 30940 }, { "epoch": 0.26757226483125957, "grad_norm": 33.73530458436193, "learning_rate": 5.738897839469725e-06, "loss": 0.21693267822265624, "step": 30945 }, { "epoch": 0.26761549835280285, "grad_norm": 5.90693151943036, "learning_rate": 5.738814702735171e-06, "loss": 0.07642135620117188, "step": 30950 }, { "epoch": 0.2676587318743461, "grad_norm": 35.04417443179033, "learning_rate": 5.738731553369444e-06, "loss": 0.44452056884765623, "step": 30955 }, { "epoch": 0.26770196539588936, "grad_norm": 1.6599807933305506, "learning_rate": 5.7386483913729294e-06, "loss": 0.194244384765625, "step": 30960 }, { "epoch": 0.26774519891743265, "grad_norm": 20.172768560821673, "learning_rate": 5.7385652167460095e-06, "loss": 0.115350341796875, "step": 30965 }, { "epoch": 0.2677884324389759, "grad_norm": 6.165717986111432, "learning_rate": 5.73848202948907e-06, "loss": 0.1061056137084961, "step": 30970 }, { "epoch": 0.26783166596051916, "grad_norm": 18.60747344988736, "learning_rate": 5.7383988296024935e-06, "loss": 0.176812744140625, "step": 30975 }, { "epoch": 0.2678748994820624, "grad_norm": 50.72043212545709, "learning_rate": 5.7383156170866625e-06, "loss": 0.31130828857421877, "step": 30980 }, { "epoch": 0.2679181330036057, "grad_norm": 7.406804758936221, "learning_rate": 5.738232391941963e-06, "loss": 0.2771484375, "step": 30985 }, { "epoch": 0.26796136652514896, "grad_norm": 1.9562812386649961, "learning_rate": 5.738149154168777e-06, "loss": 0.0770721435546875, "step": 30990 }, { "epoch": 0.2680046000466922, "grad_norm": 20.38787313519327, "learning_rate": 5.73806590376749e-06, "loss": 0.09655609130859374, "step": 30995 }, { "epoch": 0.26804783356823547, "grad_norm": 11.723148115304985, "learning_rate": 5.737982640738484e-06, "loss": 0.29624481201171876, "step": 31000 }, { "epoch": 0.26809106708977876, "grad_norm": 27.571244377008266, "learning_rate": 5.7378993650821434e-06, "loss": 0.2317638397216797, "step": 31005 }, { "epoch": 0.268134300611322, "grad_norm": 11.262872630454044, "learning_rate": 5.7378160767988535e-06, "loss": 0.336358642578125, "step": 31010 }, { "epoch": 0.26817753413286527, "grad_norm": 15.763470409297408, "learning_rate": 5.737732775888998e-06, "loss": 0.136322021484375, "step": 31015 }, { "epoch": 0.2682207676544085, "grad_norm": 5.299599209607282, "learning_rate": 5.737649462352961e-06, "loss": 0.18907623291015624, "step": 31020 }, { "epoch": 0.2682640011759518, "grad_norm": 3.6293490929823453, "learning_rate": 5.737566136191127e-06, "loss": 0.15220718383789061, "step": 31025 }, { "epoch": 0.26830723469749507, "grad_norm": 1.1222383380135594, "learning_rate": 5.737482797403878e-06, "loss": 0.272308349609375, "step": 31030 }, { "epoch": 0.2683504682190383, "grad_norm": 35.858672605167364, "learning_rate": 5.737399445991602e-06, "loss": 0.168621826171875, "step": 31035 }, { "epoch": 0.2683937017405816, "grad_norm": 21.09529442049369, "learning_rate": 5.737316081954681e-06, "loss": 0.3181396484375, "step": 31040 }, { "epoch": 0.26843693526212486, "grad_norm": 17.96260898347085, "learning_rate": 5.7372327052935e-06, "loss": 0.22872543334960938, "step": 31045 }, { "epoch": 0.2684801687836681, "grad_norm": 35.38421709138963, "learning_rate": 5.737149316008444e-06, "loss": 0.16314697265625, "step": 31050 }, { "epoch": 0.2685234023052114, "grad_norm": 19.92854306704819, "learning_rate": 5.737065914099897e-06, "loss": 0.3243095397949219, "step": 31055 }, { "epoch": 0.2685666358267546, "grad_norm": 3.7554147565008873, "learning_rate": 5.736982499568244e-06, "loss": 0.11832809448242188, "step": 31060 }, { "epoch": 0.2686098693482979, "grad_norm": 7.729779755534693, "learning_rate": 5.736899072413869e-06, "loss": 0.35574188232421877, "step": 31065 }, { "epoch": 0.2686531028698412, "grad_norm": 3.8336459042585926, "learning_rate": 5.736815632637158e-06, "loss": 0.07878036499023437, "step": 31070 }, { "epoch": 0.2686963363913844, "grad_norm": 1.9441796016029733, "learning_rate": 5.736732180238495e-06, "loss": 0.3977783203125, "step": 31075 }, { "epoch": 0.2687395699129277, "grad_norm": 11.133457299194326, "learning_rate": 5.736648715218264e-06, "loss": 0.23771209716796876, "step": 31080 }, { "epoch": 0.268782803434471, "grad_norm": 26.33926897040628, "learning_rate": 5.7365652375768526e-06, "loss": 0.20130538940429688, "step": 31085 }, { "epoch": 0.2688260369560142, "grad_norm": 10.76317218442397, "learning_rate": 5.736481747314643e-06, "loss": 0.1687835693359375, "step": 31090 }, { "epoch": 0.2688692704775575, "grad_norm": 33.69948451314141, "learning_rate": 5.736398244432022e-06, "loss": 0.2576469421386719, "step": 31095 }, { "epoch": 0.2689125039991007, "grad_norm": 27.80098854543606, "learning_rate": 5.736314728929374e-06, "loss": 0.208489990234375, "step": 31100 }, { "epoch": 0.268955737520644, "grad_norm": 25.415319174199933, "learning_rate": 5.736231200807084e-06, "loss": 0.2372833251953125, "step": 31105 }, { "epoch": 0.2689989710421873, "grad_norm": 3.9279386018698044, "learning_rate": 5.736147660065537e-06, "loss": 0.3100341796875, "step": 31110 }, { "epoch": 0.2690422045637305, "grad_norm": 5.174843057521366, "learning_rate": 5.736064106705119e-06, "loss": 0.1075439453125, "step": 31115 }, { "epoch": 0.2690854380852738, "grad_norm": 21.595036077102108, "learning_rate": 5.7359805407262155e-06, "loss": 0.275164794921875, "step": 31120 }, { "epoch": 0.2691286716068171, "grad_norm": 19.46316067252262, "learning_rate": 5.7358969621292125e-06, "loss": 0.38109893798828126, "step": 31125 }, { "epoch": 0.2691719051283603, "grad_norm": 5.270362200570669, "learning_rate": 5.7358133709144926e-06, "loss": 0.14887237548828125, "step": 31130 }, { "epoch": 0.2692151386499036, "grad_norm": 0.7661794563001942, "learning_rate": 5.735729767082444e-06, "loss": 0.156353759765625, "step": 31135 }, { "epoch": 0.2692583721714469, "grad_norm": 20.063731351401454, "learning_rate": 5.7356461506334516e-06, "loss": 0.23224258422851562, "step": 31140 }, { "epoch": 0.2693016056929901, "grad_norm": 5.2402912663686285, "learning_rate": 5.7355625215679005e-06, "loss": 0.32866973876953126, "step": 31145 }, { "epoch": 0.2693448392145334, "grad_norm": 0.6962345481327783, "learning_rate": 5.7354788798861774e-06, "loss": 0.02962150573730469, "step": 31150 }, { "epoch": 0.2693880727360766, "grad_norm": 33.410727170381726, "learning_rate": 5.735395225588667e-06, "loss": 0.23961868286132812, "step": 31155 }, { "epoch": 0.2694313062576199, "grad_norm": 13.467575683512303, "learning_rate": 5.735311558675755e-06, "loss": 0.15928955078125, "step": 31160 }, { "epoch": 0.2694745397791632, "grad_norm": 24.410251618218737, "learning_rate": 5.735227879147828e-06, "loss": 0.2908319473266602, "step": 31165 }, { "epoch": 0.2695177733007064, "grad_norm": 27.675060670412694, "learning_rate": 5.735144187005273e-06, "loss": 0.2281513214111328, "step": 31170 }, { "epoch": 0.2695610068222497, "grad_norm": 0.9618461163143971, "learning_rate": 5.735060482248473e-06, "loss": 0.10220947265625, "step": 31175 }, { "epoch": 0.269604240343793, "grad_norm": 44.26298703134139, "learning_rate": 5.734976764877817e-06, "loss": 0.39098968505859377, "step": 31180 }, { "epoch": 0.2696474738653362, "grad_norm": 27.0399252764708, "learning_rate": 5.734893034893689e-06, "loss": 0.33927001953125, "step": 31185 }, { "epoch": 0.2696907073868795, "grad_norm": 7.513808075382055, "learning_rate": 5.734809292296476e-06, "loss": 0.1206451416015625, "step": 31190 }, { "epoch": 0.26973394090842273, "grad_norm": 21.965349405284346, "learning_rate": 5.734725537086564e-06, "loss": 0.402239990234375, "step": 31195 }, { "epoch": 0.269777174429966, "grad_norm": 16.143655052818648, "learning_rate": 5.73464176926434e-06, "loss": 0.07190170288085937, "step": 31200 }, { "epoch": 0.2698204079515093, "grad_norm": 19.819539886131967, "learning_rate": 5.7345579888301895e-06, "loss": 0.5303730010986328, "step": 31205 }, { "epoch": 0.26986364147305253, "grad_norm": 11.584867886118563, "learning_rate": 5.7344741957844995e-06, "loss": 0.11827163696289063, "step": 31210 }, { "epoch": 0.2699068749945958, "grad_norm": 4.974151152612101, "learning_rate": 5.734390390127655e-06, "loss": 0.25469818115234377, "step": 31215 }, { "epoch": 0.2699501085161391, "grad_norm": 1.2327598340620685, "learning_rate": 5.734306571860046e-06, "loss": 0.332525634765625, "step": 31220 }, { "epoch": 0.2699933420376823, "grad_norm": 6.158477858842794, "learning_rate": 5.734222740982055e-06, "loss": 0.2885444641113281, "step": 31225 }, { "epoch": 0.2700365755592256, "grad_norm": 13.617748347860053, "learning_rate": 5.734138897494071e-06, "loss": 0.2972412109375, "step": 31230 }, { "epoch": 0.27007980908076884, "grad_norm": 4.025818603733158, "learning_rate": 5.73405504139648e-06, "loss": 0.10365219116210937, "step": 31235 }, { "epoch": 0.2701230426023121, "grad_norm": 9.899230198469613, "learning_rate": 5.733971172689668e-06, "loss": 0.2979644775390625, "step": 31240 }, { "epoch": 0.2701662761238554, "grad_norm": 13.18447578152077, "learning_rate": 5.733887291374023e-06, "loss": 0.14419822692871093, "step": 31245 }, { "epoch": 0.27020950964539864, "grad_norm": 277.3846404291768, "learning_rate": 5.733803397449931e-06, "loss": 0.23106842041015624, "step": 31250 }, { "epoch": 0.2702527431669419, "grad_norm": 1.716084050248521, "learning_rate": 5.733719490917779e-06, "loss": 0.076519775390625, "step": 31255 }, { "epoch": 0.2702959766884852, "grad_norm": 0.4738150433749284, "learning_rate": 5.733635571777956e-06, "loss": 0.107720947265625, "step": 31260 }, { "epoch": 0.27033921021002844, "grad_norm": 29.321043620683476, "learning_rate": 5.733551640030846e-06, "loss": 0.3714630126953125, "step": 31265 }, { "epoch": 0.2703824437315717, "grad_norm": 11.046453336139102, "learning_rate": 5.7334676956768366e-06, "loss": 0.18145675659179689, "step": 31270 }, { "epoch": 0.27042567725311495, "grad_norm": 9.149498105187726, "learning_rate": 5.733383738716316e-06, "loss": 0.047528076171875, "step": 31275 }, { "epoch": 0.27046891077465823, "grad_norm": 1.8861379134061806, "learning_rate": 5.7332997691496726e-06, "loss": 0.1344736099243164, "step": 31280 }, { "epoch": 0.2705121442962015, "grad_norm": 3.2034016247660726, "learning_rate": 5.733215786977291e-06, "loss": 0.18874282836914064, "step": 31285 }, { "epoch": 0.27055537781774475, "grad_norm": 9.474545814032254, "learning_rate": 5.7331317921995605e-06, "loss": 0.117279052734375, "step": 31290 }, { "epoch": 0.27059861133928803, "grad_norm": 14.521184094600539, "learning_rate": 5.733047784816867e-06, "loss": 0.08510627746582031, "step": 31295 }, { "epoch": 0.2706418448608313, "grad_norm": 8.780398712794154, "learning_rate": 5.732963764829599e-06, "loss": 0.302349853515625, "step": 31300 }, { "epoch": 0.27068507838237454, "grad_norm": 0.622348363372006, "learning_rate": 5.732879732238143e-06, "loss": 0.03056182861328125, "step": 31305 }, { "epoch": 0.27072831190391783, "grad_norm": 11.162566621051534, "learning_rate": 5.7327956870428875e-06, "loss": 0.3021026611328125, "step": 31310 }, { "epoch": 0.2707715454254611, "grad_norm": 0.03112105790230428, "learning_rate": 5.73271162924422e-06, "loss": 0.04739952087402344, "step": 31315 }, { "epoch": 0.27081477894700434, "grad_norm": 22.690261690322732, "learning_rate": 5.732627558842529e-06, "loss": 0.19280548095703126, "step": 31320 }, { "epoch": 0.2708580124685476, "grad_norm": 0.3382470509196583, "learning_rate": 5.732543475838198e-06, "loss": 0.08912506103515624, "step": 31325 }, { "epoch": 0.27090124599009086, "grad_norm": 18.94350674762538, "learning_rate": 5.7324593802316205e-06, "loss": 0.7699005126953125, "step": 31330 }, { "epoch": 0.27094447951163414, "grad_norm": 4.267822790902178, "learning_rate": 5.732375272023181e-06, "loss": 0.052880477905273435, "step": 31335 }, { "epoch": 0.2709877130331774, "grad_norm": 36.082321843114784, "learning_rate": 5.732291151213268e-06, "loss": 0.15158767700195314, "step": 31340 }, { "epoch": 0.27103094655472065, "grad_norm": 1.4316045577120204, "learning_rate": 5.73220701780227e-06, "loss": 0.2023101806640625, "step": 31345 }, { "epoch": 0.27107418007626394, "grad_norm": 0.09575174477089153, "learning_rate": 5.732122871790575e-06, "loss": 0.16213760375976563, "step": 31350 }, { "epoch": 0.2711174135978072, "grad_norm": 40.269701537823174, "learning_rate": 5.73203871317857e-06, "loss": 0.19520721435546876, "step": 31355 }, { "epoch": 0.27116064711935045, "grad_norm": 6.853378488028208, "learning_rate": 5.731954541966645e-06, "loss": 0.4399538040161133, "step": 31360 }, { "epoch": 0.27120388064089374, "grad_norm": 4.633821031310853, "learning_rate": 5.731870358155186e-06, "loss": 0.13265228271484375, "step": 31365 }, { "epoch": 0.27124711416243696, "grad_norm": 10.766332503684227, "learning_rate": 5.731786161744583e-06, "loss": 0.3593841552734375, "step": 31370 }, { "epoch": 0.27129034768398025, "grad_norm": 1.5888686860680463, "learning_rate": 5.731701952735223e-06, "loss": 0.15896835327148437, "step": 31375 }, { "epoch": 0.27133358120552353, "grad_norm": 17.304424082883596, "learning_rate": 5.731617731127495e-06, "loss": 0.207855224609375, "step": 31380 }, { "epoch": 0.27137681472706676, "grad_norm": 7.149688374022452, "learning_rate": 5.7315334969217875e-06, "loss": 0.0521636962890625, "step": 31385 }, { "epoch": 0.27142004824861005, "grad_norm": 19.34160971904754, "learning_rate": 5.731449250118489e-06, "loss": 0.33407440185546877, "step": 31390 }, { "epoch": 0.27146328177015333, "grad_norm": 1.412003085807246, "learning_rate": 5.731364990717988e-06, "loss": 0.2329925537109375, "step": 31395 }, { "epoch": 0.27150651529169656, "grad_norm": 11.860909493688645, "learning_rate": 5.731280718720673e-06, "loss": 0.7028640747070313, "step": 31400 }, { "epoch": 0.27154974881323984, "grad_norm": 5.2146840837061585, "learning_rate": 5.731196434126932e-06, "loss": 0.07879791259765626, "step": 31405 }, { "epoch": 0.2715929823347831, "grad_norm": 1.0775782466297046, "learning_rate": 5.731112136937155e-06, "loss": 0.2397857666015625, "step": 31410 }, { "epoch": 0.27163621585632636, "grad_norm": 97.40330320264052, "learning_rate": 5.73102782715173e-06, "loss": 0.502911376953125, "step": 31415 }, { "epoch": 0.27167944937786964, "grad_norm": 1.8867162230926564, "learning_rate": 5.730943504771046e-06, "loss": 0.05935745239257813, "step": 31420 }, { "epoch": 0.27172268289941287, "grad_norm": 13.089177943500875, "learning_rate": 5.730859169795492e-06, "loss": 0.17148056030273437, "step": 31425 }, { "epoch": 0.27176591642095616, "grad_norm": 12.931989942375067, "learning_rate": 5.730774822225456e-06, "loss": 0.259344482421875, "step": 31430 }, { "epoch": 0.27180914994249944, "grad_norm": 18.25724274423517, "learning_rate": 5.730690462061329e-06, "loss": 0.10871162414550781, "step": 31435 }, { "epoch": 0.27185238346404267, "grad_norm": 7.0811179720156705, "learning_rate": 5.730606089303499e-06, "loss": 0.4454063415527344, "step": 31440 }, { "epoch": 0.27189561698558595, "grad_norm": 2.5096160015170494, "learning_rate": 5.730521703952354e-06, "loss": 0.20881195068359376, "step": 31445 }, { "epoch": 0.2719388505071292, "grad_norm": 10.132267587572926, "learning_rate": 5.730437306008285e-06, "loss": 0.3061199188232422, "step": 31450 }, { "epoch": 0.27198208402867247, "grad_norm": 38.97211340682371, "learning_rate": 5.730352895471679e-06, "loss": 0.304327392578125, "step": 31455 }, { "epoch": 0.27202531755021575, "grad_norm": 20.98947153989833, "learning_rate": 5.730268472342928e-06, "loss": 0.24736328125, "step": 31460 }, { "epoch": 0.272068551071759, "grad_norm": 9.32378255410406, "learning_rate": 5.730184036622419e-06, "loss": 0.25134124755859377, "step": 31465 }, { "epoch": 0.27211178459330226, "grad_norm": 24.823085666964847, "learning_rate": 5.730099588310544e-06, "loss": 0.1588653564453125, "step": 31470 }, { "epoch": 0.27215501811484555, "grad_norm": 0.9418839257050223, "learning_rate": 5.730015127407689e-06, "loss": 0.45565643310546877, "step": 31475 }, { "epoch": 0.2721982516363888, "grad_norm": 3.1920671165094756, "learning_rate": 5.729930653914248e-06, "loss": 0.201556396484375, "step": 31480 }, { "epoch": 0.27224148515793206, "grad_norm": 6.223185052473056, "learning_rate": 5.729846167830606e-06, "loss": 0.16552200317382812, "step": 31485 }, { "epoch": 0.2722847186794753, "grad_norm": 2.7716294261881966, "learning_rate": 5.7297616691571555e-06, "loss": 0.1025543212890625, "step": 31490 }, { "epoch": 0.2723279522010186, "grad_norm": 2.6350774542824893, "learning_rate": 5.729677157894285e-06, "loss": 0.12281494140625, "step": 31495 }, { "epoch": 0.27237118572256186, "grad_norm": 24.749316657441003, "learning_rate": 5.729592634042385e-06, "loss": 0.211859130859375, "step": 31500 }, { "epoch": 0.2724144192441051, "grad_norm": 8.99473346873071, "learning_rate": 5.729508097601845e-06, "loss": 0.269732666015625, "step": 31505 }, { "epoch": 0.2724576527656484, "grad_norm": 4.72301131504132, "learning_rate": 5.729423548573055e-06, "loss": 0.19531478881835937, "step": 31510 }, { "epoch": 0.27250088628719166, "grad_norm": 9.201809317664049, "learning_rate": 5.729338986956404e-06, "loss": 0.25788116455078125, "step": 31515 }, { "epoch": 0.2725441198087349, "grad_norm": 18.413491384821594, "learning_rate": 5.7292544127522836e-06, "loss": 0.29080810546875, "step": 31520 }, { "epoch": 0.27258735333027817, "grad_norm": 6.774687616215611, "learning_rate": 5.729169825961083e-06, "loss": 0.177313232421875, "step": 31525 }, { "epoch": 0.27263058685182145, "grad_norm": 33.48652472807826, "learning_rate": 5.729085226583191e-06, "loss": 0.14017333984375, "step": 31530 }, { "epoch": 0.2726738203733647, "grad_norm": 5.612971493951357, "learning_rate": 5.729000614619e-06, "loss": 0.2866302490234375, "step": 31535 }, { "epoch": 0.27271705389490797, "grad_norm": 12.803500612410845, "learning_rate": 5.7289159900688994e-06, "loss": 0.19721393585205077, "step": 31540 }, { "epoch": 0.2727602874164512, "grad_norm": 5.787647427139542, "learning_rate": 5.728831352933279e-06, "loss": 0.270220947265625, "step": 31545 }, { "epoch": 0.2728035209379945, "grad_norm": 4.089205738910445, "learning_rate": 5.72874670321253e-06, "loss": 0.11370849609375, "step": 31550 }, { "epoch": 0.27284675445953777, "grad_norm": 6.444297112781752, "learning_rate": 5.728662040907042e-06, "loss": 0.185992431640625, "step": 31555 }, { "epoch": 0.272889987981081, "grad_norm": 3.4393007663921975, "learning_rate": 5.728577366017206e-06, "loss": 0.3650360107421875, "step": 31560 }, { "epoch": 0.2729332215026243, "grad_norm": 40.42144566711978, "learning_rate": 5.7284926785434125e-06, "loss": 0.200927734375, "step": 31565 }, { "epoch": 0.27297645502416756, "grad_norm": 15.318673553496053, "learning_rate": 5.728407978486051e-06, "loss": 0.692425537109375, "step": 31570 }, { "epoch": 0.2730196885457108, "grad_norm": 6.636121953967052, "learning_rate": 5.728323265845513e-06, "loss": 0.1232391357421875, "step": 31575 }, { "epoch": 0.2730629220672541, "grad_norm": 2.3168580832554935, "learning_rate": 5.728238540622191e-06, "loss": 0.02389068603515625, "step": 31580 }, { "epoch": 0.2731061555887973, "grad_norm": 0.7478253674706687, "learning_rate": 5.728153802816472e-06, "loss": 0.09937591552734375, "step": 31585 }, { "epoch": 0.2731493891103406, "grad_norm": 31.11684255177956, "learning_rate": 5.728069052428749e-06, "loss": 0.238848876953125, "step": 31590 }, { "epoch": 0.2731926226318839, "grad_norm": 30.77864916378178, "learning_rate": 5.727984289459414e-06, "loss": 0.2076934814453125, "step": 31595 }, { "epoch": 0.2732358561534271, "grad_norm": 5.096084977242137, "learning_rate": 5.7278995139088546e-06, "loss": 0.08912353515625, "step": 31600 }, { "epoch": 0.2732790896749704, "grad_norm": 1.681848335336501, "learning_rate": 5.727814725777465e-06, "loss": 0.22795524597167968, "step": 31605 }, { "epoch": 0.2733223231965137, "grad_norm": 18.512797412762744, "learning_rate": 5.7277299250656335e-06, "loss": 0.2573333740234375, "step": 31610 }, { "epoch": 0.2733655567180569, "grad_norm": 33.78127496885005, "learning_rate": 5.727645111773754e-06, "loss": 0.267572021484375, "step": 31615 }, { "epoch": 0.2734087902396002, "grad_norm": 35.99257004381976, "learning_rate": 5.727560285902216e-06, "loss": 0.263580322265625, "step": 31620 }, { "epoch": 0.2734520237611434, "grad_norm": 15.099830646660505, "learning_rate": 5.72747544745141e-06, "loss": 0.32912139892578124, "step": 31625 }, { "epoch": 0.2734952572826867, "grad_norm": 1.7181505667111883, "learning_rate": 5.72739059642173e-06, "loss": 0.0990966796875, "step": 31630 }, { "epoch": 0.27353849080423, "grad_norm": 163.08458379995923, "learning_rate": 5.727305732813564e-06, "loss": 0.2575347900390625, "step": 31635 }, { "epoch": 0.2735817243257732, "grad_norm": 21.143220335633874, "learning_rate": 5.727220856627306e-06, "loss": 0.25069580078125, "step": 31640 }, { "epoch": 0.2736249578473165, "grad_norm": 14.301839484991103, "learning_rate": 5.7271359678633454e-06, "loss": 0.5784614562988282, "step": 31645 }, { "epoch": 0.2736681913688598, "grad_norm": 1.5796608016513387, "learning_rate": 5.727051066522076e-06, "loss": 0.0586090087890625, "step": 31650 }, { "epoch": 0.273711424890403, "grad_norm": 1.5646149116515489, "learning_rate": 5.726966152603887e-06, "loss": 0.0873382568359375, "step": 31655 }, { "epoch": 0.2737546584119463, "grad_norm": 4.035534128782784, "learning_rate": 5.726881226109172e-06, "loss": 0.309722900390625, "step": 31660 }, { "epoch": 0.2737978919334895, "grad_norm": 1.574186978001206, "learning_rate": 5.72679628703832e-06, "loss": 0.12066650390625, "step": 31665 }, { "epoch": 0.2738411254550328, "grad_norm": 11.165092335949769, "learning_rate": 5.7267113353917255e-06, "loss": 0.11803131103515625, "step": 31670 }, { "epoch": 0.2738843589765761, "grad_norm": 8.859064909363159, "learning_rate": 5.72662637116978e-06, "loss": 0.22994537353515626, "step": 31675 }, { "epoch": 0.2739275924981193, "grad_norm": 5.2315046041182995, "learning_rate": 5.726541394372873e-06, "loss": 0.20070724487304686, "step": 31680 }, { "epoch": 0.2739708260196626, "grad_norm": 18.34064259480861, "learning_rate": 5.7264564050014e-06, "loss": 0.18148651123046874, "step": 31685 }, { "epoch": 0.2740140595412059, "grad_norm": 9.484936188335471, "learning_rate": 5.72637140305575e-06, "loss": 0.13032569885253906, "step": 31690 }, { "epoch": 0.2740572930627491, "grad_norm": 0.7665133203035983, "learning_rate": 5.726286388536315e-06, "loss": 0.05547065734863281, "step": 31695 }, { "epoch": 0.2741005265842924, "grad_norm": 1.6042910173917861, "learning_rate": 5.72620136144349e-06, "loss": 0.202191162109375, "step": 31700 }, { "epoch": 0.2741437601058357, "grad_norm": 0.46872426599319134, "learning_rate": 5.726116321777663e-06, "loss": 0.05638427734375, "step": 31705 }, { "epoch": 0.2741869936273789, "grad_norm": 34.02602510288825, "learning_rate": 5.726031269539229e-06, "loss": 0.28038330078125, "step": 31710 }, { "epoch": 0.2742302271489222, "grad_norm": 0.3463602640333943, "learning_rate": 5.7259462047285805e-06, "loss": 0.11385927200317383, "step": 31715 }, { "epoch": 0.27427346067046543, "grad_norm": 12.3090876191201, "learning_rate": 5.725861127346109e-06, "loss": 0.36522979736328126, "step": 31720 }, { "epoch": 0.2743166941920087, "grad_norm": 14.597858775968598, "learning_rate": 5.725776037392207e-06, "loss": 0.1496734619140625, "step": 31725 }, { "epoch": 0.274359927713552, "grad_norm": 1.7453015846401856, "learning_rate": 5.725690934867266e-06, "loss": 0.113690185546875, "step": 31730 }, { "epoch": 0.2744031612350952, "grad_norm": 14.315306411877163, "learning_rate": 5.72560581977168e-06, "loss": 0.07223968505859375, "step": 31735 }, { "epoch": 0.2744463947566385, "grad_norm": 20.244391125205706, "learning_rate": 5.72552069210584e-06, "loss": 0.2447021484375, "step": 31740 }, { "epoch": 0.2744896282781818, "grad_norm": 50.71827117599829, "learning_rate": 5.725435551870139e-06, "loss": 0.18838119506835938, "step": 31745 }, { "epoch": 0.274532861799725, "grad_norm": 2.3329722031269378, "learning_rate": 5.725350399064971e-06, "loss": 0.06443023681640625, "step": 31750 }, { "epoch": 0.2745760953212683, "grad_norm": 1.605437322726801, "learning_rate": 5.725265233690728e-06, "loss": 0.3654975891113281, "step": 31755 }, { "epoch": 0.27461932884281154, "grad_norm": 3.200415367116863, "learning_rate": 5.725180055747802e-06, "loss": 0.2110626220703125, "step": 31760 }, { "epoch": 0.2746625623643548, "grad_norm": 1.6799593387330238, "learning_rate": 5.725094865236587e-06, "loss": 0.10357666015625, "step": 31765 }, { "epoch": 0.2747057958858981, "grad_norm": 32.43064856974728, "learning_rate": 5.725009662157474e-06, "loss": 0.32321014404296877, "step": 31770 }, { "epoch": 0.27474902940744134, "grad_norm": 2.5329708395202664, "learning_rate": 5.724924446510859e-06, "loss": 0.23895721435546874, "step": 31775 }, { "epoch": 0.2747922629289846, "grad_norm": 1.8288711309972994, "learning_rate": 5.724839218297132e-06, "loss": 0.15690231323242188, "step": 31780 }, { "epoch": 0.2748354964505279, "grad_norm": 21.67280485570514, "learning_rate": 5.724753977516688e-06, "loss": 0.1578369140625, "step": 31785 }, { "epoch": 0.27487872997207113, "grad_norm": 10.114359809680524, "learning_rate": 5.724668724169919e-06, "loss": 0.205487060546875, "step": 31790 }, { "epoch": 0.2749219634936144, "grad_norm": 6.709528462843119, "learning_rate": 5.724583458257218e-06, "loss": 0.335333251953125, "step": 31795 }, { "epoch": 0.27496519701515765, "grad_norm": 29.674363326546285, "learning_rate": 5.72449817977898e-06, "loss": 0.1316680908203125, "step": 31800 }, { "epoch": 0.27500843053670093, "grad_norm": 0.8984012643216521, "learning_rate": 5.724412888735596e-06, "loss": 0.23228607177734376, "step": 31805 }, { "epoch": 0.2750516640582442, "grad_norm": 12.051146116347807, "learning_rate": 5.7243275851274615e-06, "loss": 0.3758995056152344, "step": 31810 }, { "epoch": 0.27509489757978745, "grad_norm": 1.5124040407191777, "learning_rate": 5.7242422689549684e-06, "loss": 0.090484619140625, "step": 31815 }, { "epoch": 0.27513813110133073, "grad_norm": 19.46319805316371, "learning_rate": 5.72415694021851e-06, "loss": 0.16665496826171874, "step": 31820 }, { "epoch": 0.275181364622874, "grad_norm": 24.672223111652944, "learning_rate": 5.724071598918482e-06, "loss": 0.42730560302734377, "step": 31825 }, { "epoch": 0.27522459814441724, "grad_norm": 8.322457282323295, "learning_rate": 5.723986245055275e-06, "loss": 0.1641143798828125, "step": 31830 }, { "epoch": 0.2752678316659605, "grad_norm": 64.53100821825177, "learning_rate": 5.723900878629285e-06, "loss": 0.4423675537109375, "step": 31835 }, { "epoch": 0.27531106518750376, "grad_norm": 2.8786973346577813, "learning_rate": 5.723815499640905e-06, "loss": 0.09694290161132812, "step": 31840 }, { "epoch": 0.27535429870904704, "grad_norm": 7.637107866147896, "learning_rate": 5.723730108090528e-06, "loss": 0.19507484436035155, "step": 31845 }, { "epoch": 0.2753975322305903, "grad_norm": 24.685429257008767, "learning_rate": 5.723644703978548e-06, "loss": 0.5511474609375, "step": 31850 }, { "epoch": 0.27544076575213355, "grad_norm": 54.87182580373786, "learning_rate": 5.72355928730536e-06, "loss": 0.144512939453125, "step": 31855 }, { "epoch": 0.27548399927367684, "grad_norm": 16.440873416315856, "learning_rate": 5.723473858071358e-06, "loss": 0.31173171997070315, "step": 31860 }, { "epoch": 0.2755272327952201, "grad_norm": 6.726825788890021, "learning_rate": 5.723388416276934e-06, "loss": 0.044683837890625, "step": 31865 }, { "epoch": 0.27557046631676335, "grad_norm": 16.55019080677238, "learning_rate": 5.723302961922484e-06, "loss": 0.25384368896484377, "step": 31870 }, { "epoch": 0.27561369983830664, "grad_norm": 2.263278137521522, "learning_rate": 5.723217495008401e-06, "loss": 0.05697059631347656, "step": 31875 }, { "epoch": 0.2756569333598499, "grad_norm": 19.809364557191277, "learning_rate": 5.72313201553508e-06, "loss": 0.184686279296875, "step": 31880 }, { "epoch": 0.27570016688139315, "grad_norm": 8.197029431013751, "learning_rate": 5.723046523502914e-06, "loss": 0.07705898284912109, "step": 31885 }, { "epoch": 0.27574340040293643, "grad_norm": 5.036153387328914, "learning_rate": 5.7229610189122985e-06, "loss": 0.12087478637695312, "step": 31890 }, { "epoch": 0.27578663392447966, "grad_norm": 5.019170058254573, "learning_rate": 5.722875501763627e-06, "loss": 0.17538833618164062, "step": 31895 }, { "epoch": 0.27582986744602295, "grad_norm": 16.004279531664604, "learning_rate": 5.722789972057294e-06, "loss": 0.21605224609375, "step": 31900 }, { "epoch": 0.27587310096756623, "grad_norm": 1.616914798352473, "learning_rate": 5.7227044297936954e-06, "loss": 0.190484619140625, "step": 31905 }, { "epoch": 0.27591633448910946, "grad_norm": 6.3419879395216086, "learning_rate": 5.7226188749732245e-06, "loss": 0.15172271728515624, "step": 31910 }, { "epoch": 0.27595956801065274, "grad_norm": 40.23312701825373, "learning_rate": 5.722533307596275e-06, "loss": 0.16686248779296875, "step": 31915 }, { "epoch": 0.27600280153219603, "grad_norm": 5.097389006786313, "learning_rate": 5.722447727663243e-06, "loss": 0.2459716796875, "step": 31920 }, { "epoch": 0.27604603505373926, "grad_norm": 0.16533500155219263, "learning_rate": 5.722362135174523e-06, "loss": 0.36361846923828123, "step": 31925 }, { "epoch": 0.27608926857528254, "grad_norm": 0.6487884860682734, "learning_rate": 5.722276530130508e-06, "loss": 0.0903717041015625, "step": 31930 }, { "epoch": 0.27613250209682577, "grad_norm": 11.280032322904697, "learning_rate": 5.7221909125315954e-06, "loss": 0.279351806640625, "step": 31935 }, { "epoch": 0.27617573561836906, "grad_norm": 19.34248568376106, "learning_rate": 5.722105282378178e-06, "loss": 0.621136474609375, "step": 31940 }, { "epoch": 0.27621896913991234, "grad_norm": 22.710368191175807, "learning_rate": 5.722019639670653e-06, "loss": 0.21048583984375, "step": 31945 }, { "epoch": 0.27626220266145557, "grad_norm": 6.553151142938818, "learning_rate": 5.721933984409412e-06, "loss": 0.123565673828125, "step": 31950 }, { "epoch": 0.27630543618299885, "grad_norm": 3.335278363803823, "learning_rate": 5.721848316594853e-06, "loss": 0.28380794525146485, "step": 31955 }, { "epoch": 0.27634866970454214, "grad_norm": 4.793654229989738, "learning_rate": 5.72176263622737e-06, "loss": 0.30471343994140626, "step": 31960 }, { "epoch": 0.27639190322608537, "grad_norm": 11.084429449841085, "learning_rate": 5.721676943307359e-06, "loss": 0.141424560546875, "step": 31965 }, { "epoch": 0.27643513674762865, "grad_norm": 8.704193728831285, "learning_rate": 5.721591237835213e-06, "loss": 0.14345703125, "step": 31970 }, { "epoch": 0.2764783702691719, "grad_norm": 0.7707979158545646, "learning_rate": 5.72150551981133e-06, "loss": 0.163134765625, "step": 31975 }, { "epoch": 0.27652160379071516, "grad_norm": 18.325027751125567, "learning_rate": 5.721419789236103e-06, "loss": 0.3139228820800781, "step": 31980 }, { "epoch": 0.27656483731225845, "grad_norm": 0.48526023377027855, "learning_rate": 5.7213340461099294e-06, "loss": 0.09347991943359375, "step": 31985 }, { "epoch": 0.2766080708338017, "grad_norm": 8.83199753418615, "learning_rate": 5.721248290433203e-06, "loss": 0.1200408935546875, "step": 31990 }, { "epoch": 0.27665130435534496, "grad_norm": 22.18116723101392, "learning_rate": 5.72116252220632e-06, "loss": 0.1872650146484375, "step": 31995 }, { "epoch": 0.27669453787688825, "grad_norm": 18.802833810082895, "learning_rate": 5.721076741429675e-06, "loss": 0.13647937774658203, "step": 32000 }, { "epoch": 0.2767377713984315, "grad_norm": 5.469253868648913, "learning_rate": 5.720990948103666e-06, "loss": 0.3206066131591797, "step": 32005 }, { "epoch": 0.27678100491997476, "grad_norm": 7.315116690771275, "learning_rate": 5.720905142228686e-06, "loss": 0.0597076416015625, "step": 32010 }, { "epoch": 0.276824238441518, "grad_norm": 15.803471288788522, "learning_rate": 5.720819323805134e-06, "loss": 0.35952911376953123, "step": 32015 }, { "epoch": 0.2768674719630613, "grad_norm": 11.827378606739263, "learning_rate": 5.720733492833401e-06, "loss": 0.168597412109375, "step": 32020 }, { "epoch": 0.27691070548460456, "grad_norm": 1.9619962352327587, "learning_rate": 5.720647649313887e-06, "loss": 0.15268707275390625, "step": 32025 }, { "epoch": 0.2769539390061478, "grad_norm": 4.765917032026281, "learning_rate": 5.720561793246986e-06, "loss": 0.1899850845336914, "step": 32030 }, { "epoch": 0.27699717252769107, "grad_norm": 6.224047647606458, "learning_rate": 5.7204759246330945e-06, "loss": 0.20098419189453126, "step": 32035 }, { "epoch": 0.27704040604923436, "grad_norm": 8.308276172246606, "learning_rate": 5.720390043472609e-06, "loss": 0.123309326171875, "step": 32040 }, { "epoch": 0.2770836395707776, "grad_norm": 9.280535226526338, "learning_rate": 5.720304149765924e-06, "loss": 0.14666328430175782, "step": 32045 }, { "epoch": 0.27712687309232087, "grad_norm": 1.0543223040498644, "learning_rate": 5.720218243513438e-06, "loss": 0.09599227905273437, "step": 32050 }, { "epoch": 0.27717010661386415, "grad_norm": 2.433263409600901, "learning_rate": 5.720132324715544e-06, "loss": 0.07257080078125, "step": 32055 }, { "epoch": 0.2772133401354074, "grad_norm": 3.3803482595432794, "learning_rate": 5.7200463933726415e-06, "loss": 0.12025146484375, "step": 32060 }, { "epoch": 0.27725657365695067, "grad_norm": 5.040460765727538, "learning_rate": 5.719960449485125e-06, "loss": 0.2569755554199219, "step": 32065 }, { "epoch": 0.2772998071784939, "grad_norm": 31.245289105909364, "learning_rate": 5.719874493053391e-06, "loss": 0.3293853759765625, "step": 32070 }, { "epoch": 0.2773430407000372, "grad_norm": 0.1919454058778401, "learning_rate": 5.719788524077836e-06, "loss": 0.26252403259277346, "step": 32075 }, { "epoch": 0.27738627422158046, "grad_norm": 5.066452219835852, "learning_rate": 5.719702542558857e-06, "loss": 0.1575286865234375, "step": 32080 }, { "epoch": 0.2774295077431237, "grad_norm": 25.66131194172717, "learning_rate": 5.719616548496851e-06, "loss": 0.36829376220703125, "step": 32085 }, { "epoch": 0.277472741264667, "grad_norm": 0.4101987574830099, "learning_rate": 5.719530541892213e-06, "loss": 0.1734405517578125, "step": 32090 }, { "epoch": 0.27751597478621026, "grad_norm": 20.21230430855127, "learning_rate": 5.719444522745339e-06, "loss": 0.34990768432617186, "step": 32095 }, { "epoch": 0.2775592083077535, "grad_norm": 20.978265523186607, "learning_rate": 5.7193584910566285e-06, "loss": 0.17490386962890625, "step": 32100 }, { "epoch": 0.2776024418292968, "grad_norm": 15.395485068207414, "learning_rate": 5.719272446826477e-06, "loss": 0.100494384765625, "step": 32105 }, { "epoch": 0.27764567535084, "grad_norm": 19.15980078397313, "learning_rate": 5.719186390055281e-06, "loss": 0.14674072265625, "step": 32110 }, { "epoch": 0.2776889088723833, "grad_norm": 1.8574380493183251, "learning_rate": 5.719100320743439e-06, "loss": 0.30010528564453126, "step": 32115 }, { "epoch": 0.2777321423939266, "grad_norm": 25.627458698107233, "learning_rate": 5.7190142388913455e-06, "loss": 0.18763885498046876, "step": 32120 }, { "epoch": 0.2777753759154698, "grad_norm": 1.6596291908256589, "learning_rate": 5.718928144499397e-06, "loss": 0.054449462890625, "step": 32125 }, { "epoch": 0.2778186094370131, "grad_norm": 0.581982798828866, "learning_rate": 5.718842037567994e-06, "loss": 0.0216796875, "step": 32130 }, { "epoch": 0.27786184295855637, "grad_norm": 7.2605284913688095, "learning_rate": 5.718755918097532e-06, "loss": 0.14845008850097657, "step": 32135 }, { "epoch": 0.2779050764800996, "grad_norm": 19.76182257642276, "learning_rate": 5.718669786088407e-06, "loss": 0.12671003341674805, "step": 32140 }, { "epoch": 0.2779483100016429, "grad_norm": 31.06804142284207, "learning_rate": 5.718583641541018e-06, "loss": 0.1337249755859375, "step": 32145 }, { "epoch": 0.2779915435231861, "grad_norm": 38.75725436543846, "learning_rate": 5.718497484455761e-06, "loss": 0.6437191009521485, "step": 32150 }, { "epoch": 0.2780347770447294, "grad_norm": 8.964001322238195, "learning_rate": 5.7184113148330335e-06, "loss": 0.1546112060546875, "step": 32155 }, { "epoch": 0.2780780105662727, "grad_norm": 6.16145903763676, "learning_rate": 5.718325132673233e-06, "loss": 0.1750274658203125, "step": 32160 }, { "epoch": 0.2781212440878159, "grad_norm": 13.417316186574453, "learning_rate": 5.7182389379767584e-06, "loss": 0.241839599609375, "step": 32165 }, { "epoch": 0.2781644776093592, "grad_norm": 2.165068151402627, "learning_rate": 5.718152730744005e-06, "loss": 0.22142486572265624, "step": 32170 }, { "epoch": 0.2782077111309025, "grad_norm": 6.636352013171224, "learning_rate": 5.718066510975372e-06, "loss": 0.3294158935546875, "step": 32175 }, { "epoch": 0.2782509446524457, "grad_norm": 2.192903091160774, "learning_rate": 5.717980278671256e-06, "loss": 0.21551475524902344, "step": 32180 }, { "epoch": 0.278294178173989, "grad_norm": 22.475934196045582, "learning_rate": 5.717894033832055e-06, "loss": 0.13656578063964844, "step": 32185 }, { "epoch": 0.2783374116955322, "grad_norm": 7.990240072320231, "learning_rate": 5.717807776458168e-06, "loss": 0.171405029296875, "step": 32190 }, { "epoch": 0.2783806452170755, "grad_norm": 63.875152527045785, "learning_rate": 5.7177215065499906e-06, "loss": 0.16770248413085936, "step": 32195 }, { "epoch": 0.2784238787386188, "grad_norm": 26.478565431999982, "learning_rate": 5.717635224107921e-06, "loss": 0.2940887451171875, "step": 32200 }, { "epoch": 0.278467112260162, "grad_norm": 1.1371229776599698, "learning_rate": 5.717548929132359e-06, "loss": 0.20928497314453126, "step": 32205 }, { "epoch": 0.2785103457817053, "grad_norm": 3.175764489320583, "learning_rate": 5.717462621623701e-06, "loss": 0.098370361328125, "step": 32210 }, { "epoch": 0.2785535793032486, "grad_norm": 17.58340419806856, "learning_rate": 5.717376301582346e-06, "loss": 0.19754791259765625, "step": 32215 }, { "epoch": 0.2785968128247918, "grad_norm": 0.10101063352412547, "learning_rate": 5.717289969008692e-06, "loss": 0.1103759765625, "step": 32220 }, { "epoch": 0.2786400463463351, "grad_norm": 2.3257599453736306, "learning_rate": 5.717203623903136e-06, "loss": 0.1345916748046875, "step": 32225 }, { "epoch": 0.27868327986787833, "grad_norm": 10.97563378771435, "learning_rate": 5.717117266266077e-06, "loss": 0.35165252685546877, "step": 32230 }, { "epoch": 0.2787265133894216, "grad_norm": 4.09161011671208, "learning_rate": 5.717030896097914e-06, "loss": 0.0720947265625, "step": 32235 }, { "epoch": 0.2787697469109649, "grad_norm": 8.545006045094173, "learning_rate": 5.716944513399044e-06, "loss": 0.10869064331054687, "step": 32240 }, { "epoch": 0.27881298043250813, "grad_norm": 7.233327999924011, "learning_rate": 5.716858118169865e-06, "loss": 0.10413055419921875, "step": 32245 }, { "epoch": 0.2788562139540514, "grad_norm": 2.309963054211625, "learning_rate": 5.716771710410778e-06, "loss": 0.07564430236816407, "step": 32250 }, { "epoch": 0.2788994474755947, "grad_norm": 25.934715244783995, "learning_rate": 5.71668529012218e-06, "loss": 0.17907791137695311, "step": 32255 }, { "epoch": 0.2789426809971379, "grad_norm": 4.034628411437151, "learning_rate": 5.716598857304468e-06, "loss": 0.06996536254882812, "step": 32260 }, { "epoch": 0.2789859145186812, "grad_norm": 22.288058048039453, "learning_rate": 5.716512411958043e-06, "loss": 0.2477783203125, "step": 32265 }, { "epoch": 0.2790291480402245, "grad_norm": 4.768037540785927, "learning_rate": 5.716425954083303e-06, "loss": 0.17568893432617189, "step": 32270 }, { "epoch": 0.2790723815617677, "grad_norm": 6.689161852380759, "learning_rate": 5.716339483680646e-06, "loss": 0.16432876586914064, "step": 32275 }, { "epoch": 0.279115615083311, "grad_norm": 16.45850913339697, "learning_rate": 5.716253000750472e-06, "loss": 0.23707952499389648, "step": 32280 }, { "epoch": 0.27915884860485424, "grad_norm": 3.126225769685289, "learning_rate": 5.716166505293179e-06, "loss": 0.06726036071777344, "step": 32285 }, { "epoch": 0.2792020821263975, "grad_norm": 13.125515644052006, "learning_rate": 5.716079997309166e-06, "loss": 0.38322982788085935, "step": 32290 }, { "epoch": 0.2792453156479408, "grad_norm": 6.207457353949518, "learning_rate": 5.715993476798831e-06, "loss": 0.27607574462890627, "step": 32295 }, { "epoch": 0.27928854916948403, "grad_norm": 3.0646111379931655, "learning_rate": 5.715906943762575e-06, "loss": 0.11673507690429688, "step": 32300 }, { "epoch": 0.2793317826910273, "grad_norm": 73.55315322035763, "learning_rate": 5.715820398200796e-06, "loss": 0.4196647644042969, "step": 32305 }, { "epoch": 0.2793750162125706, "grad_norm": 26.20352680409145, "learning_rate": 5.715733840113894e-06, "loss": 0.2528961181640625, "step": 32310 }, { "epoch": 0.27941824973411383, "grad_norm": 43.565087732568124, "learning_rate": 5.715647269502266e-06, "loss": 0.8544479370117187, "step": 32315 }, { "epoch": 0.2794614832556571, "grad_norm": 5.287852245919932, "learning_rate": 5.7155606863663145e-06, "loss": 0.5761810302734375, "step": 32320 }, { "epoch": 0.27950471677720035, "grad_norm": 22.58820284542382, "learning_rate": 5.715474090706436e-06, "loss": 0.31085205078125, "step": 32325 }, { "epoch": 0.27954795029874363, "grad_norm": 1.0596048975626524, "learning_rate": 5.7153874825230315e-06, "loss": 0.060662460327148435, "step": 32330 }, { "epoch": 0.2795911838202869, "grad_norm": 2.0854513808737174, "learning_rate": 5.7153008618165e-06, "loss": 0.049782943725585935, "step": 32335 }, { "epoch": 0.27963441734183014, "grad_norm": 43.10460823955474, "learning_rate": 5.71521422858724e-06, "loss": 0.1304473876953125, "step": 32340 }, { "epoch": 0.27967765086337343, "grad_norm": 0.9413175089775286, "learning_rate": 5.715127582835651e-06, "loss": 0.24437713623046875, "step": 32345 }, { "epoch": 0.2797208843849167, "grad_norm": 7.388057534052577, "learning_rate": 5.715040924562136e-06, "loss": 0.064837646484375, "step": 32350 }, { "epoch": 0.27976411790645994, "grad_norm": 10.214414955948797, "learning_rate": 5.714954253767091e-06, "loss": 0.13851051330566405, "step": 32355 }, { "epoch": 0.2798073514280032, "grad_norm": 1.054478673199317, "learning_rate": 5.714867570450917e-06, "loss": 0.12401046752929687, "step": 32360 }, { "epoch": 0.27985058494954645, "grad_norm": 2.5398659408436752, "learning_rate": 5.714780874614012e-06, "loss": 0.13538818359375, "step": 32365 }, { "epoch": 0.27989381847108974, "grad_norm": 30.454418988561667, "learning_rate": 5.71469416625678e-06, "loss": 0.300408935546875, "step": 32370 }, { "epoch": 0.279937051992633, "grad_norm": 12.041182195840655, "learning_rate": 5.714607445379617e-06, "loss": 0.121917724609375, "step": 32375 }, { "epoch": 0.27998028551417625, "grad_norm": 10.063513492472175, "learning_rate": 5.714520711982925e-06, "loss": 0.27914886474609374, "step": 32380 }, { "epoch": 0.28002351903571954, "grad_norm": 3.8147848454575364, "learning_rate": 5.7144339660671024e-06, "loss": 0.12117233276367187, "step": 32385 }, { "epoch": 0.2800667525572628, "grad_norm": 3.3660655368611736, "learning_rate": 5.714347207632551e-06, "loss": 0.1408905029296875, "step": 32390 }, { "epoch": 0.28010998607880605, "grad_norm": 8.16620653960762, "learning_rate": 5.714260436679671e-06, "loss": 0.41626739501953125, "step": 32395 }, { "epoch": 0.28015321960034933, "grad_norm": 8.035130364061754, "learning_rate": 5.71417365320886e-06, "loss": 0.053338623046875, "step": 32400 }, { "epoch": 0.28019645312189256, "grad_norm": 26.986436006948903, "learning_rate": 5.714086857220521e-06, "loss": 0.316802978515625, "step": 32405 }, { "epoch": 0.28023968664343585, "grad_norm": 10.621947731041617, "learning_rate": 5.7140000487150525e-06, "loss": 0.305731201171875, "step": 32410 }, { "epoch": 0.28028292016497913, "grad_norm": 3.639874853338867, "learning_rate": 5.713913227692856e-06, "loss": 0.5780059814453125, "step": 32415 }, { "epoch": 0.28032615368652236, "grad_norm": 3.9071225216129917, "learning_rate": 5.713826394154331e-06, "loss": 0.19503860473632811, "step": 32420 }, { "epoch": 0.28036938720806565, "grad_norm": 11.564864478373112, "learning_rate": 5.7137395480998795e-06, "loss": 0.27798309326171877, "step": 32425 }, { "epoch": 0.28041262072960893, "grad_norm": 2.016992221341489, "learning_rate": 5.7136526895299e-06, "loss": 0.2826042175292969, "step": 32430 }, { "epoch": 0.28045585425115216, "grad_norm": 13.929348302993764, "learning_rate": 5.713565818444794e-06, "loss": 0.28160400390625, "step": 32435 }, { "epoch": 0.28049908777269544, "grad_norm": 11.43562141624273, "learning_rate": 5.713478934844962e-06, "loss": 0.1321441650390625, "step": 32440 }, { "epoch": 0.28054232129423873, "grad_norm": 2.1206244339334135, "learning_rate": 5.713392038730807e-06, "loss": 0.06275138854980469, "step": 32445 }, { "epoch": 0.28058555481578196, "grad_norm": 12.20201253878195, "learning_rate": 5.713305130102725e-06, "loss": 0.27338180541992185, "step": 32450 }, { "epoch": 0.28062878833732524, "grad_norm": 13.019763749419822, "learning_rate": 5.713218208961121e-06, "loss": 0.1313507080078125, "step": 32455 }, { "epoch": 0.28067202185886847, "grad_norm": 2.7055950543431515, "learning_rate": 5.713131275306394e-06, "loss": 0.10678482055664062, "step": 32460 }, { "epoch": 0.28071525538041175, "grad_norm": 25.968688515992113, "learning_rate": 5.713044329138946e-06, "loss": 0.24709320068359375, "step": 32465 }, { "epoch": 0.28075848890195504, "grad_norm": 6.79051000544415, "learning_rate": 5.712957370459176e-06, "loss": 0.23637313842773439, "step": 32470 }, { "epoch": 0.28080172242349827, "grad_norm": 5.65417380213552, "learning_rate": 5.712870399267487e-06, "loss": 0.16740989685058594, "step": 32475 }, { "epoch": 0.28084495594504155, "grad_norm": 28.09239458516723, "learning_rate": 5.71278341556428e-06, "loss": 0.25714874267578125, "step": 32480 }, { "epoch": 0.28088818946658484, "grad_norm": 6.095141948940669, "learning_rate": 5.712696419349954e-06, "loss": 0.13420257568359376, "step": 32485 }, { "epoch": 0.28093142298812807, "grad_norm": 1.2444554306009037, "learning_rate": 5.712609410624914e-06, "loss": 0.21563491821289063, "step": 32490 }, { "epoch": 0.28097465650967135, "grad_norm": 4.568990717615066, "learning_rate": 5.712522389389558e-06, "loss": 0.2839569091796875, "step": 32495 }, { "epoch": 0.2810178900312146, "grad_norm": 2.478483455921944, "learning_rate": 5.712435355644288e-06, "loss": 0.3125701904296875, "step": 32500 }, { "epoch": 0.28106112355275786, "grad_norm": 17.679654768715203, "learning_rate": 5.7123483093895066e-06, "loss": 0.4348876953125, "step": 32505 }, { "epoch": 0.28110435707430115, "grad_norm": 23.30130054671403, "learning_rate": 5.712261250625614e-06, "loss": 0.18791046142578124, "step": 32510 }, { "epoch": 0.2811475905958444, "grad_norm": 5.105702089559686, "learning_rate": 5.712174179353012e-06, "loss": 0.325189208984375, "step": 32515 }, { "epoch": 0.28119082411738766, "grad_norm": 10.411968011005264, "learning_rate": 5.712087095572102e-06, "loss": 0.2858062744140625, "step": 32520 }, { "epoch": 0.28123405763893095, "grad_norm": 27.86046125591227, "learning_rate": 5.711999999283286e-06, "loss": 0.18492431640625, "step": 32525 }, { "epoch": 0.2812772911604742, "grad_norm": 26.500277505834028, "learning_rate": 5.711912890486967e-06, "loss": 0.34012451171875, "step": 32530 }, { "epoch": 0.28132052468201746, "grad_norm": 36.68806654791653, "learning_rate": 5.711825769183544e-06, "loss": 0.3759880065917969, "step": 32535 }, { "epoch": 0.2813637582035607, "grad_norm": 4.5797717561889355, "learning_rate": 5.711738635373421e-06, "loss": 0.09210662841796875, "step": 32540 }, { "epoch": 0.28140699172510397, "grad_norm": 8.022816396113987, "learning_rate": 5.711651489056999e-06, "loss": 0.230126953125, "step": 32545 }, { "epoch": 0.28145022524664726, "grad_norm": 51.8227983216737, "learning_rate": 5.7115643302346794e-06, "loss": 0.4008636474609375, "step": 32550 }, { "epoch": 0.2814934587681905, "grad_norm": 0.37070743898483705, "learning_rate": 5.7114771589068655e-06, "loss": 0.2116241455078125, "step": 32555 }, { "epoch": 0.28153669228973377, "grad_norm": 1.1800233390115662, "learning_rate": 5.711389975073958e-06, "loss": 0.129443359375, "step": 32560 }, { "epoch": 0.28157992581127705, "grad_norm": 3.936851713543133, "learning_rate": 5.711302778736359e-06, "loss": 0.08626251220703125, "step": 32565 }, { "epoch": 0.2816231593328203, "grad_norm": 18.676976817563094, "learning_rate": 5.711215569894472e-06, "loss": 0.293634033203125, "step": 32570 }, { "epoch": 0.28166639285436357, "grad_norm": 1.7125852478986785, "learning_rate": 5.711128348548698e-06, "loss": 0.12762584686279296, "step": 32575 }, { "epoch": 0.2817096263759068, "grad_norm": 39.53236938387904, "learning_rate": 5.71104111469944e-06, "loss": 0.3841644287109375, "step": 32580 }, { "epoch": 0.2817528598974501, "grad_norm": 1.0668672761008609, "learning_rate": 5.7109538683471e-06, "loss": 0.14012298583984376, "step": 32585 }, { "epoch": 0.28179609341899337, "grad_norm": 15.250433205682867, "learning_rate": 5.71086660949208e-06, "loss": 0.07717437744140625, "step": 32590 }, { "epoch": 0.2818393269405366, "grad_norm": 45.770589540601826, "learning_rate": 5.710779338134781e-06, "loss": 0.34495086669921876, "step": 32595 }, { "epoch": 0.2818825604620799, "grad_norm": 8.703266660042013, "learning_rate": 5.71069205427561e-06, "loss": 0.22355880737304687, "step": 32600 }, { "epoch": 0.28192579398362316, "grad_norm": 15.812534155546066, "learning_rate": 5.710604757914964e-06, "loss": 0.16200485229492187, "step": 32605 }, { "epoch": 0.2819690275051664, "grad_norm": 6.997895922500608, "learning_rate": 5.710517449053251e-06, "loss": 0.122174072265625, "step": 32610 }, { "epoch": 0.2820122610267097, "grad_norm": 37.42059754545604, "learning_rate": 5.71043012769087e-06, "loss": 0.1215576171875, "step": 32615 }, { "epoch": 0.28205549454825296, "grad_norm": 39.30872718366661, "learning_rate": 5.7103427938282245e-06, "loss": 0.4510852813720703, "step": 32620 }, { "epoch": 0.2820987280697962, "grad_norm": 19.770407500819196, "learning_rate": 5.710255447465717e-06, "loss": 0.24208984375, "step": 32625 }, { "epoch": 0.2821419615913395, "grad_norm": 13.869004927513473, "learning_rate": 5.710168088603753e-06, "loss": 0.21141204833984376, "step": 32630 }, { "epoch": 0.2821851951128827, "grad_norm": 0.31965879029590505, "learning_rate": 5.710080717242731e-06, "loss": 0.31683197021484377, "step": 32635 }, { "epoch": 0.282228428634426, "grad_norm": 2.741773151870257, "learning_rate": 5.709993333383058e-06, "loss": 0.2747528076171875, "step": 32640 }, { "epoch": 0.28227166215596927, "grad_norm": 5.104390651573195, "learning_rate": 5.709905937025134e-06, "loss": 0.0775634765625, "step": 32645 }, { "epoch": 0.2823148956775125, "grad_norm": 10.669111524863775, "learning_rate": 5.709818528169364e-06, "loss": 0.1397308349609375, "step": 32650 }, { "epoch": 0.2823581291990558, "grad_norm": 14.139087540093223, "learning_rate": 5.709731106816149e-06, "loss": 0.21715011596679687, "step": 32655 }, { "epoch": 0.28240136272059907, "grad_norm": 17.823006910569784, "learning_rate": 5.709643672965895e-06, "loss": 0.15727081298828124, "step": 32660 }, { "epoch": 0.2824445962421423, "grad_norm": 0.590627342535353, "learning_rate": 5.709556226619003e-06, "loss": 0.134521484375, "step": 32665 }, { "epoch": 0.2824878297636856, "grad_norm": 4.4671728987028825, "learning_rate": 5.709468767775878e-06, "loss": 0.04238471984863281, "step": 32670 }, { "epoch": 0.2825310632852288, "grad_norm": 35.985675763935795, "learning_rate": 5.7093812964369214e-06, "loss": 0.260986328125, "step": 32675 }, { "epoch": 0.2825742968067721, "grad_norm": 0.44463317350206033, "learning_rate": 5.709293812602538e-06, "loss": 0.2628448486328125, "step": 32680 }, { "epoch": 0.2826175303283154, "grad_norm": 25.429955598889954, "learning_rate": 5.709206316273132e-06, "loss": 0.15642318725585938, "step": 32685 }, { "epoch": 0.2826607638498586, "grad_norm": 9.36693623872853, "learning_rate": 5.709118807449104e-06, "loss": 0.392431640625, "step": 32690 }, { "epoch": 0.2827039973714019, "grad_norm": 7.877511577816259, "learning_rate": 5.709031286130861e-06, "loss": 0.05874481201171875, "step": 32695 }, { "epoch": 0.2827472308929452, "grad_norm": 39.510837187285574, "learning_rate": 5.7089437523188046e-06, "loss": 0.3364471435546875, "step": 32700 }, { "epoch": 0.2827904644144884, "grad_norm": 7.285813737654926, "learning_rate": 5.7088562060133385e-06, "loss": 0.1646240234375, "step": 32705 }, { "epoch": 0.2828336979360317, "grad_norm": 0.14828393396203526, "learning_rate": 5.708768647214867e-06, "loss": 0.20598602294921875, "step": 32710 }, { "epoch": 0.2828769314575749, "grad_norm": 6.4322999540119765, "learning_rate": 5.708681075923794e-06, "loss": 0.2443267822265625, "step": 32715 }, { "epoch": 0.2829201649791182, "grad_norm": 13.064540170540335, "learning_rate": 5.708593492140524e-06, "loss": 0.14674415588378906, "step": 32720 }, { "epoch": 0.2829633985006615, "grad_norm": 48.55003456033517, "learning_rate": 5.70850589586546e-06, "loss": 0.3035728454589844, "step": 32725 }, { "epoch": 0.2830066320222047, "grad_norm": 1.089588046358207, "learning_rate": 5.708418287099005e-06, "loss": 0.08963623046875, "step": 32730 }, { "epoch": 0.283049865543748, "grad_norm": 13.954465566103513, "learning_rate": 5.708330665841564e-06, "loss": 0.241998291015625, "step": 32735 }, { "epoch": 0.2830930990652913, "grad_norm": 9.917094836812392, "learning_rate": 5.708243032093542e-06, "loss": 0.2755950927734375, "step": 32740 }, { "epoch": 0.2831363325868345, "grad_norm": 20.07947111852866, "learning_rate": 5.708155385855343e-06, "loss": 0.383282470703125, "step": 32745 }, { "epoch": 0.2831795661083778, "grad_norm": 1.1452115618482586, "learning_rate": 5.708067727127371e-06, "loss": 0.18856582641601563, "step": 32750 }, { "epoch": 0.28322279962992103, "grad_norm": 27.587219887266215, "learning_rate": 5.707980055910029e-06, "loss": 0.1089630126953125, "step": 32755 }, { "epoch": 0.2832660331514643, "grad_norm": 2.672558874969092, "learning_rate": 5.7078923722037215e-06, "loss": 0.14276885986328125, "step": 32760 }, { "epoch": 0.2833092666730076, "grad_norm": 43.711343891778476, "learning_rate": 5.707804676008855e-06, "loss": 0.555999755859375, "step": 32765 }, { "epoch": 0.2833525001945508, "grad_norm": 28.23017121412429, "learning_rate": 5.707716967325832e-06, "loss": 0.3826507568359375, "step": 32770 }, { "epoch": 0.2833957337160941, "grad_norm": 3.5020022243317026, "learning_rate": 5.7076292461550585e-06, "loss": 0.10234146118164063, "step": 32775 }, { "epoch": 0.2834389672376374, "grad_norm": 13.516560237239316, "learning_rate": 5.707541512496937e-06, "loss": 0.12988433837890626, "step": 32780 }, { "epoch": 0.2834822007591806, "grad_norm": 10.308129413217209, "learning_rate": 5.707453766351874e-06, "loss": 0.21295318603515626, "step": 32785 }, { "epoch": 0.2835254342807239, "grad_norm": 7.973608912770049, "learning_rate": 5.707366007720272e-06, "loss": 0.3406829833984375, "step": 32790 }, { "epoch": 0.2835686678022672, "grad_norm": 13.787851510140909, "learning_rate": 5.707278236602539e-06, "loss": 0.08129425048828125, "step": 32795 }, { "epoch": 0.2836119013238104, "grad_norm": 9.76222762997871, "learning_rate": 5.707190452999077e-06, "loss": 0.13535919189453124, "step": 32800 }, { "epoch": 0.2836551348453537, "grad_norm": 65.04650387454451, "learning_rate": 5.707102656910293e-06, "loss": 0.21133804321289062, "step": 32805 }, { "epoch": 0.28369836836689694, "grad_norm": 37.08748139614883, "learning_rate": 5.707014848336589e-06, "loss": 0.43520317077636717, "step": 32810 }, { "epoch": 0.2837416018884402, "grad_norm": 0.05346165003750084, "learning_rate": 5.706927027278373e-06, "loss": 0.41904449462890625, "step": 32815 }, { "epoch": 0.2837848354099835, "grad_norm": 11.34812810505826, "learning_rate": 5.706839193736048e-06, "loss": 0.1090118408203125, "step": 32820 }, { "epoch": 0.28382806893152673, "grad_norm": 13.629772959844923, "learning_rate": 5.706751347710021e-06, "loss": 0.12837982177734375, "step": 32825 }, { "epoch": 0.28387130245307, "grad_norm": 8.468898025529402, "learning_rate": 5.706663489200694e-06, "loss": 0.1421255111694336, "step": 32830 }, { "epoch": 0.2839145359746133, "grad_norm": 15.496019614918318, "learning_rate": 5.706575618208476e-06, "loss": 0.238848876953125, "step": 32835 }, { "epoch": 0.28395776949615653, "grad_norm": 2.84391503192815, "learning_rate": 5.7064877347337695e-06, "loss": 0.1758636474609375, "step": 32840 }, { "epoch": 0.2840010030176998, "grad_norm": 3.2517050921341037, "learning_rate": 5.706399838776981e-06, "loss": 0.22673568725585938, "step": 32845 }, { "epoch": 0.28404423653924304, "grad_norm": 31.369221008010044, "learning_rate": 5.706311930338516e-06, "loss": 0.13800888061523436, "step": 32850 }, { "epoch": 0.28408747006078633, "grad_norm": 2.907877791516347, "learning_rate": 5.706224009418779e-06, "loss": 0.471551513671875, "step": 32855 }, { "epoch": 0.2841307035823296, "grad_norm": 28.67000097291913, "learning_rate": 5.706136076018175e-06, "loss": 0.22627429962158202, "step": 32860 }, { "epoch": 0.28417393710387284, "grad_norm": 11.218747233636499, "learning_rate": 5.706048130137113e-06, "loss": 0.2661346435546875, "step": 32865 }, { "epoch": 0.2842171706254161, "grad_norm": 10.753219793706716, "learning_rate": 5.705960171775994e-06, "loss": 0.11700897216796875, "step": 32870 }, { "epoch": 0.2842604041469594, "grad_norm": 5.916833103531452, "learning_rate": 5.705872200935227e-06, "loss": 0.2022308349609375, "step": 32875 }, { "epoch": 0.28430363766850264, "grad_norm": 1.6362635846366547, "learning_rate": 5.705784217615216e-06, "loss": 0.1314483642578125, "step": 32880 }, { "epoch": 0.2843468711900459, "grad_norm": 10.504199174871326, "learning_rate": 5.705696221816367e-06, "loss": 0.23538589477539062, "step": 32885 }, { "epoch": 0.28439010471158915, "grad_norm": 2.987855072375423, "learning_rate": 5.705608213539086e-06, "loss": 0.4448890686035156, "step": 32890 }, { "epoch": 0.28443333823313244, "grad_norm": 29.21687223111923, "learning_rate": 5.705520192783779e-06, "loss": 0.2450439453125, "step": 32895 }, { "epoch": 0.2844765717546757, "grad_norm": 9.787823099029314, "learning_rate": 5.7054321595508525e-06, "loss": 0.19371337890625, "step": 32900 }, { "epoch": 0.28451980527621895, "grad_norm": 5.9932195238511285, "learning_rate": 5.705344113840712e-06, "loss": 0.3146453857421875, "step": 32905 }, { "epoch": 0.28456303879776224, "grad_norm": 2.6796627328914293, "learning_rate": 5.7052560556537635e-06, "loss": 0.4286102294921875, "step": 32910 }, { "epoch": 0.2846062723193055, "grad_norm": 8.044639067810932, "learning_rate": 5.705167984990413e-06, "loss": 0.06683807373046875, "step": 32915 }, { "epoch": 0.28464950584084875, "grad_norm": 29.586545579603957, "learning_rate": 5.705079901851066e-06, "loss": 0.211187744140625, "step": 32920 }, { "epoch": 0.28469273936239203, "grad_norm": 2.8064318534854875, "learning_rate": 5.7049918062361305e-06, "loss": 0.24440231323242187, "step": 32925 }, { "epoch": 0.28473597288393526, "grad_norm": 16.613350554648214, "learning_rate": 5.704903698146011e-06, "loss": 0.23409423828125, "step": 32930 }, { "epoch": 0.28477920640547855, "grad_norm": 2.088141328713423, "learning_rate": 5.704815577581115e-06, "loss": 0.0308868408203125, "step": 32935 }, { "epoch": 0.28482243992702183, "grad_norm": 39.468363262305026, "learning_rate": 5.704727444541849e-06, "loss": 0.24175567626953126, "step": 32940 }, { "epoch": 0.28486567344856506, "grad_norm": 1.1689775532691995, "learning_rate": 5.704639299028619e-06, "loss": 0.08531494140625, "step": 32945 }, { "epoch": 0.28490890697010834, "grad_norm": 1.1091300062177882, "learning_rate": 5.704551141041831e-06, "loss": 0.057073974609375, "step": 32950 }, { "epoch": 0.28495214049165163, "grad_norm": 8.13237020810355, "learning_rate": 5.7044629705818925e-06, "loss": 0.17168731689453126, "step": 32955 }, { "epoch": 0.28499537401319486, "grad_norm": 16.691178855339967, "learning_rate": 5.704374787649209e-06, "loss": 0.24508895874023437, "step": 32960 }, { "epoch": 0.28503860753473814, "grad_norm": 0.2195041006671929, "learning_rate": 5.704286592244189e-06, "loss": 0.09951705932617187, "step": 32965 }, { "epoch": 0.28508184105628137, "grad_norm": 1.526646723995614, "learning_rate": 5.7041983843672375e-06, "loss": 0.03782806396484375, "step": 32970 }, { "epoch": 0.28512507457782466, "grad_norm": 0.2195253942548394, "learning_rate": 5.704110164018763e-06, "loss": 0.554351806640625, "step": 32975 }, { "epoch": 0.28516830809936794, "grad_norm": 20.011577859383525, "learning_rate": 5.70402193119917e-06, "loss": 0.242803955078125, "step": 32980 }, { "epoch": 0.28521154162091117, "grad_norm": 5.1918976760987, "learning_rate": 5.703933685908868e-06, "loss": 0.1389404296875, "step": 32985 }, { "epoch": 0.28525477514245445, "grad_norm": 6.525054634971255, "learning_rate": 5.703845428148262e-06, "loss": 0.157269287109375, "step": 32990 }, { "epoch": 0.28529800866399774, "grad_norm": 3.3187135496837104, "learning_rate": 5.703757157917759e-06, "loss": 0.17435302734375, "step": 32995 }, { "epoch": 0.28534124218554097, "grad_norm": 29.586168055226697, "learning_rate": 5.703668875217769e-06, "loss": 0.4050445556640625, "step": 33000 }, { "epoch": 0.28538447570708425, "grad_norm": 3.262894950051958, "learning_rate": 5.703580580048695e-06, "loss": 0.08810958862304688, "step": 33005 }, { "epoch": 0.28542770922862754, "grad_norm": 1.0703694871734428, "learning_rate": 5.703492272410948e-06, "loss": 0.17995147705078124, "step": 33010 }, { "epoch": 0.28547094275017076, "grad_norm": 22.085793627743264, "learning_rate": 5.703403952304932e-06, "loss": 0.1957061767578125, "step": 33015 }, { "epoch": 0.28551417627171405, "grad_norm": 3.2684959308732013, "learning_rate": 5.7033156197310565e-06, "loss": 0.262152099609375, "step": 33020 }, { "epoch": 0.2855574097932573, "grad_norm": 0.22339222132868838, "learning_rate": 5.703227274689727e-06, "loss": 0.20795745849609376, "step": 33025 }, { "epoch": 0.28560064331480056, "grad_norm": 6.42565464153906, "learning_rate": 5.703138917181354e-06, "loss": 0.109368896484375, "step": 33030 }, { "epoch": 0.28564387683634385, "grad_norm": 8.194117582978805, "learning_rate": 5.703050547206343e-06, "loss": 0.18115882873535155, "step": 33035 }, { "epoch": 0.2856871103578871, "grad_norm": 11.633410195234877, "learning_rate": 5.702962164765101e-06, "loss": 0.07072982788085938, "step": 33040 }, { "epoch": 0.28573034387943036, "grad_norm": 0.28479769376880765, "learning_rate": 5.702873769858037e-06, "loss": 0.12855224609375, "step": 33045 }, { "epoch": 0.28577357740097364, "grad_norm": 2.068738291336417, "learning_rate": 5.702785362485557e-06, "loss": 0.1085174560546875, "step": 33050 }, { "epoch": 0.2858168109225169, "grad_norm": 4.577813153051502, "learning_rate": 5.7026969426480714e-06, "loss": 0.049463081359863284, "step": 33055 }, { "epoch": 0.28586004444406016, "grad_norm": 23.55356804600776, "learning_rate": 5.702608510345985e-06, "loss": 0.204461669921875, "step": 33060 }, { "epoch": 0.2859032779656034, "grad_norm": 13.564752647431467, "learning_rate": 5.702520065579707e-06, "loss": 0.20189437866210938, "step": 33065 }, { "epoch": 0.28594651148714667, "grad_norm": 33.551520836808486, "learning_rate": 5.702431608349645e-06, "loss": 0.3769683837890625, "step": 33070 }, { "epoch": 0.28598974500868996, "grad_norm": 29.32112428351551, "learning_rate": 5.702343138656208e-06, "loss": 0.38226318359375, "step": 33075 }, { "epoch": 0.2860329785302332, "grad_norm": 12.712454483729777, "learning_rate": 5.702254656499803e-06, "loss": 0.297271728515625, "step": 33080 }, { "epoch": 0.28607621205177647, "grad_norm": 0.32666726853493105, "learning_rate": 5.702166161880839e-06, "loss": 0.12231979370117188, "step": 33085 }, { "epoch": 0.28611944557331975, "grad_norm": 5.817938297525336, "learning_rate": 5.702077654799722e-06, "loss": 0.04729766845703125, "step": 33090 }, { "epoch": 0.286162679094863, "grad_norm": 0.39411704418995336, "learning_rate": 5.701989135256863e-06, "loss": 0.1337432861328125, "step": 33095 }, { "epoch": 0.28620591261640627, "grad_norm": 14.819828853266454, "learning_rate": 5.701900603252667e-06, "loss": 0.20048294067382813, "step": 33100 }, { "epoch": 0.2862491461379495, "grad_norm": 12.998007774964035, "learning_rate": 5.7018120587875455e-06, "loss": 0.25064659118652344, "step": 33105 }, { "epoch": 0.2862923796594928, "grad_norm": 4.090159019678311, "learning_rate": 5.701723501861905e-06, "loss": 0.1040863037109375, "step": 33110 }, { "epoch": 0.28633561318103606, "grad_norm": 97.46885626974723, "learning_rate": 5.701634932476155e-06, "loss": 0.6391143798828125, "step": 33115 }, { "epoch": 0.2863788467025793, "grad_norm": 7.084253056082746, "learning_rate": 5.7015463506307034e-06, "loss": 0.14660186767578126, "step": 33120 }, { "epoch": 0.2864220802241226, "grad_norm": 29.72366424256011, "learning_rate": 5.701457756325959e-06, "loss": 0.3483673095703125, "step": 33125 }, { "epoch": 0.28646531374566586, "grad_norm": 2.5638303960911832, "learning_rate": 5.701369149562328e-06, "loss": 0.235455322265625, "step": 33130 }, { "epoch": 0.2865085472672091, "grad_norm": 18.331701419815587, "learning_rate": 5.701280530340224e-06, "loss": 0.24447784423828126, "step": 33135 }, { "epoch": 0.2865517807887524, "grad_norm": 16.514262276349186, "learning_rate": 5.701191898660052e-06, "loss": 0.2832206726074219, "step": 33140 }, { "epoch": 0.2865950143102956, "grad_norm": 0.38971399699619824, "learning_rate": 5.701103254522221e-06, "loss": 0.3162109375, "step": 33145 }, { "epoch": 0.2866382478318389, "grad_norm": 31.236432733498326, "learning_rate": 5.70101459792714e-06, "loss": 0.23926219940185547, "step": 33150 }, { "epoch": 0.2866814813533822, "grad_norm": 4.912047834441746, "learning_rate": 5.70092592887522e-06, "loss": 0.39260711669921877, "step": 33155 }, { "epoch": 0.2867247148749254, "grad_norm": 8.86092586280775, "learning_rate": 5.7008372473668676e-06, "loss": 0.10263214111328126, "step": 33160 }, { "epoch": 0.2867679483964687, "grad_norm": 1.4337327431728768, "learning_rate": 5.700748553402492e-06, "loss": 0.12291107177734376, "step": 33165 }, { "epoch": 0.28681118191801197, "grad_norm": 3.291314682204805, "learning_rate": 5.700659846982503e-06, "loss": 0.11803207397460938, "step": 33170 }, { "epoch": 0.2868544154395552, "grad_norm": 2.823085276543035, "learning_rate": 5.700571128107309e-06, "loss": 0.18003997802734376, "step": 33175 }, { "epoch": 0.2868976489610985, "grad_norm": 24.21976598430517, "learning_rate": 5.70048239677732e-06, "loss": 0.07034912109375, "step": 33180 }, { "epoch": 0.28694088248264177, "grad_norm": 35.034026735829315, "learning_rate": 5.700393652992945e-06, "loss": 0.37071685791015624, "step": 33185 }, { "epoch": 0.286984116004185, "grad_norm": 0.45867251863925473, "learning_rate": 5.700304896754593e-06, "loss": 0.20634613037109376, "step": 33190 }, { "epoch": 0.2870273495257283, "grad_norm": 7.502313170231103, "learning_rate": 5.700216128062673e-06, "loss": 0.1986724853515625, "step": 33195 }, { "epoch": 0.2870705830472715, "grad_norm": 11.715989020776934, "learning_rate": 5.700127346917595e-06, "loss": 0.2061450958251953, "step": 33200 }, { "epoch": 0.2871138165688148, "grad_norm": 0.28733133059838273, "learning_rate": 5.7000385533197685e-06, "loss": 0.14855194091796875, "step": 33205 }, { "epoch": 0.2871570500903581, "grad_norm": 6.510084066761343, "learning_rate": 5.6999497472696025e-06, "loss": 0.12490158081054688, "step": 33210 }, { "epoch": 0.2872002836119013, "grad_norm": 7.355307717148653, "learning_rate": 5.6998609287675075e-06, "loss": 0.13460693359375, "step": 33215 }, { "epoch": 0.2872435171334446, "grad_norm": 11.652206831404218, "learning_rate": 5.699772097813892e-06, "loss": 0.27275390625, "step": 33220 }, { "epoch": 0.2872867506549879, "grad_norm": 22.361416579811387, "learning_rate": 5.699683254409166e-06, "loss": 0.1928375244140625, "step": 33225 }, { "epoch": 0.2873299841765311, "grad_norm": 10.896575625226083, "learning_rate": 5.69959439855374e-06, "loss": 0.0882354736328125, "step": 33230 }, { "epoch": 0.2873732176980744, "grad_norm": 14.35052750837095, "learning_rate": 5.699505530248022e-06, "loss": 0.22389545440673828, "step": 33235 }, { "epoch": 0.2874164512196176, "grad_norm": 25.866104366934856, "learning_rate": 5.699416649492423e-06, "loss": 0.32111663818359376, "step": 33240 }, { "epoch": 0.2874596847411609, "grad_norm": 1.796756950219203, "learning_rate": 5.699327756287354e-06, "loss": 0.13537750244140626, "step": 33245 }, { "epoch": 0.2875029182627042, "grad_norm": 4.912939879095256, "learning_rate": 5.699238850633224e-06, "loss": 0.3700469970703125, "step": 33250 }, { "epoch": 0.2875461517842474, "grad_norm": 6.4775640134820485, "learning_rate": 5.699149932530442e-06, "loss": 0.05599822998046875, "step": 33255 }, { "epoch": 0.2875893853057907, "grad_norm": 30.851926761820266, "learning_rate": 5.69906100197942e-06, "loss": 0.2613250732421875, "step": 33260 }, { "epoch": 0.287632618827334, "grad_norm": 3.183724554821298, "learning_rate": 5.698972058980566e-06, "loss": 0.165869140625, "step": 33265 }, { "epoch": 0.2876758523488772, "grad_norm": 20.81642107308581, "learning_rate": 5.698883103534292e-06, "loss": 0.2375030517578125, "step": 33270 }, { "epoch": 0.2877190858704205, "grad_norm": 3.345573156257276, "learning_rate": 5.698794135641007e-06, "loss": 0.239898681640625, "step": 33275 }, { "epoch": 0.28776231939196373, "grad_norm": 27.932329252532806, "learning_rate": 5.698705155301123e-06, "loss": 0.22573623657226563, "step": 33280 }, { "epoch": 0.287805552913507, "grad_norm": 1.2752796381703333, "learning_rate": 5.698616162515049e-06, "loss": 0.426544189453125, "step": 33285 }, { "epoch": 0.2878487864350503, "grad_norm": 1.687700347843407, "learning_rate": 5.698527157283195e-06, "loss": 0.34682159423828124, "step": 33290 }, { "epoch": 0.2878920199565935, "grad_norm": 19.776252384870673, "learning_rate": 5.698438139605973e-06, "loss": 0.0898193359375, "step": 33295 }, { "epoch": 0.2879352534781368, "grad_norm": 3.6787266156824865, "learning_rate": 5.6983491094837925e-06, "loss": 0.49025115966796873, "step": 33300 }, { "epoch": 0.2879784869996801, "grad_norm": 0.1467937560962666, "learning_rate": 5.698260066917065e-06, "loss": 0.03944091796875, "step": 33305 }, { "epoch": 0.2880217205212233, "grad_norm": 5.44655701345261, "learning_rate": 5.698171011906199e-06, "loss": 0.089617919921875, "step": 33310 }, { "epoch": 0.2880649540427666, "grad_norm": 16.725080992701592, "learning_rate": 5.698081944451607e-06, "loss": 0.17837600708007811, "step": 33315 }, { "epoch": 0.28810818756430984, "grad_norm": 32.80055376160702, "learning_rate": 5.6979928645537e-06, "loss": 0.550518798828125, "step": 33320 }, { "epoch": 0.2881514210858531, "grad_norm": 36.670290127344686, "learning_rate": 5.697903772212888e-06, "loss": 0.4624908447265625, "step": 33325 }, { "epoch": 0.2881946546073964, "grad_norm": 40.30189345735284, "learning_rate": 5.697814667429583e-06, "loss": 0.1570037841796875, "step": 33330 }, { "epoch": 0.28823788812893963, "grad_norm": 1.1932194922743187, "learning_rate": 5.6977255502041945e-06, "loss": 0.07532958984375, "step": 33335 }, { "epoch": 0.2882811216504829, "grad_norm": 13.909136783683726, "learning_rate": 5.697636420537134e-06, "loss": 0.1241851806640625, "step": 33340 }, { "epoch": 0.2883243551720262, "grad_norm": 19.291544219883058, "learning_rate": 5.697547278428813e-06, "loss": 0.163677978515625, "step": 33345 }, { "epoch": 0.28836758869356943, "grad_norm": 26.486785382457292, "learning_rate": 5.697458123879642e-06, "loss": 0.297821044921875, "step": 33350 }, { "epoch": 0.2884108222151127, "grad_norm": 21.48693013049611, "learning_rate": 5.697368956890033e-06, "loss": 0.15594635009765626, "step": 33355 }, { "epoch": 0.288454055736656, "grad_norm": 9.080259934080875, "learning_rate": 5.697279777460397e-06, "loss": 0.23847808837890624, "step": 33360 }, { "epoch": 0.28849728925819923, "grad_norm": 2.462317008258063, "learning_rate": 5.697190585591144e-06, "loss": 0.25338058471679686, "step": 33365 }, { "epoch": 0.2885405227797425, "grad_norm": 20.124334745758528, "learning_rate": 5.697101381282686e-06, "loss": 0.17150802612304689, "step": 33370 }, { "epoch": 0.28858375630128574, "grad_norm": 24.68433887603342, "learning_rate": 5.697012164535436e-06, "loss": 0.3161865234375, "step": 33375 }, { "epoch": 0.288626989822829, "grad_norm": 43.81687863933759, "learning_rate": 5.696922935349804e-06, "loss": 0.24813079833984375, "step": 33380 }, { "epoch": 0.2886702233443723, "grad_norm": 9.258798472554933, "learning_rate": 5.6968336937262015e-06, "loss": 0.1300048828125, "step": 33385 }, { "epoch": 0.28871345686591554, "grad_norm": 34.58005685162237, "learning_rate": 5.696744439665039e-06, "loss": 0.20184326171875, "step": 33390 }, { "epoch": 0.2887566903874588, "grad_norm": 3.232677010843073, "learning_rate": 5.696655173166732e-06, "loss": 0.22578125, "step": 33395 }, { "epoch": 0.2887999239090021, "grad_norm": 3.2852715702498574, "learning_rate": 5.696565894231688e-06, "loss": 0.18096542358398438, "step": 33400 }, { "epoch": 0.28884315743054534, "grad_norm": 34.132754378702906, "learning_rate": 5.69647660286032e-06, "loss": 0.16133041381835939, "step": 33405 }, { "epoch": 0.2888863909520886, "grad_norm": 5.096482025915024, "learning_rate": 5.696387299053041e-06, "loss": 0.0630279541015625, "step": 33410 }, { "epoch": 0.28892962447363185, "grad_norm": 0.5015254246718197, "learning_rate": 5.696297982810262e-06, "loss": 0.19015789031982422, "step": 33415 }, { "epoch": 0.28897285799517514, "grad_norm": 4.186716216202129, "learning_rate": 5.696208654132395e-06, "loss": 0.36768798828125, "step": 33420 }, { "epoch": 0.2890160915167184, "grad_norm": 15.003822823678794, "learning_rate": 5.696119313019851e-06, "loss": 0.1642578125, "step": 33425 }, { "epoch": 0.28905932503826165, "grad_norm": 6.60205391963604, "learning_rate": 5.696029959473044e-06, "loss": 0.268017578125, "step": 33430 }, { "epoch": 0.28910255855980493, "grad_norm": 9.713377937653734, "learning_rate": 5.695940593492385e-06, "loss": 0.1553619384765625, "step": 33435 }, { "epoch": 0.2891457920813482, "grad_norm": 1.33378090556234, "learning_rate": 5.695851215078285e-06, "loss": 0.036540985107421875, "step": 33440 }, { "epoch": 0.28918902560289145, "grad_norm": 21.68980144983463, "learning_rate": 5.69576182423116e-06, "loss": 0.49278411865234373, "step": 33445 }, { "epoch": 0.28923225912443473, "grad_norm": 2.916957828881203, "learning_rate": 5.695672420951418e-06, "loss": 0.0891693115234375, "step": 33450 }, { "epoch": 0.28927549264597796, "grad_norm": 4.2863167586126485, "learning_rate": 5.6955830052394725e-06, "loss": 0.1130523681640625, "step": 33455 }, { "epoch": 0.28931872616752125, "grad_norm": 13.57729688519755, "learning_rate": 5.6954935770957366e-06, "loss": 0.5418354034423828, "step": 33460 }, { "epoch": 0.28936195968906453, "grad_norm": 6.170165732011743, "learning_rate": 5.695404136520622e-06, "loss": 0.24418869018554687, "step": 33465 }, { "epoch": 0.28940519321060776, "grad_norm": 2.8265396567769985, "learning_rate": 5.695314683514543e-06, "loss": 0.10562744140625, "step": 33470 }, { "epoch": 0.28944842673215104, "grad_norm": 0.31582033816882626, "learning_rate": 5.69522521807791e-06, "loss": 0.04518890380859375, "step": 33475 }, { "epoch": 0.2894916602536943, "grad_norm": 7.199936003277751, "learning_rate": 5.695135740211137e-06, "loss": 0.32108154296875, "step": 33480 }, { "epoch": 0.28953489377523756, "grad_norm": 18.777029510732294, "learning_rate": 5.6950462499146356e-06, "loss": 0.339483642578125, "step": 33485 }, { "epoch": 0.28957812729678084, "grad_norm": 6.194375342531329, "learning_rate": 5.69495674718882e-06, "loss": 0.13616943359375, "step": 33490 }, { "epoch": 0.28962136081832407, "grad_norm": 23.93353986066997, "learning_rate": 5.694867232034101e-06, "loss": 0.34452056884765625, "step": 33495 }, { "epoch": 0.28966459433986735, "grad_norm": 23.944275639927593, "learning_rate": 5.694777704450893e-06, "loss": 0.24107208251953124, "step": 33500 }, { "epoch": 0.28970782786141064, "grad_norm": 10.546905998745904, "learning_rate": 5.694688164439608e-06, "loss": 0.2106201171875, "step": 33505 }, { "epoch": 0.28975106138295387, "grad_norm": 5.091261181164597, "learning_rate": 5.69459861200066e-06, "loss": 0.08439178466796875, "step": 33510 }, { "epoch": 0.28979429490449715, "grad_norm": 14.143110852245126, "learning_rate": 5.69450904713446e-06, "loss": 0.030326461791992186, "step": 33515 }, { "epoch": 0.28983752842604044, "grad_norm": 2.78907604321231, "learning_rate": 5.694419469841423e-06, "loss": 0.04992218017578125, "step": 33520 }, { "epoch": 0.28988076194758366, "grad_norm": 5.489076114797912, "learning_rate": 5.6943298801219615e-06, "loss": 0.2540283203125, "step": 33525 }, { "epoch": 0.28992399546912695, "grad_norm": 2.5255763729510226, "learning_rate": 5.694240277976489e-06, "loss": 0.0812530517578125, "step": 33530 }, { "epoch": 0.2899672289906702, "grad_norm": 7.885947492233533, "learning_rate": 5.694150663405418e-06, "loss": 0.231622314453125, "step": 33535 }, { "epoch": 0.29001046251221346, "grad_norm": 7.507737552819898, "learning_rate": 5.694061036409162e-06, "loss": 0.1727294921875, "step": 33540 }, { "epoch": 0.29005369603375675, "grad_norm": 31.463606934135523, "learning_rate": 5.693971396988135e-06, "loss": 0.24188499450683593, "step": 33545 }, { "epoch": 0.2900969295553, "grad_norm": 0.5325576276002963, "learning_rate": 5.6938817451427504e-06, "loss": 0.513555908203125, "step": 33550 }, { "epoch": 0.29014016307684326, "grad_norm": 3.4972354987318917, "learning_rate": 5.69379208087342e-06, "loss": 0.3267333984375, "step": 33555 }, { "epoch": 0.29018339659838654, "grad_norm": 1.3304656296082424, "learning_rate": 5.6937024041805595e-06, "loss": 0.21893730163574218, "step": 33560 }, { "epoch": 0.2902266301199298, "grad_norm": 2.5249899163478995, "learning_rate": 5.693612715064581e-06, "loss": 0.05639801025390625, "step": 33565 }, { "epoch": 0.29026986364147306, "grad_norm": 13.749975165614652, "learning_rate": 5.693523013525899e-06, "loss": 0.134722900390625, "step": 33570 }, { "epoch": 0.29031309716301634, "grad_norm": 27.05753621652113, "learning_rate": 5.693433299564927e-06, "loss": 0.153125, "step": 33575 }, { "epoch": 0.29035633068455957, "grad_norm": 3.6704720592125124, "learning_rate": 5.693343573182079e-06, "loss": 0.2042266845703125, "step": 33580 }, { "epoch": 0.29039956420610286, "grad_norm": 19.104786091609917, "learning_rate": 5.693253834377767e-06, "loss": 0.531683349609375, "step": 33585 }, { "epoch": 0.2904427977276461, "grad_norm": 2.2092817209559152, "learning_rate": 5.693164083152407e-06, "loss": 0.01503753662109375, "step": 33590 }, { "epoch": 0.29048603124918937, "grad_norm": 9.526729610055352, "learning_rate": 5.6930743195064125e-06, "loss": 0.29969940185546873, "step": 33595 }, { "epoch": 0.29052926477073265, "grad_norm": 7.923243691089042, "learning_rate": 5.692984543440197e-06, "loss": 0.37840423583984373, "step": 33600 }, { "epoch": 0.2905724982922759, "grad_norm": 9.880239017165346, "learning_rate": 5.6928947549541745e-06, "loss": 0.2914886474609375, "step": 33605 }, { "epoch": 0.29061573181381917, "grad_norm": 0.408415590263555, "learning_rate": 5.69280495404876e-06, "loss": 0.16610565185546874, "step": 33610 }, { "epoch": 0.29065896533536245, "grad_norm": 13.436570213033889, "learning_rate": 5.6927151407243665e-06, "loss": 0.36967926025390624, "step": 33615 }, { "epoch": 0.2907021988569057, "grad_norm": 16.275852266303925, "learning_rate": 5.692625314981409e-06, "loss": 0.26663360595703123, "step": 33620 }, { "epoch": 0.29074543237844896, "grad_norm": 5.636604508723783, "learning_rate": 5.692535476820302e-06, "loss": 0.19129791259765624, "step": 33625 }, { "epoch": 0.2907886658999922, "grad_norm": 8.124942583638502, "learning_rate": 5.692445626241458e-06, "loss": 0.545587158203125, "step": 33630 }, { "epoch": 0.2908318994215355, "grad_norm": 26.246185598257764, "learning_rate": 5.692355763245294e-06, "loss": 0.3249603271484375, "step": 33635 }, { "epoch": 0.29087513294307876, "grad_norm": 2.8557441892263777, "learning_rate": 5.692265887832222e-06, "loss": 0.10539360046386718, "step": 33640 }, { "epoch": 0.290918366464622, "grad_norm": 10.818371188297617, "learning_rate": 5.692176000002658e-06, "loss": 0.07129325866699218, "step": 33645 }, { "epoch": 0.2909615999861653, "grad_norm": 1.508668325354867, "learning_rate": 5.692086099757015e-06, "loss": 0.22872467041015626, "step": 33650 }, { "epoch": 0.29100483350770856, "grad_norm": 3.000721240022243, "learning_rate": 5.69199618709571e-06, "loss": 0.2736083984375, "step": 33655 }, { "epoch": 0.2910480670292518, "grad_norm": 4.929571976237845, "learning_rate": 5.691906262019157e-06, "loss": 0.0923919677734375, "step": 33660 }, { "epoch": 0.2910913005507951, "grad_norm": 4.405596126428721, "learning_rate": 5.691816324527769e-06, "loss": 0.0675323486328125, "step": 33665 }, { "epoch": 0.2911345340723383, "grad_norm": 28.587510942197166, "learning_rate": 5.691726374621962e-06, "loss": 0.23180694580078126, "step": 33670 }, { "epoch": 0.2911777675938816, "grad_norm": 3.628662738403044, "learning_rate": 5.691636412302152e-06, "loss": 0.16596603393554688, "step": 33675 }, { "epoch": 0.29122100111542487, "grad_norm": 4.361315031070349, "learning_rate": 5.6915464375687515e-06, "loss": 0.437164306640625, "step": 33680 }, { "epoch": 0.2912642346369681, "grad_norm": 4.013227645446693, "learning_rate": 5.691456450422178e-06, "loss": 0.0687896728515625, "step": 33685 }, { "epoch": 0.2913074681585114, "grad_norm": 14.588657836106961, "learning_rate": 5.691366450862845e-06, "loss": 0.49312667846679686, "step": 33690 }, { "epoch": 0.29135070168005467, "grad_norm": 9.164641040421909, "learning_rate": 5.691276438891167e-06, "loss": 0.32521514892578124, "step": 33695 }, { "epoch": 0.2913939352015979, "grad_norm": 3.991912810493246, "learning_rate": 5.691186414507559e-06, "loss": 0.24092884063720704, "step": 33700 }, { "epoch": 0.2914371687231412, "grad_norm": 2.645085582618612, "learning_rate": 5.691096377712438e-06, "loss": 0.0641845703125, "step": 33705 }, { "epoch": 0.2914804022446844, "grad_norm": 8.084780044095792, "learning_rate": 5.6910063285062185e-06, "loss": 0.13496856689453124, "step": 33710 }, { "epoch": 0.2915236357662277, "grad_norm": 134.07129872630844, "learning_rate": 5.6909162668893155e-06, "loss": 0.392938232421875, "step": 33715 }, { "epoch": 0.291566869287771, "grad_norm": 38.57223656053756, "learning_rate": 5.690826192862145e-06, "loss": 0.199462890625, "step": 33720 }, { "epoch": 0.2916101028093142, "grad_norm": 2.313493366829992, "learning_rate": 5.690736106425121e-06, "loss": 0.44449691772460936, "step": 33725 }, { "epoch": 0.2916533363308575, "grad_norm": 12.655942185968831, "learning_rate": 5.69064600757866e-06, "loss": 0.1485118865966797, "step": 33730 }, { "epoch": 0.2916965698524008, "grad_norm": 0.3271509957653004, "learning_rate": 5.690555896323177e-06, "loss": 0.06971893310546876, "step": 33735 }, { "epoch": 0.291739803373944, "grad_norm": 0.24233907139442837, "learning_rate": 5.69046577265909e-06, "loss": 0.27260665893554686, "step": 33740 }, { "epoch": 0.2917830368954873, "grad_norm": 9.410682600276735, "learning_rate": 5.69037563658681e-06, "loss": 0.0853912353515625, "step": 33745 }, { "epoch": 0.2918262704170306, "grad_norm": 13.520227041576687, "learning_rate": 5.690285488106756e-06, "loss": 0.3064117431640625, "step": 33750 }, { "epoch": 0.2918695039385738, "grad_norm": 40.99836841676305, "learning_rate": 5.690195327219344e-06, "loss": 0.442041015625, "step": 33755 }, { "epoch": 0.2919127374601171, "grad_norm": 7.904087635178896, "learning_rate": 5.690105153924987e-06, "loss": 0.188446044921875, "step": 33760 }, { "epoch": 0.2919559709816603, "grad_norm": 9.926373351183747, "learning_rate": 5.690014968224104e-06, "loss": 0.12303695678710938, "step": 33765 }, { "epoch": 0.2919992045032036, "grad_norm": 3.844399839083764, "learning_rate": 5.68992477011711e-06, "loss": 0.27854766845703127, "step": 33770 }, { "epoch": 0.2920424380247469, "grad_norm": 3.429417357421809, "learning_rate": 5.6898345596044205e-06, "loss": 0.24515380859375, "step": 33775 }, { "epoch": 0.2920856715462901, "grad_norm": 12.434072533942471, "learning_rate": 5.689744336686451e-06, "loss": 0.28612060546875, "step": 33780 }, { "epoch": 0.2921289050678334, "grad_norm": 4.378553436530737, "learning_rate": 5.6896541013636196e-06, "loss": 0.4055419921875, "step": 33785 }, { "epoch": 0.2921721385893767, "grad_norm": 1.8493645500341631, "learning_rate": 5.689563853636339e-06, "loss": 0.16937103271484374, "step": 33790 }, { "epoch": 0.2922153721109199, "grad_norm": 5.316200116932208, "learning_rate": 5.6894735935050296e-06, "loss": 0.12227096557617187, "step": 33795 }, { "epoch": 0.2922586056324632, "grad_norm": 0.2730232463362653, "learning_rate": 5.689383320970105e-06, "loss": 0.3871063232421875, "step": 33800 }, { "epoch": 0.2923018391540064, "grad_norm": 0.6425868634123458, "learning_rate": 5.689293036031982e-06, "loss": 0.15066070556640626, "step": 33805 }, { "epoch": 0.2923450726755497, "grad_norm": 5.1012539302122715, "learning_rate": 5.6892027386910765e-06, "loss": 0.21847000122070312, "step": 33810 }, { "epoch": 0.292388306197093, "grad_norm": 5.639900325042001, "learning_rate": 5.689112428947806e-06, "loss": 0.11846046447753907, "step": 33815 }, { "epoch": 0.2924315397186362, "grad_norm": 11.59586394764112, "learning_rate": 5.689022106802587e-06, "loss": 0.12459449768066407, "step": 33820 }, { "epoch": 0.2924747732401795, "grad_norm": 16.527822174185392, "learning_rate": 5.688931772255836e-06, "loss": 0.13749771118164061, "step": 33825 }, { "epoch": 0.2925180067617228, "grad_norm": 5.4111887649577195, "learning_rate": 5.688841425307969e-06, "loss": 0.1644317626953125, "step": 33830 }, { "epoch": 0.292561240283266, "grad_norm": 15.576471386756012, "learning_rate": 5.688751065959402e-06, "loss": 0.456744384765625, "step": 33835 }, { "epoch": 0.2926044738048093, "grad_norm": 16.96352048643928, "learning_rate": 5.688660694210553e-06, "loss": 0.19111709594726561, "step": 33840 }, { "epoch": 0.29264770732635254, "grad_norm": 59.52946436625829, "learning_rate": 5.6885703100618395e-06, "loss": 0.439794921875, "step": 33845 }, { "epoch": 0.2926909408478958, "grad_norm": 2.7005805774012472, "learning_rate": 5.688479913513677e-06, "loss": 0.1333984375, "step": 33850 }, { "epoch": 0.2927341743694391, "grad_norm": 11.98865809268237, "learning_rate": 5.688389504566482e-06, "loss": 0.2267303466796875, "step": 33855 }, { "epoch": 0.29277740789098233, "grad_norm": 5.477070448839911, "learning_rate": 5.688299083220672e-06, "loss": 0.11705169677734376, "step": 33860 }, { "epoch": 0.2928206414125256, "grad_norm": 40.34040083196526, "learning_rate": 5.688208649476665e-06, "loss": 0.21879196166992188, "step": 33865 }, { "epoch": 0.2928638749340689, "grad_norm": 9.343129345294374, "learning_rate": 5.688118203334877e-06, "loss": 0.3324981689453125, "step": 33870 }, { "epoch": 0.29290710845561213, "grad_norm": 34.956997704746705, "learning_rate": 5.688027744795725e-06, "loss": 0.22610321044921874, "step": 33875 }, { "epoch": 0.2929503419771554, "grad_norm": 20.675501359881068, "learning_rate": 5.687937273859627e-06, "loss": 0.1704864501953125, "step": 33880 }, { "epoch": 0.29299357549869864, "grad_norm": 1.413621784842577, "learning_rate": 5.687846790527e-06, "loss": 0.24580230712890624, "step": 33885 }, { "epoch": 0.29303680902024193, "grad_norm": 9.672764228241178, "learning_rate": 5.687756294798259e-06, "loss": 0.344970703125, "step": 33890 }, { "epoch": 0.2930800425417852, "grad_norm": 38.25272097317197, "learning_rate": 5.687665786673826e-06, "loss": 0.5703887939453125, "step": 33895 }, { "epoch": 0.29312327606332844, "grad_norm": 0.4040594021465325, "learning_rate": 5.687575266154115e-06, "loss": 0.42715301513671877, "step": 33900 }, { "epoch": 0.2931665095848717, "grad_norm": 3.5191216197817425, "learning_rate": 5.687484733239545e-06, "loss": 0.15567169189453126, "step": 33905 }, { "epoch": 0.293209743106415, "grad_norm": 7.635931267792977, "learning_rate": 5.687394187930532e-06, "loss": 0.30401077270507815, "step": 33910 }, { "epoch": 0.29325297662795824, "grad_norm": 28.322175474657065, "learning_rate": 5.687303630227495e-06, "loss": 0.18917083740234375, "step": 33915 }, { "epoch": 0.2932962101495015, "grad_norm": 6.657547462646137, "learning_rate": 5.68721306013085e-06, "loss": 0.3208274841308594, "step": 33920 }, { "epoch": 0.2933394436710448, "grad_norm": 1.1134911161539522, "learning_rate": 5.687122477641017e-06, "loss": 0.20806884765625, "step": 33925 }, { "epoch": 0.29338267719258804, "grad_norm": 5.2319867764394115, "learning_rate": 5.687031882758412e-06, "loss": 0.2698211669921875, "step": 33930 }, { "epoch": 0.2934259107141313, "grad_norm": 21.38336559449887, "learning_rate": 5.686941275483454e-06, "loss": 0.338299560546875, "step": 33935 }, { "epoch": 0.29346914423567455, "grad_norm": 3.5925079203907013, "learning_rate": 5.68685065581656e-06, "loss": 0.17408294677734376, "step": 33940 }, { "epoch": 0.29351237775721783, "grad_norm": 26.33147021444267, "learning_rate": 5.686760023758148e-06, "loss": 0.4359649658203125, "step": 33945 }, { "epoch": 0.2935556112787611, "grad_norm": 1.5468080303946978, "learning_rate": 5.686669379308635e-06, "loss": 0.1898193359375, "step": 33950 }, { "epoch": 0.29359884480030435, "grad_norm": 5.645011075529381, "learning_rate": 5.686578722468442e-06, "loss": 0.09316864013671874, "step": 33955 }, { "epoch": 0.29364207832184763, "grad_norm": 0.24864074033220662, "learning_rate": 5.686488053237985e-06, "loss": 0.13046112060546874, "step": 33960 }, { "epoch": 0.2936853118433909, "grad_norm": 0.23956080570536833, "learning_rate": 5.686397371617682e-06, "loss": 0.14480094909667968, "step": 33965 }, { "epoch": 0.29372854536493415, "grad_norm": 13.147845593609391, "learning_rate": 5.6863066776079525e-06, "loss": 0.34705810546875, "step": 33970 }, { "epoch": 0.29377177888647743, "grad_norm": 43.803274747549075, "learning_rate": 5.686215971209213e-06, "loss": 0.25447998046875, "step": 33975 }, { "epoch": 0.29381501240802066, "grad_norm": 35.2382699145814, "learning_rate": 5.686125252421884e-06, "loss": 0.341815185546875, "step": 33980 }, { "epoch": 0.29385824592956394, "grad_norm": 3.8452721251125874, "learning_rate": 5.686034521246381e-06, "loss": 0.40015411376953125, "step": 33985 }, { "epoch": 0.29390147945110723, "grad_norm": 4.839794650517175, "learning_rate": 5.685943777683125e-06, "loss": 0.38532257080078125, "step": 33990 }, { "epoch": 0.29394471297265046, "grad_norm": 4.104657477092446, "learning_rate": 5.685853021732534e-06, "loss": 0.1909393310546875, "step": 33995 }, { "epoch": 0.29398794649419374, "grad_norm": 18.172523711512433, "learning_rate": 5.6857622533950265e-06, "loss": 0.13529586791992188, "step": 34000 }, { "epoch": 0.294031180015737, "grad_norm": 14.10938740983107, "learning_rate": 5.68567147267102e-06, "loss": 0.080902099609375, "step": 34005 }, { "epoch": 0.29407441353728025, "grad_norm": 55.40604637479861, "learning_rate": 5.685580679560934e-06, "loss": 0.32576828002929686, "step": 34010 }, { "epoch": 0.29411764705882354, "grad_norm": 12.136575535808388, "learning_rate": 5.685489874065187e-06, "loss": 0.2473602294921875, "step": 34015 }, { "epoch": 0.29416088058036677, "grad_norm": 1.8969529120146893, "learning_rate": 5.685399056184199e-06, "loss": 0.225347900390625, "step": 34020 }, { "epoch": 0.29420411410191005, "grad_norm": 2.3659718787043875, "learning_rate": 5.685308225918386e-06, "loss": 0.0818206787109375, "step": 34025 }, { "epoch": 0.29424734762345334, "grad_norm": 18.420259077493174, "learning_rate": 5.68521738326817e-06, "loss": 0.2438568115234375, "step": 34030 }, { "epoch": 0.29429058114499657, "grad_norm": 12.926299362743634, "learning_rate": 5.685126528233969e-06, "loss": 0.28155059814453126, "step": 34035 }, { "epoch": 0.29433381466653985, "grad_norm": 0.7162991100298077, "learning_rate": 5.685035660816202e-06, "loss": 0.14172592163085937, "step": 34040 }, { "epoch": 0.29437704818808313, "grad_norm": 0.9194917162652421, "learning_rate": 5.6849447810152865e-06, "loss": 0.140155029296875, "step": 34045 }, { "epoch": 0.29442028170962636, "grad_norm": 1.8165216417729684, "learning_rate": 5.684853888831644e-06, "loss": 0.16573543548583985, "step": 34050 }, { "epoch": 0.29446351523116965, "grad_norm": 1.2107875388025, "learning_rate": 5.684762984265692e-06, "loss": 0.0446014404296875, "step": 34055 }, { "epoch": 0.2945067487527129, "grad_norm": 8.6288133630707, "learning_rate": 5.68467206731785e-06, "loss": 0.11940231323242187, "step": 34060 }, { "epoch": 0.29454998227425616, "grad_norm": 2.449658723998932, "learning_rate": 5.6845811379885385e-06, "loss": 0.0643829345703125, "step": 34065 }, { "epoch": 0.29459321579579945, "grad_norm": 21.06712399402287, "learning_rate": 5.684490196278176e-06, "loss": 0.3224395751953125, "step": 34070 }, { "epoch": 0.2946364493173427, "grad_norm": 3.3523998965019652, "learning_rate": 5.684399242187181e-06, "loss": 0.1151519775390625, "step": 34075 }, { "epoch": 0.29467968283888596, "grad_norm": 0.3915787207905951, "learning_rate": 5.6843082757159745e-06, "loss": 0.6311553955078125, "step": 34080 }, { "epoch": 0.29472291636042924, "grad_norm": 35.35248187597768, "learning_rate": 5.6842172968649754e-06, "loss": 0.32987060546875, "step": 34085 }, { "epoch": 0.2947661498819725, "grad_norm": 6.846668319254251, "learning_rate": 5.684126305634603e-06, "loss": 0.2558319091796875, "step": 34090 }, { "epoch": 0.29480938340351576, "grad_norm": 0.6418094364235108, "learning_rate": 5.684035302025278e-06, "loss": 0.10352935791015624, "step": 34095 }, { "epoch": 0.29485261692505904, "grad_norm": 17.263927129520553, "learning_rate": 5.683944286037418e-06, "loss": 0.1230621337890625, "step": 34100 }, { "epoch": 0.29489585044660227, "grad_norm": 20.08906139491862, "learning_rate": 5.683853257671446e-06, "loss": 0.156988525390625, "step": 34105 }, { "epoch": 0.29493908396814555, "grad_norm": 5.58389938350375, "learning_rate": 5.683762216927779e-06, "loss": 0.13759841918945312, "step": 34110 }, { "epoch": 0.2949823174896888, "grad_norm": 12.999744540284329, "learning_rate": 5.683671163806837e-06, "loss": 0.10542869567871094, "step": 34115 }, { "epoch": 0.29502555101123207, "grad_norm": 17.196896287618006, "learning_rate": 5.683580098309042e-06, "loss": 0.14595489501953124, "step": 34120 }, { "epoch": 0.29506878453277535, "grad_norm": 6.31097162507488, "learning_rate": 5.683489020434812e-06, "loss": 0.407257080078125, "step": 34125 }, { "epoch": 0.2951120180543186, "grad_norm": 23.003290563648328, "learning_rate": 5.683397930184568e-06, "loss": 0.20386962890625, "step": 34130 }, { "epoch": 0.29515525157586187, "grad_norm": 17.06652143482866, "learning_rate": 5.68330682755873e-06, "loss": 0.36103515625, "step": 34135 }, { "epoch": 0.29519848509740515, "grad_norm": 13.282906363451644, "learning_rate": 5.683215712557717e-06, "loss": 0.2558174133300781, "step": 34140 }, { "epoch": 0.2952417186189484, "grad_norm": 15.546696066532103, "learning_rate": 5.6831245851819516e-06, "loss": 0.17690095901489258, "step": 34145 }, { "epoch": 0.29528495214049166, "grad_norm": 5.743101324716272, "learning_rate": 5.683033445431852e-06, "loss": 0.1181060791015625, "step": 34150 }, { "epoch": 0.2953281856620349, "grad_norm": 1.7501013892855801, "learning_rate": 5.682942293307839e-06, "loss": 0.13154144287109376, "step": 34155 }, { "epoch": 0.2953714191835782, "grad_norm": 5.07864116973134, "learning_rate": 5.6828511288103336e-06, "loss": 0.21521434783935547, "step": 34160 }, { "epoch": 0.29541465270512146, "grad_norm": 22.156601165576543, "learning_rate": 5.682759951939756e-06, "loss": 0.1688873291015625, "step": 34165 }, { "epoch": 0.2954578862266647, "grad_norm": 23.11200124796792, "learning_rate": 5.682668762696527e-06, "loss": 0.20124855041503906, "step": 34170 }, { "epoch": 0.295501119748208, "grad_norm": 3.6683940148154224, "learning_rate": 5.682577561081065e-06, "loss": 0.141986083984375, "step": 34175 }, { "epoch": 0.29554435326975126, "grad_norm": 4.284751441528241, "learning_rate": 5.682486347093794e-06, "loss": 0.0601165771484375, "step": 34180 }, { "epoch": 0.2955875867912945, "grad_norm": 1.4019928602299427, "learning_rate": 5.682395120735132e-06, "loss": 0.1020904541015625, "step": 34185 }, { "epoch": 0.29563082031283777, "grad_norm": 3.115505005304172, "learning_rate": 5.682303882005501e-06, "loss": 0.44250946044921874, "step": 34190 }, { "epoch": 0.295674053834381, "grad_norm": 34.97788719600421, "learning_rate": 5.682212630905321e-06, "loss": 0.238739013671875, "step": 34195 }, { "epoch": 0.2957172873559243, "grad_norm": 1.0054118575059467, "learning_rate": 5.682121367435014e-06, "loss": 0.1296478271484375, "step": 34200 }, { "epoch": 0.29576052087746757, "grad_norm": 7.784437961975888, "learning_rate": 5.682030091595e-06, "loss": 0.10695819854736328, "step": 34205 }, { "epoch": 0.2958037543990108, "grad_norm": 39.89565531813655, "learning_rate": 5.6819388033857015e-06, "loss": 0.56259765625, "step": 34210 }, { "epoch": 0.2958469879205541, "grad_norm": 2.4983971692418145, "learning_rate": 5.681847502807537e-06, "loss": 0.04539699554443359, "step": 34215 }, { "epoch": 0.29589022144209737, "grad_norm": 8.287083852530335, "learning_rate": 5.681756189860929e-06, "loss": 0.0752105712890625, "step": 34220 }, { "epoch": 0.2959334549636406, "grad_norm": 2.75822736404354, "learning_rate": 5.681664864546298e-06, "loss": 0.14481658935546876, "step": 34225 }, { "epoch": 0.2959766884851839, "grad_norm": 9.36235035706145, "learning_rate": 5.681573526864066e-06, "loss": 0.2532695770263672, "step": 34230 }, { "epoch": 0.2960199220067271, "grad_norm": 21.19876292328851, "learning_rate": 5.681482176814654e-06, "loss": 0.33121337890625, "step": 34235 }, { "epoch": 0.2960631555282704, "grad_norm": 9.223508673573031, "learning_rate": 5.681390814398483e-06, "loss": 0.3188873291015625, "step": 34240 }, { "epoch": 0.2961063890498137, "grad_norm": 9.567362984959969, "learning_rate": 5.681299439615974e-06, "loss": 0.40946044921875, "step": 34245 }, { "epoch": 0.2961496225713569, "grad_norm": 11.878766863970394, "learning_rate": 5.681208052467549e-06, "loss": 0.0675628662109375, "step": 34250 }, { "epoch": 0.2961928560929002, "grad_norm": 6.148994254407652, "learning_rate": 5.68111665295363e-06, "loss": 0.24381256103515625, "step": 34255 }, { "epoch": 0.2962360896144435, "grad_norm": 0.8753111442712252, "learning_rate": 5.681025241074638e-06, "loss": 0.5514907836914062, "step": 34260 }, { "epoch": 0.2962793231359867, "grad_norm": 21.700161157475442, "learning_rate": 5.680933816830994e-06, "loss": 0.1461273193359375, "step": 34265 }, { "epoch": 0.29632255665753, "grad_norm": 1.4734905642773652, "learning_rate": 5.680842380223119e-06, "loss": 0.04891357421875, "step": 34270 }, { "epoch": 0.2963657901790732, "grad_norm": 0.3609329973194765, "learning_rate": 5.680750931251437e-06, "loss": 0.2005706787109375, "step": 34275 }, { "epoch": 0.2964090237006165, "grad_norm": 21.55272509126759, "learning_rate": 5.680659469916369e-06, "loss": 0.105352783203125, "step": 34280 }, { "epoch": 0.2964522572221598, "grad_norm": 2.10306861007823, "learning_rate": 5.680567996218337e-06, "loss": 0.08672027587890625, "step": 34285 }, { "epoch": 0.296495490743703, "grad_norm": 11.74895068905094, "learning_rate": 5.6804765101577605e-06, "loss": 0.45545654296875, "step": 34290 }, { "epoch": 0.2965387242652463, "grad_norm": 31.03225430298496, "learning_rate": 5.680385011735064e-06, "loss": 0.20176467895507813, "step": 34295 }, { "epoch": 0.2965819577867896, "grad_norm": 4.131975404243687, "learning_rate": 5.6802935009506685e-06, "loss": 0.04156417846679687, "step": 34300 }, { "epoch": 0.2966251913083328, "grad_norm": 0.047747252006656776, "learning_rate": 5.6802019778049965e-06, "loss": 0.3324871063232422, "step": 34305 }, { "epoch": 0.2966684248298761, "grad_norm": 58.60179253271194, "learning_rate": 5.68011044229847e-06, "loss": 0.507159423828125, "step": 34310 }, { "epoch": 0.2967116583514194, "grad_norm": 13.959632566991315, "learning_rate": 5.680018894431511e-06, "loss": 0.32169952392578127, "step": 34315 }, { "epoch": 0.2967548918729626, "grad_norm": 6.429618728105332, "learning_rate": 5.6799273342045405e-06, "loss": 0.1150054931640625, "step": 34320 }, { "epoch": 0.2967981253945059, "grad_norm": 40.812462889666676, "learning_rate": 5.679835761617982e-06, "loss": 0.3071929931640625, "step": 34325 }, { "epoch": 0.2968413589160491, "grad_norm": 13.353441074687801, "learning_rate": 5.679744176672259e-06, "loss": 0.12593994140625, "step": 34330 }, { "epoch": 0.2968845924375924, "grad_norm": 4.470456500255615, "learning_rate": 5.679652579367793e-06, "loss": 0.0522186279296875, "step": 34335 }, { "epoch": 0.2969278259591357, "grad_norm": 62.60030785753438, "learning_rate": 5.679560969705005e-06, "loss": 0.21423492431640626, "step": 34340 }, { "epoch": 0.2969710594806789, "grad_norm": 3.0391391768986287, "learning_rate": 5.679469347684318e-06, "loss": 0.4622894287109375, "step": 34345 }, { "epoch": 0.2970142930022222, "grad_norm": 1.9561560214752416, "learning_rate": 5.679377713306156e-06, "loss": 0.21803817749023438, "step": 34350 }, { "epoch": 0.2970575265237655, "grad_norm": 18.543090806398187, "learning_rate": 5.67928606657094e-06, "loss": 0.20023193359375, "step": 34355 }, { "epoch": 0.2971007600453087, "grad_norm": 22.888253065097924, "learning_rate": 5.679194407479095e-06, "loss": 0.08294296264648438, "step": 34360 }, { "epoch": 0.297143993566852, "grad_norm": 1.6246335174440751, "learning_rate": 5.6791027360310405e-06, "loss": 0.08369789123535157, "step": 34365 }, { "epoch": 0.29718722708839523, "grad_norm": 3.9100664944648402, "learning_rate": 5.679011052227202e-06, "loss": 0.2866161346435547, "step": 34370 }, { "epoch": 0.2972304606099385, "grad_norm": 13.509495425518981, "learning_rate": 5.678919356068001e-06, "loss": 0.34231929779052733, "step": 34375 }, { "epoch": 0.2972736941314818, "grad_norm": 3.398321641550167, "learning_rate": 5.67882764755386e-06, "loss": 0.3171539306640625, "step": 34380 }, { "epoch": 0.29731692765302503, "grad_norm": 38.74897620506276, "learning_rate": 5.678735926685203e-06, "loss": 0.24707412719726562, "step": 34385 }, { "epoch": 0.2973601611745683, "grad_norm": 12.416307531425028, "learning_rate": 5.678644193462453e-06, "loss": 0.11701202392578125, "step": 34390 }, { "epoch": 0.2974033946961116, "grad_norm": 19.453784388921104, "learning_rate": 5.678552447886032e-06, "loss": 0.328179931640625, "step": 34395 }, { "epoch": 0.29744662821765483, "grad_norm": 7.198784571500187, "learning_rate": 5.6784606899563645e-06, "loss": 0.8016189575195313, "step": 34400 }, { "epoch": 0.2974898617391981, "grad_norm": 9.673107462005666, "learning_rate": 5.6783689196738725e-06, "loss": 0.28012847900390625, "step": 34405 }, { "epoch": 0.29753309526074134, "grad_norm": 0.4104691456174364, "learning_rate": 5.6782771370389795e-06, "loss": 0.04216842651367188, "step": 34410 }, { "epoch": 0.2975763287822846, "grad_norm": 15.147691933489211, "learning_rate": 5.678185342052109e-06, "loss": 0.170208740234375, "step": 34415 }, { "epoch": 0.2976195623038279, "grad_norm": 0.7524652879942642, "learning_rate": 5.678093534713685e-06, "loss": 0.20321044921875, "step": 34420 }, { "epoch": 0.29766279582537114, "grad_norm": 1.2660668088732354, "learning_rate": 5.67800171502413e-06, "loss": 0.1063323974609375, "step": 34425 }, { "epoch": 0.2977060293469144, "grad_norm": 22.467690407474382, "learning_rate": 5.677909882983867e-06, "loss": 0.28209228515625, "step": 34430 }, { "epoch": 0.2977492628684577, "grad_norm": 11.441400207283769, "learning_rate": 5.677818038593322e-06, "loss": 0.122015380859375, "step": 34435 }, { "epoch": 0.29779249639000094, "grad_norm": 1.6436835697514547, "learning_rate": 5.6777261818529155e-06, "loss": 0.13248748779296876, "step": 34440 }, { "epoch": 0.2978357299115442, "grad_norm": 3.6078592265604885, "learning_rate": 5.677634312763073e-06, "loss": 0.187701416015625, "step": 34445 }, { "epoch": 0.29787896343308745, "grad_norm": 1.9911268760350629, "learning_rate": 5.677542431324217e-06, "loss": 0.0812835693359375, "step": 34450 }, { "epoch": 0.29792219695463074, "grad_norm": 28.999465213982525, "learning_rate": 5.677450537536772e-06, "loss": 0.2211458206176758, "step": 34455 }, { "epoch": 0.297965430476174, "grad_norm": 9.678687897013555, "learning_rate": 5.677358631401163e-06, "loss": 0.19158935546875, "step": 34460 }, { "epoch": 0.29800866399771725, "grad_norm": 50.62278891633109, "learning_rate": 5.677266712917811e-06, "loss": 0.33668212890625, "step": 34465 }, { "epoch": 0.29805189751926053, "grad_norm": 6.504494161087869, "learning_rate": 5.677174782087142e-06, "loss": 0.15554962158203126, "step": 34470 }, { "epoch": 0.2980951310408038, "grad_norm": 11.005933886621666, "learning_rate": 5.677082838909579e-06, "loss": 0.14350967407226561, "step": 34475 }, { "epoch": 0.29813836456234705, "grad_norm": 7.768771650049046, "learning_rate": 5.6769908833855475e-06, "loss": 0.16822433471679688, "step": 34480 }, { "epoch": 0.29818159808389033, "grad_norm": 0.5951340762700712, "learning_rate": 5.676898915515469e-06, "loss": 0.14523849487304688, "step": 34485 }, { "epoch": 0.2982248316054336, "grad_norm": 35.182634378895756, "learning_rate": 5.67680693529977e-06, "loss": 0.41044464111328127, "step": 34490 }, { "epoch": 0.29826806512697684, "grad_norm": 0.8652133818112682, "learning_rate": 5.676714942738875e-06, "loss": 0.2955963134765625, "step": 34495 }, { "epoch": 0.29831129864852013, "grad_norm": 12.082467837740797, "learning_rate": 5.676622937833206e-06, "loss": 0.21320343017578125, "step": 34500 }, { "epoch": 0.29835453217006336, "grad_norm": 2.803133544266185, "learning_rate": 5.676530920583189e-06, "loss": 0.1234130859375, "step": 34505 }, { "epoch": 0.29839776569160664, "grad_norm": 1.7632419281840805, "learning_rate": 5.676438890989248e-06, "loss": 0.22994384765625, "step": 34510 }, { "epoch": 0.2984409992131499, "grad_norm": 5.697340060472345, "learning_rate": 5.676346849051807e-06, "loss": 0.07831039428710937, "step": 34515 }, { "epoch": 0.29848423273469316, "grad_norm": 2.616508870277796, "learning_rate": 5.676254794771291e-06, "loss": 0.2387420654296875, "step": 34520 }, { "epoch": 0.29852746625623644, "grad_norm": 7.205421598668792, "learning_rate": 5.676162728148124e-06, "loss": 0.1625091552734375, "step": 34525 }, { "epoch": 0.2985706997777797, "grad_norm": 2.5047331907602457, "learning_rate": 5.676070649182732e-06, "loss": 0.3664947509765625, "step": 34530 }, { "epoch": 0.29861393329932295, "grad_norm": 2.035022961761645, "learning_rate": 5.675978557875537e-06, "loss": 0.1472320556640625, "step": 34535 }, { "epoch": 0.29865716682086624, "grad_norm": 11.897066730237764, "learning_rate": 5.675886454226966e-06, "loss": 0.23612747192382813, "step": 34540 }, { "epoch": 0.29870040034240947, "grad_norm": 13.853343616048612, "learning_rate": 5.675794338237444e-06, "loss": 0.08306503295898438, "step": 34545 }, { "epoch": 0.29874363386395275, "grad_norm": 19.312965731138263, "learning_rate": 5.675702209907395e-06, "loss": 0.18164825439453125, "step": 34550 }, { "epoch": 0.29878686738549604, "grad_norm": 15.056555624681003, "learning_rate": 5.675610069237242e-06, "loss": 0.07759246826171876, "step": 34555 }, { "epoch": 0.29883010090703926, "grad_norm": 4.548061263730798, "learning_rate": 5.675517916227413e-06, "loss": 0.158953857421875, "step": 34560 }, { "epoch": 0.29887333442858255, "grad_norm": 3.9621887797949467, "learning_rate": 5.675425750878332e-06, "loss": 0.025885009765625, "step": 34565 }, { "epoch": 0.29891656795012583, "grad_norm": 33.33086173406417, "learning_rate": 5.675333573190423e-06, "loss": 0.12742462158203124, "step": 34570 }, { "epoch": 0.29895980147166906, "grad_norm": 46.05595930112337, "learning_rate": 5.675241383164114e-06, "loss": 0.2804561614990234, "step": 34575 }, { "epoch": 0.29900303499321235, "grad_norm": 0.9214966860443448, "learning_rate": 5.675149180799826e-06, "loss": 0.12234840393066407, "step": 34580 }, { "epoch": 0.2990462685147556, "grad_norm": 14.306659130504734, "learning_rate": 5.6750569660979865e-06, "loss": 0.1038848876953125, "step": 34585 }, { "epoch": 0.29908950203629886, "grad_norm": 22.97934735918612, "learning_rate": 5.674964739059021e-06, "loss": 0.1753662109375, "step": 34590 }, { "epoch": 0.29913273555784214, "grad_norm": 29.850719395759175, "learning_rate": 5.674872499683356e-06, "loss": 0.16058502197265626, "step": 34595 }, { "epoch": 0.2991759690793854, "grad_norm": 7.2804903025971575, "learning_rate": 5.674780247971413e-06, "loss": 0.2369171142578125, "step": 34600 }, { "epoch": 0.29921920260092866, "grad_norm": 41.97358981058354, "learning_rate": 5.674687983923621e-06, "loss": 0.555908203125, "step": 34605 }, { "epoch": 0.29926243612247194, "grad_norm": 15.172065903839187, "learning_rate": 5.674595707540404e-06, "loss": 0.06495437622070313, "step": 34610 }, { "epoch": 0.29930566964401517, "grad_norm": 28.151696480550125, "learning_rate": 5.674503418822189e-06, "loss": 0.17069244384765625, "step": 34615 }, { "epoch": 0.29934890316555846, "grad_norm": 4.025617838344525, "learning_rate": 5.674411117769399e-06, "loss": 0.16402206420898438, "step": 34620 }, { "epoch": 0.2993921366871017, "grad_norm": 2.7780900154241177, "learning_rate": 5.6743188043824625e-06, "loss": 0.349566650390625, "step": 34625 }, { "epoch": 0.29943537020864497, "grad_norm": 3.8718529313791725, "learning_rate": 5.674226478661803e-06, "loss": 0.1331512451171875, "step": 34630 }, { "epoch": 0.29947860373018825, "grad_norm": 12.165099820651346, "learning_rate": 5.674134140607848e-06, "loss": 0.07729034423828125, "step": 34635 }, { "epoch": 0.2995218372517315, "grad_norm": 28.366324685764067, "learning_rate": 5.674041790221023e-06, "loss": 0.1336181640625, "step": 34640 }, { "epoch": 0.29956507077327477, "grad_norm": 9.659431274254002, "learning_rate": 5.6739494275017525e-06, "loss": 0.13797607421875, "step": 34645 }, { "epoch": 0.29960830429481805, "grad_norm": 6.218538021567126, "learning_rate": 5.673857052450464e-06, "loss": 0.07051849365234375, "step": 34650 }, { "epoch": 0.2996515378163613, "grad_norm": 35.546670395258246, "learning_rate": 5.673764665067583e-06, "loss": 0.35740814208984373, "step": 34655 }, { "epoch": 0.29969477133790456, "grad_norm": 14.164567903858753, "learning_rate": 5.673672265353535e-06, "loss": 0.2872661590576172, "step": 34660 }, { "epoch": 0.29973800485944785, "grad_norm": 35.15672092520423, "learning_rate": 5.673579853308748e-06, "loss": 0.2941314697265625, "step": 34665 }, { "epoch": 0.2997812383809911, "grad_norm": 3.8897897432389184, "learning_rate": 5.673487428933646e-06, "loss": 0.03223114013671875, "step": 34670 }, { "epoch": 0.29982447190253436, "grad_norm": 39.02552766782976, "learning_rate": 5.673394992228656e-06, "loss": 0.2166667938232422, "step": 34675 }, { "epoch": 0.2998677054240776, "grad_norm": 60.81062765988923, "learning_rate": 5.673302543194205e-06, "loss": 0.17969512939453125, "step": 34680 }, { "epoch": 0.2999109389456209, "grad_norm": 1.079922334839676, "learning_rate": 5.673210081830719e-06, "loss": 0.28128814697265625, "step": 34685 }, { "epoch": 0.29995417246716416, "grad_norm": 8.629535316926882, "learning_rate": 5.673117608138624e-06, "loss": 0.12959442138671876, "step": 34690 }, { "epoch": 0.2999974059887074, "grad_norm": 17.96553095455947, "learning_rate": 5.6730251221183475e-06, "loss": 0.1948760986328125, "step": 34695 }, { "epoch": 0.3000406395102507, "grad_norm": 0.4538035547993137, "learning_rate": 5.672932623770314e-06, "loss": 0.23369903564453126, "step": 34700 }, { "epoch": 0.30008387303179396, "grad_norm": 55.57717596845839, "learning_rate": 5.672840113094953e-06, "loss": 0.58336181640625, "step": 34705 }, { "epoch": 0.3001271065533372, "grad_norm": 22.15306534636674, "learning_rate": 5.672747590092689e-06, "loss": 0.20770492553710937, "step": 34710 }, { "epoch": 0.30017034007488047, "grad_norm": 5.762735399076602, "learning_rate": 5.672655054763949e-06, "loss": 0.11160049438476563, "step": 34715 }, { "epoch": 0.3002135735964237, "grad_norm": 21.882677544312262, "learning_rate": 5.67256250710916e-06, "loss": 0.2051025390625, "step": 34720 }, { "epoch": 0.300256807117967, "grad_norm": 6.542771382133509, "learning_rate": 5.67246994712875e-06, "loss": 0.0416168212890625, "step": 34725 }, { "epoch": 0.30030004063951027, "grad_norm": 2.554927948728634, "learning_rate": 5.672377374823143e-06, "loss": 0.11236572265625, "step": 34730 }, { "epoch": 0.3003432741610535, "grad_norm": 3.4736543852409363, "learning_rate": 5.672284790192769e-06, "loss": 0.060150146484375, "step": 34735 }, { "epoch": 0.3003865076825968, "grad_norm": 0.2594356202452105, "learning_rate": 5.672192193238052e-06, "loss": 0.11951885223388672, "step": 34740 }, { "epoch": 0.30042974120414007, "grad_norm": 1.3984489030034999, "learning_rate": 5.6720995839594225e-06, "loss": 0.09430389404296875, "step": 34745 }, { "epoch": 0.3004729747256833, "grad_norm": 0.22881445514364243, "learning_rate": 5.672006962357305e-06, "loss": 0.3120231628417969, "step": 34750 }, { "epoch": 0.3005162082472266, "grad_norm": 16.751505487492572, "learning_rate": 5.671914328432127e-06, "loss": 0.25976409912109377, "step": 34755 }, { "epoch": 0.3005594417687698, "grad_norm": 4.790169527420734, "learning_rate": 5.671821682184318e-06, "loss": 0.1595184326171875, "step": 34760 }, { "epoch": 0.3006026752903131, "grad_norm": 5.341400162034357, "learning_rate": 5.671729023614302e-06, "loss": 0.13567657470703126, "step": 34765 }, { "epoch": 0.3006459088118564, "grad_norm": 28.26662394300077, "learning_rate": 5.671636352722508e-06, "loss": 0.3214691162109375, "step": 34770 }, { "epoch": 0.3006891423333996, "grad_norm": 19.28493609365237, "learning_rate": 5.671543669509363e-06, "loss": 0.2199432373046875, "step": 34775 }, { "epoch": 0.3007323758549429, "grad_norm": 3.9069459173886183, "learning_rate": 5.6714509739752955e-06, "loss": 0.1336639404296875, "step": 34780 }, { "epoch": 0.3007756093764862, "grad_norm": 0.17980925098928152, "learning_rate": 5.671358266120732e-06, "loss": 0.17446365356445312, "step": 34785 }, { "epoch": 0.3008188428980294, "grad_norm": 1.0156426412187651, "learning_rate": 5.6712655459460995e-06, "loss": 0.10347938537597656, "step": 34790 }, { "epoch": 0.3008620764195727, "grad_norm": 1.7404437142377285, "learning_rate": 5.671172813451827e-06, "loss": 0.0777130126953125, "step": 34795 }, { "epoch": 0.3009053099411159, "grad_norm": 14.052006653871263, "learning_rate": 5.671080068638341e-06, "loss": 0.06843414306640624, "step": 34800 }, { "epoch": 0.3009485434626592, "grad_norm": 2.8045963010727855, "learning_rate": 5.67098731150607e-06, "loss": 0.1463409423828125, "step": 34805 }, { "epoch": 0.3009917769842025, "grad_norm": 73.07879977832773, "learning_rate": 5.670894542055442e-06, "loss": 0.311395263671875, "step": 34810 }, { "epoch": 0.3010350105057457, "grad_norm": 0.281300885468111, "learning_rate": 5.670801760286884e-06, "loss": 0.14144134521484375, "step": 34815 }, { "epoch": 0.301078244027289, "grad_norm": 12.882559590039907, "learning_rate": 5.670708966200824e-06, "loss": 0.4072845458984375, "step": 34820 }, { "epoch": 0.3011214775488323, "grad_norm": 5.143130402814036, "learning_rate": 5.67061615979769e-06, "loss": 0.5367263793945313, "step": 34825 }, { "epoch": 0.3011647110703755, "grad_norm": 39.52442818688151, "learning_rate": 5.670523341077912e-06, "loss": 0.1276092529296875, "step": 34830 }, { "epoch": 0.3012079445919188, "grad_norm": 8.306586106154596, "learning_rate": 5.6704305100419145e-06, "loss": 0.2180206298828125, "step": 34835 }, { "epoch": 0.3012511781134621, "grad_norm": 4.7710910397138875, "learning_rate": 5.670337666690128e-06, "loss": 0.230322265625, "step": 34840 }, { "epoch": 0.3012944116350053, "grad_norm": 23.732757566947992, "learning_rate": 5.6702448110229795e-06, "loss": 0.20233688354492188, "step": 34845 }, { "epoch": 0.3013376451565486, "grad_norm": 0.3727477808471082, "learning_rate": 5.670151943040899e-06, "loss": 0.1092041015625, "step": 34850 }, { "epoch": 0.3013808786780918, "grad_norm": 7.452996687820868, "learning_rate": 5.670059062744313e-06, "loss": 0.08661880493164062, "step": 34855 }, { "epoch": 0.3014241121996351, "grad_norm": 52.06496588884591, "learning_rate": 5.669966170133651e-06, "loss": 0.6269954681396485, "step": 34860 }, { "epoch": 0.3014673457211784, "grad_norm": 11.096562203999577, "learning_rate": 5.66987326520934e-06, "loss": 0.243951416015625, "step": 34865 }, { "epoch": 0.3015105792427216, "grad_norm": 2.247239304043515, "learning_rate": 5.6697803479718105e-06, "loss": 0.08568878173828125, "step": 34870 }, { "epoch": 0.3015538127642649, "grad_norm": 5.7065477775885975, "learning_rate": 5.6696874184214895e-06, "loss": 0.0750518798828125, "step": 34875 }, { "epoch": 0.3015970462858082, "grad_norm": 42.26255318515956, "learning_rate": 5.669594476558806e-06, "loss": 0.40162506103515627, "step": 34880 }, { "epoch": 0.3016402798073514, "grad_norm": 2.3998561291087306, "learning_rate": 5.6695015223841895e-06, "loss": 0.08488273620605469, "step": 34885 }, { "epoch": 0.3016835133288947, "grad_norm": 5.824004235734912, "learning_rate": 5.669408555898067e-06, "loss": 0.118768310546875, "step": 34890 }, { "epoch": 0.30172674685043793, "grad_norm": 6.627691539581229, "learning_rate": 5.669315577100867e-06, "loss": 0.25333709716796876, "step": 34895 }, { "epoch": 0.3017699803719812, "grad_norm": 1.1507148101832796, "learning_rate": 5.669222585993021e-06, "loss": 0.1464263916015625, "step": 34900 }, { "epoch": 0.3018132138935245, "grad_norm": 5.192070869430769, "learning_rate": 5.669129582574956e-06, "loss": 0.17772216796875, "step": 34905 }, { "epoch": 0.30185644741506773, "grad_norm": 31.187576922512154, "learning_rate": 5.669036566847101e-06, "loss": 0.3774139404296875, "step": 34910 }, { "epoch": 0.301899680936611, "grad_norm": 21.678179882729737, "learning_rate": 5.668943538809886e-06, "loss": 0.16199951171875, "step": 34915 }, { "epoch": 0.3019429144581543, "grad_norm": 10.30089489929703, "learning_rate": 5.668850498463738e-06, "loss": 0.397369384765625, "step": 34920 }, { "epoch": 0.30198614797969753, "grad_norm": 3.2871699696449976, "learning_rate": 5.668757445809088e-06, "loss": 0.16460189819335938, "step": 34925 }, { "epoch": 0.3020293815012408, "grad_norm": 5.99785300746752, "learning_rate": 5.668664380846364e-06, "loss": 0.2347808837890625, "step": 34930 }, { "epoch": 0.30207261502278404, "grad_norm": 1.046732045975379, "learning_rate": 5.668571303575996e-06, "loss": 0.27876739501953124, "step": 34935 }, { "epoch": 0.3021158485443273, "grad_norm": 2.584032085547988, "learning_rate": 5.6684782139984135e-06, "loss": 0.32061767578125, "step": 34940 }, { "epoch": 0.3021590820658706, "grad_norm": 11.523750451629063, "learning_rate": 5.668385112114044e-06, "loss": 0.22623291015625, "step": 34945 }, { "epoch": 0.30220231558741384, "grad_norm": 59.31194477767776, "learning_rate": 5.6682919979233185e-06, "loss": 0.444256591796875, "step": 34950 }, { "epoch": 0.3022455491089571, "grad_norm": 9.357637679532726, "learning_rate": 5.6681988714266675e-06, "loss": 0.14892196655273438, "step": 34955 }, { "epoch": 0.3022887826305004, "grad_norm": 4.133187323389542, "learning_rate": 5.6681057326245174e-06, "loss": 0.2078125, "step": 34960 }, { "epoch": 0.30233201615204364, "grad_norm": 13.762336245012905, "learning_rate": 5.6680125815173e-06, "loss": 0.264996337890625, "step": 34965 }, { "epoch": 0.3023752496735869, "grad_norm": 1.2911239367008245, "learning_rate": 5.667919418105444e-06, "loss": 0.029022216796875, "step": 34970 }, { "epoch": 0.30241848319513015, "grad_norm": 474.66048993268737, "learning_rate": 5.6678262423893805e-06, "loss": 0.3221099853515625, "step": 34975 }, { "epoch": 0.30246171671667343, "grad_norm": 2.8639914023844475, "learning_rate": 5.6677330543695366e-06, "loss": 0.09135055541992188, "step": 34980 }, { "epoch": 0.3025049502382167, "grad_norm": 0.6577623491819314, "learning_rate": 5.667639854046345e-06, "loss": 0.2634422302246094, "step": 34985 }, { "epoch": 0.30254818375975995, "grad_norm": 13.5209336588666, "learning_rate": 5.667546641420233e-06, "loss": 0.18964691162109376, "step": 34990 }, { "epoch": 0.30259141728130323, "grad_norm": 0.659479646588496, "learning_rate": 5.667453416491632e-06, "loss": 0.32535152435302733, "step": 34995 }, { "epoch": 0.3026346508028465, "grad_norm": 13.27634599000302, "learning_rate": 5.667360179260972e-06, "loss": 0.21422958374023438, "step": 35000 }, { "epoch": 0.30267788432438975, "grad_norm": 1.1213298069725013, "learning_rate": 5.667266929728682e-06, "loss": 0.22517852783203124, "step": 35005 }, { "epoch": 0.30272111784593303, "grad_norm": 3.301777096074967, "learning_rate": 5.667173667895192e-06, "loss": 0.147198486328125, "step": 35010 }, { "epoch": 0.30276435136747626, "grad_norm": 20.87128990756931, "learning_rate": 5.6670803937609344e-06, "loss": 0.2418701171875, "step": 35015 }, { "epoch": 0.30280758488901954, "grad_norm": 10.931269523833214, "learning_rate": 5.6669871073263364e-06, "loss": 0.188641357421875, "step": 35020 }, { "epoch": 0.3028508184105628, "grad_norm": 1.3098896399979645, "learning_rate": 5.66689380859183e-06, "loss": 0.1360076904296875, "step": 35025 }, { "epoch": 0.30289405193210606, "grad_norm": 33.4756313887948, "learning_rate": 5.666800497557845e-06, "loss": 0.25598878860473634, "step": 35030 }, { "epoch": 0.30293728545364934, "grad_norm": 15.65544442107718, "learning_rate": 5.666707174224811e-06, "loss": 0.10102996826171876, "step": 35035 }, { "epoch": 0.3029805189751926, "grad_norm": 4.525221039591773, "learning_rate": 5.66661383859316e-06, "loss": 0.1405975341796875, "step": 35040 }, { "epoch": 0.30302375249673585, "grad_norm": 21.310115304471164, "learning_rate": 5.666520490663321e-06, "loss": 0.14934844970703126, "step": 35045 }, { "epoch": 0.30306698601827914, "grad_norm": 7.8917181391243725, "learning_rate": 5.666427130435725e-06, "loss": 0.15859832763671874, "step": 35050 }, { "epoch": 0.3031102195398224, "grad_norm": 12.662415287992298, "learning_rate": 5.666333757910804e-06, "loss": 0.058935546875, "step": 35055 }, { "epoch": 0.30315345306136565, "grad_norm": 1.9222810136133353, "learning_rate": 5.666240373088987e-06, "loss": 0.17731819152832032, "step": 35060 }, { "epoch": 0.30319668658290894, "grad_norm": 6.883580505872698, "learning_rate": 5.6661469759707045e-06, "loss": 0.2491943359375, "step": 35065 }, { "epoch": 0.30323992010445217, "grad_norm": 4.440141138537543, "learning_rate": 5.666053566556387e-06, "loss": 0.31768455505371096, "step": 35070 }, { "epoch": 0.30328315362599545, "grad_norm": 3.6381498639569583, "learning_rate": 5.665960144846467e-06, "loss": 0.261724853515625, "step": 35075 }, { "epoch": 0.30332638714753873, "grad_norm": 7.0254728582588895, "learning_rate": 5.665866710841374e-06, "loss": 0.0742340087890625, "step": 35080 }, { "epoch": 0.30336962066908196, "grad_norm": 1.9043529193868316, "learning_rate": 5.665773264541539e-06, "loss": 0.20015029907226561, "step": 35085 }, { "epoch": 0.30341285419062525, "grad_norm": 5.759499739517881, "learning_rate": 5.665679805947394e-06, "loss": 0.2622161865234375, "step": 35090 }, { "epoch": 0.30345608771216853, "grad_norm": 20.431513773907767, "learning_rate": 5.665586335059368e-06, "loss": 0.234552001953125, "step": 35095 }, { "epoch": 0.30349932123371176, "grad_norm": 8.23786992945655, "learning_rate": 5.665492851877895e-06, "loss": 0.17816200256347656, "step": 35100 }, { "epoch": 0.30354255475525505, "grad_norm": 26.581308053503093, "learning_rate": 5.6653993564034035e-06, "loss": 0.3366790771484375, "step": 35105 }, { "epoch": 0.3035857882767983, "grad_norm": 4.953107121683199, "learning_rate": 5.6653058486363265e-06, "loss": 0.1934661865234375, "step": 35110 }, { "epoch": 0.30362902179834156, "grad_norm": 8.971100722171613, "learning_rate": 5.665212328577094e-06, "loss": 0.12429733276367187, "step": 35115 }, { "epoch": 0.30367225531988484, "grad_norm": 71.41147322796365, "learning_rate": 5.665118796226138e-06, "loss": 0.25504302978515625, "step": 35120 }, { "epoch": 0.30371548884142807, "grad_norm": 19.320141985828606, "learning_rate": 5.665025251583889e-06, "loss": 0.35218505859375, "step": 35125 }, { "epoch": 0.30375872236297136, "grad_norm": 61.58607639116055, "learning_rate": 5.664931694650779e-06, "loss": 0.3119842529296875, "step": 35130 }, { "epoch": 0.30380195588451464, "grad_norm": 2.8008726321178465, "learning_rate": 5.664838125427239e-06, "loss": 0.05623817443847656, "step": 35135 }, { "epoch": 0.30384518940605787, "grad_norm": 3.632043127927079, "learning_rate": 5.664744543913702e-06, "loss": 0.167462158203125, "step": 35140 }, { "epoch": 0.30388842292760115, "grad_norm": 7.3420488462990745, "learning_rate": 5.664650950110598e-06, "loss": 0.043267822265625, "step": 35145 }, { "epoch": 0.3039316564491444, "grad_norm": 0.42214910877613027, "learning_rate": 5.6645573440183605e-06, "loss": 0.13721389770507814, "step": 35150 }, { "epoch": 0.30397488997068767, "grad_norm": 6.067384264999855, "learning_rate": 5.664463725637419e-06, "loss": 0.0748565673828125, "step": 35155 }, { "epoch": 0.30401812349223095, "grad_norm": 1.9011625291848007, "learning_rate": 5.664370094968207e-06, "loss": 0.1072265625, "step": 35160 }, { "epoch": 0.3040613570137742, "grad_norm": 1.1584929829708241, "learning_rate": 5.664276452011154e-06, "loss": 0.05488872528076172, "step": 35165 }, { "epoch": 0.30410459053531746, "grad_norm": 23.292563347224647, "learning_rate": 5.664182796766694e-06, "loss": 0.3613983154296875, "step": 35170 }, { "epoch": 0.30414782405686075, "grad_norm": 1.021624092577504, "learning_rate": 5.6640891292352596e-06, "loss": 0.1593017578125, "step": 35175 }, { "epoch": 0.304191057578404, "grad_norm": 1.0244197632171737, "learning_rate": 5.663995449417281e-06, "loss": 0.0771514892578125, "step": 35180 }, { "epoch": 0.30423429109994726, "grad_norm": 20.29497604295653, "learning_rate": 5.66390175731319e-06, "loss": 0.19703369140625, "step": 35185 }, { "epoch": 0.3042775246214905, "grad_norm": 43.36874692651735, "learning_rate": 5.66380805292342e-06, "loss": 0.3633697509765625, "step": 35190 }, { "epoch": 0.3043207581430338, "grad_norm": 11.115786812122952, "learning_rate": 5.6637143362484026e-06, "loss": 0.11295166015625, "step": 35195 }, { "epoch": 0.30436399166457706, "grad_norm": 0.39070339159600415, "learning_rate": 5.66362060728857e-06, "loss": 0.13957290649414061, "step": 35200 }, { "epoch": 0.3044072251861203, "grad_norm": 0.4928103286548035, "learning_rate": 5.663526866044355e-06, "loss": 0.09272003173828125, "step": 35205 }, { "epoch": 0.3044504587076636, "grad_norm": 10.925238637608475, "learning_rate": 5.663433112516188e-06, "loss": 0.21419219970703124, "step": 35210 }, { "epoch": 0.30449369222920686, "grad_norm": 22.934177083767466, "learning_rate": 5.663339346704504e-06, "loss": 0.12279052734375, "step": 35215 }, { "epoch": 0.3045369257507501, "grad_norm": 10.996896642683982, "learning_rate": 5.6632455686097345e-06, "loss": 0.09518070220947265, "step": 35220 }, { "epoch": 0.30458015927229337, "grad_norm": 7.707432320966506, "learning_rate": 5.663151778232311e-06, "loss": 0.38422088623046874, "step": 35225 }, { "epoch": 0.30462339279383666, "grad_norm": 26.40964360373981, "learning_rate": 5.663057975572667e-06, "loss": 0.10617218017578126, "step": 35230 }, { "epoch": 0.3046666263153799, "grad_norm": 25.349243539962398, "learning_rate": 5.6629641606312355e-06, "loss": 0.548681640625, "step": 35235 }, { "epoch": 0.30470985983692317, "grad_norm": 2.1691813520796637, "learning_rate": 5.662870333408448e-06, "loss": 0.28469161987304686, "step": 35240 }, { "epoch": 0.3047530933584664, "grad_norm": 26.655936852544684, "learning_rate": 5.6627764939047375e-06, "loss": 0.18533172607421874, "step": 35245 }, { "epoch": 0.3047963268800097, "grad_norm": 9.477563709531317, "learning_rate": 5.662682642120538e-06, "loss": 0.0487274169921875, "step": 35250 }, { "epoch": 0.30483956040155297, "grad_norm": 13.945242454610137, "learning_rate": 5.6625887780562815e-06, "loss": 0.38195114135742186, "step": 35255 }, { "epoch": 0.3048827939230962, "grad_norm": 2.32472097093018, "learning_rate": 5.6624949017124e-06, "loss": 0.15382843017578124, "step": 35260 }, { "epoch": 0.3049260274446395, "grad_norm": 11.706890926957671, "learning_rate": 5.662401013089327e-06, "loss": 0.180816650390625, "step": 35265 }, { "epoch": 0.30496926096618276, "grad_norm": 16.905526898420195, "learning_rate": 5.662307112187497e-06, "loss": 0.11253662109375, "step": 35270 }, { "epoch": 0.305012494487726, "grad_norm": 32.80074937134953, "learning_rate": 5.662213199007341e-06, "loss": 0.09976425170898437, "step": 35275 }, { "epoch": 0.3050557280092693, "grad_norm": 34.49755104942082, "learning_rate": 5.6621192735492936e-06, "loss": 0.32784576416015626, "step": 35280 }, { "epoch": 0.3050989615308125, "grad_norm": 19.125634175774824, "learning_rate": 5.662025335813787e-06, "loss": 0.137396240234375, "step": 35285 }, { "epoch": 0.3051421950523558, "grad_norm": 20.40930518230756, "learning_rate": 5.661931385801255e-06, "loss": 0.19941635131835939, "step": 35290 }, { "epoch": 0.3051854285738991, "grad_norm": 27.064818507709564, "learning_rate": 5.66183742351213e-06, "loss": 0.2498138427734375, "step": 35295 }, { "epoch": 0.3052286620954423, "grad_norm": 0.861596617923565, "learning_rate": 5.661743448946847e-06, "loss": 0.218243408203125, "step": 35300 }, { "epoch": 0.3052718956169856, "grad_norm": 1.8824557319977313, "learning_rate": 5.661649462105837e-06, "loss": 0.1149139404296875, "step": 35305 }, { "epoch": 0.3053151291385289, "grad_norm": 22.954295926868863, "learning_rate": 5.661555462989536e-06, "loss": 0.14272670745849608, "step": 35310 }, { "epoch": 0.3053583626600721, "grad_norm": 43.69549540741199, "learning_rate": 5.661461451598376e-06, "loss": 0.346588134765625, "step": 35315 }, { "epoch": 0.3054015961816154, "grad_norm": 37.6318493862111, "learning_rate": 5.661367427932791e-06, "loss": 0.299310302734375, "step": 35320 }, { "epoch": 0.3054448297031586, "grad_norm": 17.941769559268987, "learning_rate": 5.661273391993215e-06, "loss": 0.17588768005371094, "step": 35325 }, { "epoch": 0.3054880632247019, "grad_norm": 1.5154747838154436, "learning_rate": 5.66117934378008e-06, "loss": 0.411907958984375, "step": 35330 }, { "epoch": 0.3055312967462452, "grad_norm": 0.9180008572510325, "learning_rate": 5.661085283293822e-06, "loss": 0.120550537109375, "step": 35335 }, { "epoch": 0.3055745302677884, "grad_norm": 43.29838244043593, "learning_rate": 5.660991210534874e-06, "loss": 0.24273834228515626, "step": 35340 }, { "epoch": 0.3056177637893317, "grad_norm": 2.816168986125297, "learning_rate": 5.6608971255036694e-06, "loss": 0.203350830078125, "step": 35345 }, { "epoch": 0.305660997310875, "grad_norm": 2.3667880931834455, "learning_rate": 5.660803028200642e-06, "loss": 0.09771270751953125, "step": 35350 }, { "epoch": 0.3057042308324182, "grad_norm": 5.560273138001805, "learning_rate": 5.660708918626226e-06, "loss": 0.148736572265625, "step": 35355 }, { "epoch": 0.3057474643539615, "grad_norm": 32.520295978920146, "learning_rate": 5.660614796780857e-06, "loss": 0.2249481201171875, "step": 35360 }, { "epoch": 0.3057906978755047, "grad_norm": 7.110949092116069, "learning_rate": 5.660520662664967e-06, "loss": 0.142791748046875, "step": 35365 }, { "epoch": 0.305833931397048, "grad_norm": 3.1027955256451003, "learning_rate": 5.66042651627899e-06, "loss": 0.056136703491210936, "step": 35370 }, { "epoch": 0.3058771649185913, "grad_norm": 17.799710964387526, "learning_rate": 5.660332357623361e-06, "loss": 0.19279251098632813, "step": 35375 }, { "epoch": 0.3059203984401345, "grad_norm": 0.6083118557078853, "learning_rate": 5.660238186698515e-06, "loss": 0.6414466857910156, "step": 35380 }, { "epoch": 0.3059636319616778, "grad_norm": 20.530503073534824, "learning_rate": 5.660144003504885e-06, "loss": 0.087744140625, "step": 35385 }, { "epoch": 0.3060068654832211, "grad_norm": 12.945883942559654, "learning_rate": 5.660049808042907e-06, "loss": 0.0767852783203125, "step": 35390 }, { "epoch": 0.3060500990047643, "grad_norm": 2.845586654560123, "learning_rate": 5.659955600313013e-06, "loss": 0.09300384521484376, "step": 35395 }, { "epoch": 0.3060933325263076, "grad_norm": 12.462719576780337, "learning_rate": 5.659861380315639e-06, "loss": 0.1655853271484375, "step": 35400 }, { "epoch": 0.3061365660478509, "grad_norm": 32.68519211678814, "learning_rate": 5.65976714805122e-06, "loss": 0.2723876953125, "step": 35405 }, { "epoch": 0.3061797995693941, "grad_norm": 15.655484204366173, "learning_rate": 5.659672903520189e-06, "loss": 0.17003326416015624, "step": 35410 }, { "epoch": 0.3062230330909374, "grad_norm": 25.19273840237559, "learning_rate": 5.659578646722983e-06, "loss": 0.1945709228515625, "step": 35415 }, { "epoch": 0.30626626661248063, "grad_norm": 26.46479298656278, "learning_rate": 5.659484377660034e-06, "loss": 0.18184928894042968, "step": 35420 }, { "epoch": 0.3063095001340239, "grad_norm": 0.2935756825578229, "learning_rate": 5.659390096331779e-06, "loss": 0.24453334808349608, "step": 35425 }, { "epoch": 0.3063527336555672, "grad_norm": 31.893324486236914, "learning_rate": 5.659295802738651e-06, "loss": 0.1405487060546875, "step": 35430 }, { "epoch": 0.30639596717711043, "grad_norm": 4.966901994284098, "learning_rate": 5.6592014968810865e-06, "loss": 0.45253677368164064, "step": 35435 }, { "epoch": 0.3064392006986537, "grad_norm": 9.979196087251553, "learning_rate": 5.6591071787595185e-06, "loss": 0.09457817077636718, "step": 35440 }, { "epoch": 0.306482434220197, "grad_norm": 30.493042702035734, "learning_rate": 5.659012848374384e-06, "loss": 0.1599506378173828, "step": 35445 }, { "epoch": 0.3065256677417402, "grad_norm": 16.178823510054087, "learning_rate": 5.658918505726117e-06, "loss": 0.20618133544921874, "step": 35450 }, { "epoch": 0.3065689012632835, "grad_norm": 16.005169973508927, "learning_rate": 5.658824150815154e-06, "loss": 0.2524566650390625, "step": 35455 }, { "epoch": 0.30661213478482674, "grad_norm": 3.695171049082386, "learning_rate": 5.658729783641927e-06, "loss": 0.1703929901123047, "step": 35460 }, { "epoch": 0.30665536830637, "grad_norm": 6.842178821492992, "learning_rate": 5.658635404206875e-06, "loss": 0.051213645935058595, "step": 35465 }, { "epoch": 0.3066986018279133, "grad_norm": 10.471607052460891, "learning_rate": 5.65854101251043e-06, "loss": 0.576617431640625, "step": 35470 }, { "epoch": 0.30674183534945654, "grad_norm": 4.0190411350708635, "learning_rate": 5.658446608553029e-06, "loss": 0.0903076171875, "step": 35475 }, { "epoch": 0.3067850688709998, "grad_norm": 0.41973305895506346, "learning_rate": 5.658352192335107e-06, "loss": 0.17041854858398436, "step": 35480 }, { "epoch": 0.3068283023925431, "grad_norm": 1.3924297531555687, "learning_rate": 5.6582577638571e-06, "loss": 0.148284912109375, "step": 35485 }, { "epoch": 0.30687153591408634, "grad_norm": 14.794803875259138, "learning_rate": 5.658163323119443e-06, "loss": 0.35540924072265623, "step": 35490 }, { "epoch": 0.3069147694356296, "grad_norm": 27.42225337267716, "learning_rate": 5.658068870122572e-06, "loss": 0.169329833984375, "step": 35495 }, { "epoch": 0.30695800295717285, "grad_norm": 13.224365791620812, "learning_rate": 5.657974404866922e-06, "loss": 0.20861434936523438, "step": 35500 }, { "epoch": 0.30700123647871613, "grad_norm": 3.6136097595546897, "learning_rate": 5.657879927352928e-06, "loss": 0.059114837646484376, "step": 35505 }, { "epoch": 0.3070444700002594, "grad_norm": 3.9958270510892357, "learning_rate": 5.657785437581029e-06, "loss": 0.412896728515625, "step": 35510 }, { "epoch": 0.30708770352180265, "grad_norm": 1.7509442726913815, "learning_rate": 5.657690935551656e-06, "loss": 0.3031829833984375, "step": 35515 }, { "epoch": 0.30713093704334593, "grad_norm": 5.003644931844607, "learning_rate": 5.657596421265248e-06, "loss": 0.13743667602539061, "step": 35520 }, { "epoch": 0.3071741705648892, "grad_norm": 9.230460218521278, "learning_rate": 5.65750189472224e-06, "loss": 0.4206672668457031, "step": 35525 }, { "epoch": 0.30721740408643244, "grad_norm": 5.285796362599028, "learning_rate": 5.6574073559230685e-06, "loss": 0.19463424682617186, "step": 35530 }, { "epoch": 0.30726063760797573, "grad_norm": 3.1397342410011966, "learning_rate": 5.657312804868169e-06, "loss": 0.07633209228515625, "step": 35535 }, { "epoch": 0.30730387112951896, "grad_norm": 27.63998502023215, "learning_rate": 5.657218241557978e-06, "loss": 0.22066802978515626, "step": 35540 }, { "epoch": 0.30734710465106224, "grad_norm": 3.5900108813580274, "learning_rate": 5.65712366599293e-06, "loss": 0.0927703857421875, "step": 35545 }, { "epoch": 0.3073903381726055, "grad_norm": 1.7455720711400569, "learning_rate": 5.657029078173464e-06, "loss": 0.0636383056640625, "step": 35550 }, { "epoch": 0.30743357169414876, "grad_norm": 0.6848608875585004, "learning_rate": 5.656934478100013e-06, "loss": 0.17392425537109374, "step": 35555 }, { "epoch": 0.30747680521569204, "grad_norm": 26.120845225278348, "learning_rate": 5.6568398657730165e-06, "loss": 0.2119781494140625, "step": 35560 }, { "epoch": 0.3075200387372353, "grad_norm": 7.845920317411341, "learning_rate": 5.656745241192909e-06, "loss": 0.276104736328125, "step": 35565 }, { "epoch": 0.30756327225877855, "grad_norm": 1.0548228095834302, "learning_rate": 5.656650604360127e-06, "loss": 0.38927135467529295, "step": 35570 }, { "epoch": 0.30760650578032184, "grad_norm": 1.193137591783013, "learning_rate": 5.656555955275107e-06, "loss": 0.435362434387207, "step": 35575 }, { "epoch": 0.3076497393018651, "grad_norm": 33.11581767355793, "learning_rate": 5.6564612939382856e-06, "loss": 0.419647216796875, "step": 35580 }, { "epoch": 0.30769297282340835, "grad_norm": 1.2745737609087506, "learning_rate": 5.656366620350099e-06, "loss": 0.095233154296875, "step": 35585 }, { "epoch": 0.30773620634495163, "grad_norm": 1.3258008144871847, "learning_rate": 5.656271934510985e-06, "loss": 0.24014892578125, "step": 35590 }, { "epoch": 0.30777943986649486, "grad_norm": 16.791078425666157, "learning_rate": 5.6561772364213796e-06, "loss": 0.162744140625, "step": 35595 }, { "epoch": 0.30782267338803815, "grad_norm": 2.9180324962266813, "learning_rate": 5.656082526081719e-06, "loss": 0.038762664794921874, "step": 35600 }, { "epoch": 0.30786590690958143, "grad_norm": 4.681177065317217, "learning_rate": 5.655987803492441e-06, "loss": 0.1269256591796875, "step": 35605 }, { "epoch": 0.30790914043112466, "grad_norm": 3.1963266231704424, "learning_rate": 5.655893068653982e-06, "loss": 0.5990036010742188, "step": 35610 }, { "epoch": 0.30795237395266795, "grad_norm": 13.84927974204587, "learning_rate": 5.655798321566778e-06, "loss": 0.22926025390625, "step": 35615 }, { "epoch": 0.30799560747421123, "grad_norm": 2.435317772800361, "learning_rate": 5.655703562231267e-06, "loss": 0.111700439453125, "step": 35620 }, { "epoch": 0.30803884099575446, "grad_norm": 20.013596446862742, "learning_rate": 5.655608790647887e-06, "loss": 0.1384746551513672, "step": 35625 }, { "epoch": 0.30808207451729774, "grad_norm": 36.8021974849765, "learning_rate": 5.655514006817073e-06, "loss": 0.24968185424804687, "step": 35630 }, { "epoch": 0.308125308038841, "grad_norm": 2.6778933717545055, "learning_rate": 5.6554192107392625e-06, "loss": 0.121026611328125, "step": 35635 }, { "epoch": 0.30816854156038426, "grad_norm": 1.1129647617930678, "learning_rate": 5.655324402414895e-06, "loss": 0.1717041015625, "step": 35640 }, { "epoch": 0.30821177508192754, "grad_norm": 15.006409395337485, "learning_rate": 5.655229581844404e-06, "loss": 0.08816375732421874, "step": 35645 }, { "epoch": 0.30825500860347077, "grad_norm": 39.12378886687061, "learning_rate": 5.65513474902823e-06, "loss": 0.22104339599609374, "step": 35650 }, { "epoch": 0.30829824212501405, "grad_norm": 13.943311033441574, "learning_rate": 5.655039903966808e-06, "loss": 0.238079833984375, "step": 35655 }, { "epoch": 0.30834147564655734, "grad_norm": 1.3212645738970212, "learning_rate": 5.654945046660578e-06, "loss": 0.15815353393554688, "step": 35660 }, { "epoch": 0.30838470916810057, "grad_norm": 21.162548538491734, "learning_rate": 5.654850177109975e-06, "loss": 0.2176300048828125, "step": 35665 }, { "epoch": 0.30842794268964385, "grad_norm": 1.0508967774515285, "learning_rate": 5.654755295315439e-06, "loss": 0.047137451171875, "step": 35670 }, { "epoch": 0.3084711762111871, "grad_norm": 0.6215462861224903, "learning_rate": 5.654660401277404e-06, "loss": 0.08768463134765625, "step": 35675 }, { "epoch": 0.30851440973273037, "grad_norm": 0.4276727968384022, "learning_rate": 5.654565494996311e-06, "loss": 0.1435028076171875, "step": 35680 }, { "epoch": 0.30855764325427365, "grad_norm": 0.8388071004212652, "learning_rate": 5.654470576472597e-06, "loss": 0.14579544067382813, "step": 35685 }, { "epoch": 0.3086008767758169, "grad_norm": 17.491759171308427, "learning_rate": 5.654375645706699e-06, "loss": 0.10400238037109374, "step": 35690 }, { "epoch": 0.30864411029736016, "grad_norm": 0.9228017355801842, "learning_rate": 5.654280702699054e-06, "loss": 0.36126346588134767, "step": 35695 }, { "epoch": 0.30868734381890345, "grad_norm": 31.7666051254643, "learning_rate": 5.654185747450102e-06, "loss": 0.2362396240234375, "step": 35700 }, { "epoch": 0.3087305773404467, "grad_norm": 1.0647281253878442, "learning_rate": 5.654090779960279e-06, "loss": 0.13757095336914063, "step": 35705 }, { "epoch": 0.30877381086198996, "grad_norm": 7.882973440038241, "learning_rate": 5.653995800230025e-06, "loss": 0.071246337890625, "step": 35710 }, { "epoch": 0.3088170443835332, "grad_norm": 21.86270587537461, "learning_rate": 5.653900808259776e-06, "loss": 0.148797607421875, "step": 35715 }, { "epoch": 0.3088602779050765, "grad_norm": 53.151675883101284, "learning_rate": 5.653805804049971e-06, "loss": 0.261798095703125, "step": 35720 }, { "epoch": 0.30890351142661976, "grad_norm": 15.063247119929244, "learning_rate": 5.653710787601049e-06, "loss": 0.06905326843261719, "step": 35725 }, { "epoch": 0.308946744948163, "grad_norm": 0.276773113085488, "learning_rate": 5.6536157589134456e-06, "loss": 0.371527099609375, "step": 35730 }, { "epoch": 0.3089899784697063, "grad_norm": 45.78613335563515, "learning_rate": 5.653520717987602e-06, "loss": 0.32842388153076174, "step": 35735 }, { "epoch": 0.30903321199124956, "grad_norm": 16.89773266241962, "learning_rate": 5.653425664823955e-06, "loss": 0.12626953125, "step": 35740 }, { "epoch": 0.3090764455127928, "grad_norm": 5.24368949038201, "learning_rate": 5.653330599422944e-06, "loss": 0.09044342041015625, "step": 35745 }, { "epoch": 0.30911967903433607, "grad_norm": 3.7773282697960697, "learning_rate": 5.653235521785006e-06, "loss": 0.15821571350097657, "step": 35750 }, { "epoch": 0.3091629125558793, "grad_norm": 23.284543196500863, "learning_rate": 5.6531404319105815e-06, "loss": 0.35816802978515627, "step": 35755 }, { "epoch": 0.3092061460774226, "grad_norm": 4.381798853477227, "learning_rate": 5.653045329800107e-06, "loss": 0.160528564453125, "step": 35760 }, { "epoch": 0.30924937959896587, "grad_norm": 4.17562441586429, "learning_rate": 5.652950215454021e-06, "loss": 0.11064910888671875, "step": 35765 }, { "epoch": 0.3092926131205091, "grad_norm": 2.112331663373736, "learning_rate": 5.6528550888727635e-06, "loss": 0.07792816162109376, "step": 35770 }, { "epoch": 0.3093358466420524, "grad_norm": 1.7588150401407079, "learning_rate": 5.6527599500567725e-06, "loss": 0.08489913940429687, "step": 35775 }, { "epoch": 0.30937908016359567, "grad_norm": 2.2827763543026367, "learning_rate": 5.6526647990064874e-06, "loss": 0.24631500244140625, "step": 35780 }, { "epoch": 0.3094223136851389, "grad_norm": 1.6051869858387453, "learning_rate": 5.652569635722347e-06, "loss": 0.013746833801269532, "step": 35785 }, { "epoch": 0.3094655472066822, "grad_norm": 27.665007317663093, "learning_rate": 5.6524744602047895e-06, "loss": 0.563250732421875, "step": 35790 }, { "epoch": 0.30950878072822546, "grad_norm": 1.3193890785866282, "learning_rate": 5.652379272454253e-06, "loss": 0.07656936645507813, "step": 35795 }, { "epoch": 0.3095520142497687, "grad_norm": 30.295669988429655, "learning_rate": 5.652284072471179e-06, "loss": 0.3625457763671875, "step": 35800 }, { "epoch": 0.309595247771312, "grad_norm": 0.7042859377286909, "learning_rate": 5.652188860256005e-06, "loss": 0.05523529052734375, "step": 35805 }, { "epoch": 0.3096384812928552, "grad_norm": 4.367515521238749, "learning_rate": 5.652093635809171e-06, "loss": 0.14855728149414063, "step": 35810 }, { "epoch": 0.3096817148143985, "grad_norm": 59.171829153440164, "learning_rate": 5.651998399131114e-06, "loss": 0.5069282531738282, "step": 35815 }, { "epoch": 0.3097249483359418, "grad_norm": 26.827016233318073, "learning_rate": 5.651903150222276e-06, "loss": 0.419158935546875, "step": 35820 }, { "epoch": 0.309768181857485, "grad_norm": 0.8551608145247368, "learning_rate": 5.651807889083095e-06, "loss": 0.15762786865234374, "step": 35825 }, { "epoch": 0.3098114153790283, "grad_norm": 2.7142475452184343, "learning_rate": 5.651712615714009e-06, "loss": 0.176397705078125, "step": 35830 }, { "epoch": 0.30985464890057157, "grad_norm": 2.0780233215064814, "learning_rate": 5.65161733011546e-06, "loss": 0.37719268798828126, "step": 35835 }, { "epoch": 0.3098978824221148, "grad_norm": 5.441142691133919, "learning_rate": 5.651522032287886e-06, "loss": 0.296893310546875, "step": 35840 }, { "epoch": 0.3099411159436581, "grad_norm": 7.955627082224128, "learning_rate": 5.651426722231726e-06, "loss": 0.23102493286132814, "step": 35845 }, { "epoch": 0.3099843494652013, "grad_norm": 15.762541786187976, "learning_rate": 5.65133139994742e-06, "loss": 0.23237133026123047, "step": 35850 }, { "epoch": 0.3100275829867446, "grad_norm": 4.108372571422783, "learning_rate": 5.651236065435409e-06, "loss": 0.06200485229492188, "step": 35855 }, { "epoch": 0.3100708165082879, "grad_norm": 1.2382487033769058, "learning_rate": 5.651140718696131e-06, "loss": 0.3012939453125, "step": 35860 }, { "epoch": 0.3101140500298311, "grad_norm": 3.8088731201327906, "learning_rate": 5.651045359730027e-06, "loss": 0.16054611206054686, "step": 35865 }, { "epoch": 0.3101572835513744, "grad_norm": 7.271513831169026, "learning_rate": 5.650949988537536e-06, "loss": 0.26195068359375, "step": 35870 }, { "epoch": 0.3102005170729177, "grad_norm": 6.275267487609342, "learning_rate": 5.650854605119095e-06, "loss": 0.22418975830078125, "step": 35875 }, { "epoch": 0.3102437505944609, "grad_norm": 4.690909083124023, "learning_rate": 5.65075920947515e-06, "loss": 0.090850830078125, "step": 35880 }, { "epoch": 0.3102869841160042, "grad_norm": 0.8727075411218912, "learning_rate": 5.650663801606137e-06, "loss": 0.265606689453125, "step": 35885 }, { "epoch": 0.3103302176375474, "grad_norm": 3.1805182277947828, "learning_rate": 5.6505683815124966e-06, "loss": 0.9016815185546875, "step": 35890 }, { "epoch": 0.3103734511590907, "grad_norm": 3.320197008748215, "learning_rate": 5.650472949194669e-06, "loss": 0.4172271728515625, "step": 35895 }, { "epoch": 0.310416684680634, "grad_norm": 4.569285362877149, "learning_rate": 5.6503775046530944e-06, "loss": 0.1626220703125, "step": 35900 }, { "epoch": 0.3104599182021772, "grad_norm": 17.583999066633126, "learning_rate": 5.650282047888213e-06, "loss": 0.10985527038574219, "step": 35905 }, { "epoch": 0.3105031517237205, "grad_norm": 18.42776364844614, "learning_rate": 5.6501865789004646e-06, "loss": 0.23340377807617188, "step": 35910 }, { "epoch": 0.3105463852452638, "grad_norm": 14.981159390927727, "learning_rate": 5.65009109769029e-06, "loss": 0.079620361328125, "step": 35915 }, { "epoch": 0.310589618766807, "grad_norm": 12.673499048523494, "learning_rate": 5.64999560425813e-06, "loss": 0.1598388671875, "step": 35920 }, { "epoch": 0.3106328522883503, "grad_norm": 9.798490636832435, "learning_rate": 5.649900098604424e-06, "loss": 0.35782318115234374, "step": 35925 }, { "epoch": 0.31067608580989353, "grad_norm": 67.94585098219278, "learning_rate": 5.649804580729613e-06, "loss": 0.371142578125, "step": 35930 }, { "epoch": 0.3107193193314368, "grad_norm": 80.51791567071045, "learning_rate": 5.649709050634137e-06, "loss": 0.0751678466796875, "step": 35935 }, { "epoch": 0.3107625528529801, "grad_norm": 1.0132014115896686, "learning_rate": 5.649613508318437e-06, "loss": 0.0813812255859375, "step": 35940 }, { "epoch": 0.31080578637452333, "grad_norm": 4.108827932344216, "learning_rate": 5.649517953782954e-06, "loss": 0.1635009765625, "step": 35945 }, { "epoch": 0.3108490198960666, "grad_norm": 0.4533054835723854, "learning_rate": 5.649422387028129e-06, "loss": 0.2252145767211914, "step": 35950 }, { "epoch": 0.3108922534176099, "grad_norm": 0.37494896925670923, "learning_rate": 5.649326808054401e-06, "loss": 0.1462493896484375, "step": 35955 }, { "epoch": 0.3109354869391531, "grad_norm": 2.2426873681604147, "learning_rate": 5.6492312168622116e-06, "loss": 0.04672164916992187, "step": 35960 }, { "epoch": 0.3109787204606964, "grad_norm": 0.33217059822535333, "learning_rate": 5.649135613452003e-06, "loss": 0.039020538330078125, "step": 35965 }, { "epoch": 0.3110219539822397, "grad_norm": 34.700948012464266, "learning_rate": 5.6490399978242145e-06, "loss": 0.611224365234375, "step": 35970 }, { "epoch": 0.3110651875037829, "grad_norm": 1.7255558586130653, "learning_rate": 5.6489443699792876e-06, "loss": 0.5377662658691407, "step": 35975 }, { "epoch": 0.3111084210253262, "grad_norm": 6.8569837140045085, "learning_rate": 5.648848729917663e-06, "loss": 0.04552001953125, "step": 35980 }, { "epoch": 0.31115165454686944, "grad_norm": 6.579587935507502, "learning_rate": 5.648753077639783e-06, "loss": 0.08783340454101562, "step": 35985 }, { "epoch": 0.3111948880684127, "grad_norm": 52.6893497951623, "learning_rate": 5.648657413146087e-06, "loss": 0.6396354675292969, "step": 35990 }, { "epoch": 0.311238121589956, "grad_norm": 12.880564470179982, "learning_rate": 5.648561736437017e-06, "loss": 0.23861846923828126, "step": 35995 }, { "epoch": 0.31128135511149924, "grad_norm": 17.948763864051575, "learning_rate": 5.648466047513015e-06, "loss": 0.313690185546875, "step": 36000 }, { "epoch": 0.3113245886330425, "grad_norm": 4.396402466213762, "learning_rate": 5.648370346374521e-06, "loss": 0.07823028564453124, "step": 36005 }, { "epoch": 0.3113678221545858, "grad_norm": 28.062225305099084, "learning_rate": 5.6482746330219775e-06, "loss": 0.33451385498046876, "step": 36010 }, { "epoch": 0.31141105567612903, "grad_norm": 0.871066410083372, "learning_rate": 5.648178907455825e-06, "loss": 0.1694061279296875, "step": 36015 }, { "epoch": 0.3114542891976723, "grad_norm": 0.27395170317944195, "learning_rate": 5.6480831696765045e-06, "loss": 0.10774765014648438, "step": 36020 }, { "epoch": 0.31149752271921555, "grad_norm": 40.451465154358225, "learning_rate": 5.64798741968446e-06, "loss": 0.193865966796875, "step": 36025 }, { "epoch": 0.31154075624075883, "grad_norm": 2.9563311613417373, "learning_rate": 5.647891657480132e-06, "loss": 0.04491119384765625, "step": 36030 }, { "epoch": 0.3115839897623021, "grad_norm": 5.651078993740017, "learning_rate": 5.647795883063959e-06, "loss": 0.24135169982910157, "step": 36035 }, { "epoch": 0.31162722328384534, "grad_norm": 18.38015509167032, "learning_rate": 5.647700096436388e-06, "loss": 0.09198074340820313, "step": 36040 }, { "epoch": 0.31167045680538863, "grad_norm": 0.9689115736545411, "learning_rate": 5.647604297597857e-06, "loss": 0.19177093505859374, "step": 36045 }, { "epoch": 0.3117136903269319, "grad_norm": 9.39445058454502, "learning_rate": 5.647508486548809e-06, "loss": 0.28518524169921877, "step": 36050 }, { "epoch": 0.31175692384847514, "grad_norm": 14.875254062883432, "learning_rate": 5.6474126632896855e-06, "loss": 0.3167236328125, "step": 36055 }, { "epoch": 0.3118001573700184, "grad_norm": 1.4832160729854087, "learning_rate": 5.6473168278209285e-06, "loss": 0.24754638671875, "step": 36060 }, { "epoch": 0.31184339089156166, "grad_norm": 8.529532511470594, "learning_rate": 5.64722098014298e-06, "loss": 0.246746826171875, "step": 36065 }, { "epoch": 0.31188662441310494, "grad_norm": 15.042938369051177, "learning_rate": 5.647125120256284e-06, "loss": 0.2708282470703125, "step": 36070 }, { "epoch": 0.3119298579346482, "grad_norm": 39.738791967390085, "learning_rate": 5.64702924816128e-06, "loss": 0.15892677307128905, "step": 36075 }, { "epoch": 0.31197309145619145, "grad_norm": 8.822524894233053, "learning_rate": 5.64693336385841e-06, "loss": 0.25106239318847656, "step": 36080 }, { "epoch": 0.31201632497773474, "grad_norm": 0.7586349854726611, "learning_rate": 5.646837467348118e-06, "loss": 0.16045074462890624, "step": 36085 }, { "epoch": 0.312059558499278, "grad_norm": 32.94686149724854, "learning_rate": 5.646741558630845e-06, "loss": 0.22035980224609375, "step": 36090 }, { "epoch": 0.31210279202082125, "grad_norm": 9.163254157606886, "learning_rate": 5.646645637707035e-06, "loss": 0.101788330078125, "step": 36095 }, { "epoch": 0.31214602554236454, "grad_norm": 6.510071913721599, "learning_rate": 5.646549704577128e-06, "loss": 0.129290771484375, "step": 36100 }, { "epoch": 0.31218925906390776, "grad_norm": 116.16321791316622, "learning_rate": 5.6464537592415684e-06, "loss": 0.19212646484375, "step": 36105 }, { "epoch": 0.31223249258545105, "grad_norm": 0.5976687412649849, "learning_rate": 5.646357801700797e-06, "loss": 0.3103675842285156, "step": 36110 }, { "epoch": 0.31227572610699433, "grad_norm": 3.9637629162909818, "learning_rate": 5.646261831955259e-06, "loss": 0.22373046875, "step": 36115 }, { "epoch": 0.31231895962853756, "grad_norm": 1.3880492387265897, "learning_rate": 5.646165850005394e-06, "loss": 0.15402069091796874, "step": 36120 }, { "epoch": 0.31236219315008085, "grad_norm": 5.4828735795492145, "learning_rate": 5.6460698558516455e-06, "loss": 0.2273773193359375, "step": 36125 }, { "epoch": 0.31240542667162413, "grad_norm": 17.603808840311643, "learning_rate": 5.6459738494944565e-06, "loss": 0.4681827545166016, "step": 36130 }, { "epoch": 0.31244866019316736, "grad_norm": 2.9948500540435194, "learning_rate": 5.645877830934271e-06, "loss": 0.3147247314453125, "step": 36135 }, { "epoch": 0.31249189371471064, "grad_norm": 13.181586227499906, "learning_rate": 5.645781800171531e-06, "loss": 0.1822998046875, "step": 36140 }, { "epoch": 0.31253512723625393, "grad_norm": 35.44935757706715, "learning_rate": 5.645685757206678e-06, "loss": 0.28118438720703126, "step": 36145 }, { "epoch": 0.31257836075779716, "grad_norm": 6.866385269614804, "learning_rate": 5.645589702040157e-06, "loss": 0.4679962158203125, "step": 36150 }, { "epoch": 0.31262159427934044, "grad_norm": 6.258179600413498, "learning_rate": 5.6454936346724095e-06, "loss": 0.08848876953125, "step": 36155 }, { "epoch": 0.31266482780088367, "grad_norm": 6.609674540378534, "learning_rate": 5.645397555103879e-06, "loss": 0.3138427734375, "step": 36160 }, { "epoch": 0.31270806132242696, "grad_norm": 0.8102593565836679, "learning_rate": 5.645301463335009e-06, "loss": 0.1062744140625, "step": 36165 }, { "epoch": 0.31275129484397024, "grad_norm": 4.645971545028843, "learning_rate": 5.645205359366242e-06, "loss": 0.183880615234375, "step": 36170 }, { "epoch": 0.31279452836551347, "grad_norm": 36.91984643295803, "learning_rate": 5.645109243198021e-06, "loss": 0.34923782348632815, "step": 36175 }, { "epoch": 0.31283776188705675, "grad_norm": 8.813146398192123, "learning_rate": 5.645013114830791e-06, "loss": 0.23626289367675782, "step": 36180 }, { "epoch": 0.31288099540860004, "grad_norm": 45.79609558856121, "learning_rate": 5.644916974264995e-06, "loss": 0.37676239013671875, "step": 36185 }, { "epoch": 0.31292422893014327, "grad_norm": 14.208456919684124, "learning_rate": 5.644820821501075e-06, "loss": 0.232720947265625, "step": 36190 }, { "epoch": 0.31296746245168655, "grad_norm": 3.500228252597335, "learning_rate": 5.6447246565394744e-06, "loss": 0.189013671875, "step": 36195 }, { "epoch": 0.3130106959732298, "grad_norm": 28.167777341154366, "learning_rate": 5.644628479380637e-06, "loss": 0.241424560546875, "step": 36200 }, { "epoch": 0.31305392949477306, "grad_norm": 1.787782682286997, "learning_rate": 5.644532290025008e-06, "loss": 0.21682281494140626, "step": 36205 }, { "epoch": 0.31309716301631635, "grad_norm": 12.178411201516411, "learning_rate": 5.644436088473029e-06, "loss": 0.23642578125, "step": 36210 }, { "epoch": 0.3131403965378596, "grad_norm": 15.016891324928007, "learning_rate": 5.644339874725145e-06, "loss": 0.1345245361328125, "step": 36215 }, { "epoch": 0.31318363005940286, "grad_norm": 4.035252208264001, "learning_rate": 5.644243648781799e-06, "loss": 0.225762939453125, "step": 36220 }, { "epoch": 0.31322686358094615, "grad_norm": 0.06373940898256869, "learning_rate": 5.644147410643434e-06, "loss": 0.3137836456298828, "step": 36225 }, { "epoch": 0.3132700971024894, "grad_norm": 6.732154451421975, "learning_rate": 5.644051160310496e-06, "loss": 0.17800865173339844, "step": 36230 }, { "epoch": 0.31331333062403266, "grad_norm": 32.0738358842478, "learning_rate": 5.6439548977834266e-06, "loss": 0.5329544067382812, "step": 36235 }, { "epoch": 0.3133565641455759, "grad_norm": 0.31015658134433177, "learning_rate": 5.643858623062672e-06, "loss": 0.05234222412109375, "step": 36240 }, { "epoch": 0.3133997976671192, "grad_norm": 2.4533704962127665, "learning_rate": 5.643762336148674e-06, "loss": 0.082830810546875, "step": 36245 }, { "epoch": 0.31344303118866246, "grad_norm": 30.446875370192284, "learning_rate": 5.643666037041878e-06, "loss": 0.156317138671875, "step": 36250 }, { "epoch": 0.3134862647102057, "grad_norm": 22.59186184607426, "learning_rate": 5.6435697257427274e-06, "loss": 0.154193115234375, "step": 36255 }, { "epoch": 0.31352949823174897, "grad_norm": 22.113672650194246, "learning_rate": 5.643473402251668e-06, "loss": 0.4265174865722656, "step": 36260 }, { "epoch": 0.31357273175329226, "grad_norm": 2.982494745874196, "learning_rate": 5.643377066569142e-06, "loss": 0.12387924194335938, "step": 36265 }, { "epoch": 0.3136159652748355, "grad_norm": 2.3625252608486376, "learning_rate": 5.643280718695595e-06, "loss": 0.1287078857421875, "step": 36270 }, { "epoch": 0.31365919879637877, "grad_norm": 4.792698543283166, "learning_rate": 5.643184358631471e-06, "loss": 0.06996383666992187, "step": 36275 }, { "epoch": 0.313702432317922, "grad_norm": 4.261428823026283, "learning_rate": 5.643087986377214e-06, "loss": 0.041042327880859375, "step": 36280 }, { "epoch": 0.3137456658394653, "grad_norm": 43.80032548821768, "learning_rate": 5.642991601933268e-06, "loss": 0.49761962890625, "step": 36285 }, { "epoch": 0.31378889936100857, "grad_norm": 3.284105798227991, "learning_rate": 5.642895205300079e-06, "loss": 0.1706634521484375, "step": 36290 }, { "epoch": 0.3138321328825518, "grad_norm": 37.35041494792823, "learning_rate": 5.6427987964780915e-06, "loss": 0.12188491821289063, "step": 36295 }, { "epoch": 0.3138753664040951, "grad_norm": 2.0416188854972477, "learning_rate": 5.6427023754677486e-06, "loss": 0.2066934585571289, "step": 36300 }, { "epoch": 0.31391859992563836, "grad_norm": 1.5476805739570194, "learning_rate": 5.642605942269496e-06, "loss": 0.32511558532714846, "step": 36305 }, { "epoch": 0.3139618334471816, "grad_norm": 17.946087009309814, "learning_rate": 5.642509496883778e-06, "loss": 0.20484161376953125, "step": 36310 }, { "epoch": 0.3140050669687249, "grad_norm": 2.917186044861263, "learning_rate": 5.642413039311041e-06, "loss": 0.11628646850585937, "step": 36315 }, { "epoch": 0.31404830049026816, "grad_norm": 29.288636179482594, "learning_rate": 5.642316569551727e-06, "loss": 0.6002525329589844, "step": 36320 }, { "epoch": 0.3140915340118114, "grad_norm": 0.3218126998330867, "learning_rate": 5.642220087606283e-06, "loss": 0.13141994476318358, "step": 36325 }, { "epoch": 0.3141347675333547, "grad_norm": 12.866872720949146, "learning_rate": 5.642123593475154e-06, "loss": 0.09108505249023438, "step": 36330 }, { "epoch": 0.3141780010548979, "grad_norm": 32.13328792170105, "learning_rate": 5.642027087158783e-06, "loss": 0.372222900390625, "step": 36335 }, { "epoch": 0.3142212345764412, "grad_norm": 9.26601930729928, "learning_rate": 5.641930568657618e-06, "loss": 0.17247314453125, "step": 36340 }, { "epoch": 0.3142644680979845, "grad_norm": 20.43849709973703, "learning_rate": 5.641834037972103e-06, "loss": 0.4549278259277344, "step": 36345 }, { "epoch": 0.3143077016195277, "grad_norm": 1.2512491575188984, "learning_rate": 5.6417374951026814e-06, "loss": 0.17237548828125, "step": 36350 }, { "epoch": 0.314350935141071, "grad_norm": 40.476381255753715, "learning_rate": 5.641640940049801e-06, "loss": 0.45835723876953127, "step": 36355 }, { "epoch": 0.31439416866261427, "grad_norm": 0.2981844840518028, "learning_rate": 5.641544372813905e-06, "loss": 0.0301849365234375, "step": 36360 }, { "epoch": 0.3144374021841575, "grad_norm": 7.221158281667303, "learning_rate": 5.641447793395441e-06, "loss": 0.07885360717773438, "step": 36365 }, { "epoch": 0.3144806357057008, "grad_norm": 6.325572689155293, "learning_rate": 5.641351201794852e-06, "loss": 0.14932861328125, "step": 36370 }, { "epoch": 0.314523869227244, "grad_norm": 3.206698499184135, "learning_rate": 5.6412545980125856e-06, "loss": 0.12652587890625, "step": 36375 }, { "epoch": 0.3145671027487873, "grad_norm": 4.713709723179758, "learning_rate": 5.641157982049085e-06, "loss": 0.16484909057617186, "step": 36380 }, { "epoch": 0.3146103362703306, "grad_norm": 2.089452868471237, "learning_rate": 5.641061353904798e-06, "loss": 0.121881103515625, "step": 36385 }, { "epoch": 0.3146535697918738, "grad_norm": 2.754354188522602, "learning_rate": 5.640964713580169e-06, "loss": 0.168505859375, "step": 36390 }, { "epoch": 0.3146968033134171, "grad_norm": 0.8950330493261255, "learning_rate": 5.640868061075645e-06, "loss": 0.40098876953125, "step": 36395 }, { "epoch": 0.3147400368349604, "grad_norm": 0.4801988225700556, "learning_rate": 5.640771396391671e-06, "loss": 0.20274658203125, "step": 36400 }, { "epoch": 0.3147832703565036, "grad_norm": 8.480271102529388, "learning_rate": 5.640674719528692e-06, "loss": 0.33365020751953123, "step": 36405 }, { "epoch": 0.3148265038780469, "grad_norm": 2.536201971278039, "learning_rate": 5.640578030487154e-06, "loss": 0.0491363525390625, "step": 36410 }, { "epoch": 0.3148697373995901, "grad_norm": 9.186433161777597, "learning_rate": 5.640481329267504e-06, "loss": 0.17728195190429688, "step": 36415 }, { "epoch": 0.3149129709211334, "grad_norm": 4.90574359214063, "learning_rate": 5.6403846158701875e-06, "loss": 0.20897216796875, "step": 36420 }, { "epoch": 0.3149562044426767, "grad_norm": 2.4936230686675014, "learning_rate": 5.640287890295651e-06, "loss": 0.2325897216796875, "step": 36425 }, { "epoch": 0.3149994379642199, "grad_norm": 3.3725928355445354, "learning_rate": 5.640191152544339e-06, "loss": 0.25898094177246095, "step": 36430 }, { "epoch": 0.3150426714857632, "grad_norm": 1.657794842875944, "learning_rate": 5.6400944026166995e-06, "loss": 0.133551025390625, "step": 36435 }, { "epoch": 0.3150859050073065, "grad_norm": 1.0141261667352985, "learning_rate": 5.6399976405131776e-06, "loss": 0.16023387908935546, "step": 36440 }, { "epoch": 0.3151291385288497, "grad_norm": 10.907440111957683, "learning_rate": 5.639900866234219e-06, "loss": 0.08040313720703125, "step": 36445 }, { "epoch": 0.315172372050393, "grad_norm": 22.531728783349585, "learning_rate": 5.639804079780272e-06, "loss": 0.245355224609375, "step": 36450 }, { "epoch": 0.31521560557193623, "grad_norm": 0.9096181339172125, "learning_rate": 5.639707281151782e-06, "loss": 0.110662841796875, "step": 36455 }, { "epoch": 0.3152588390934795, "grad_norm": 17.950695764462854, "learning_rate": 5.639610470349194e-06, "loss": 0.2546776294708252, "step": 36460 }, { "epoch": 0.3153020726150228, "grad_norm": 2.685138306219179, "learning_rate": 5.639513647372957e-06, "loss": 0.1288177490234375, "step": 36465 }, { "epoch": 0.31534530613656603, "grad_norm": 17.42598833662186, "learning_rate": 5.639416812223516e-06, "loss": 0.2991455078125, "step": 36470 }, { "epoch": 0.3153885396581093, "grad_norm": 19.368913645441506, "learning_rate": 5.639319964901318e-06, "loss": 0.04398651123046875, "step": 36475 }, { "epoch": 0.3154317731796526, "grad_norm": 43.58490721621504, "learning_rate": 5.639223105406809e-06, "loss": 0.1546539306640625, "step": 36480 }, { "epoch": 0.3154750067011958, "grad_norm": 3.3335590993021227, "learning_rate": 5.639126233740437e-06, "loss": 0.08689498901367188, "step": 36485 }, { "epoch": 0.3155182402227391, "grad_norm": 18.13657394565849, "learning_rate": 5.639029349902647e-06, "loss": 0.11264114379882813, "step": 36490 }, { "epoch": 0.31556147374428234, "grad_norm": 0.8397286120018297, "learning_rate": 5.638932453893888e-06, "loss": 0.451751708984375, "step": 36495 }, { "epoch": 0.3156047072658256, "grad_norm": 5.7705802173409255, "learning_rate": 5.638835545714604e-06, "loss": 0.14169464111328126, "step": 36500 }, { "epoch": 0.3156479407873689, "grad_norm": 3.1536206395363, "learning_rate": 5.6387386253652446e-06, "loss": 0.06692085266113282, "step": 36505 }, { "epoch": 0.31569117430891214, "grad_norm": 4.20674074441131, "learning_rate": 5.638641692846256e-06, "loss": 0.0938232421875, "step": 36510 }, { "epoch": 0.3157344078304554, "grad_norm": 8.894683780298621, "learning_rate": 5.638544748158085e-06, "loss": 0.3994758605957031, "step": 36515 }, { "epoch": 0.3157776413519987, "grad_norm": 1.50396223432959, "learning_rate": 5.638447791301179e-06, "loss": 0.145977783203125, "step": 36520 }, { "epoch": 0.31582087487354193, "grad_norm": 0.36466861244038723, "learning_rate": 5.638350822275984e-06, "loss": 0.039174652099609374, "step": 36525 }, { "epoch": 0.3158641083950852, "grad_norm": 10.208112344346508, "learning_rate": 5.6382538410829495e-06, "loss": 0.069036865234375, "step": 36530 }, { "epoch": 0.3159073419166285, "grad_norm": 5.915102940717511, "learning_rate": 5.63815684772252e-06, "loss": 0.14216232299804688, "step": 36535 }, { "epoch": 0.31595057543817173, "grad_norm": 0.3130655450425709, "learning_rate": 5.6380598421951455e-06, "loss": 0.0346038818359375, "step": 36540 }, { "epoch": 0.315993808959715, "grad_norm": 5.396197733349963, "learning_rate": 5.6379628245012714e-06, "loss": 0.12600555419921874, "step": 36545 }, { "epoch": 0.31603704248125825, "grad_norm": 3.3708028081627237, "learning_rate": 5.637865794641346e-06, "loss": 0.11471633911132813, "step": 36550 }, { "epoch": 0.31608027600280153, "grad_norm": 18.19234128107114, "learning_rate": 5.6377687526158164e-06, "loss": 0.35556488037109374, "step": 36555 }, { "epoch": 0.3161235095243448, "grad_norm": 0.4815261690161264, "learning_rate": 5.637671698425131e-06, "loss": 0.14770050048828126, "step": 36560 }, { "epoch": 0.31616674304588804, "grad_norm": 7.990095016803775, "learning_rate": 5.6375746320697366e-06, "loss": 0.28543243408203123, "step": 36565 }, { "epoch": 0.31620997656743133, "grad_norm": 2.033777081354122, "learning_rate": 5.637477553550081e-06, "loss": 0.215740966796875, "step": 36570 }, { "epoch": 0.3162532100889746, "grad_norm": 36.12961234712313, "learning_rate": 5.637380462866611e-06, "loss": 0.2902008056640625, "step": 36575 }, { "epoch": 0.31629644361051784, "grad_norm": 4.576131489056342, "learning_rate": 5.6372833600197765e-06, "loss": 0.13670148849487304, "step": 36580 }, { "epoch": 0.3163396771320611, "grad_norm": 2.0866067637754293, "learning_rate": 5.637186245010024e-06, "loss": 0.18409423828125, "step": 36585 }, { "epoch": 0.31638291065360435, "grad_norm": 34.106229566009006, "learning_rate": 5.637089117837801e-06, "loss": 0.14426193237304688, "step": 36590 }, { "epoch": 0.31642614417514764, "grad_norm": 14.24875272700083, "learning_rate": 5.636991978503557e-06, "loss": 0.12037487030029297, "step": 36595 }, { "epoch": 0.3164693776966909, "grad_norm": 0.8860143522812555, "learning_rate": 5.636894827007739e-06, "loss": 0.08239402770996093, "step": 36600 }, { "epoch": 0.31651261121823415, "grad_norm": 5.920384930116886, "learning_rate": 5.636797663350794e-06, "loss": 0.08118515014648438, "step": 36605 }, { "epoch": 0.31655584473977744, "grad_norm": 1.117847190597515, "learning_rate": 5.636700487533171e-06, "loss": 0.099072265625, "step": 36610 }, { "epoch": 0.3165990782613207, "grad_norm": 11.17665934439689, "learning_rate": 5.636603299555319e-06, "loss": 0.112200927734375, "step": 36615 }, { "epoch": 0.31664231178286395, "grad_norm": 9.828591333333971, "learning_rate": 5.636506099417686e-06, "loss": 0.1046905517578125, "step": 36620 }, { "epoch": 0.31668554530440723, "grad_norm": 17.882056783847933, "learning_rate": 5.63640888712072e-06, "loss": 0.10691566467285156, "step": 36625 }, { "epoch": 0.31672877882595046, "grad_norm": 0.21643379831206602, "learning_rate": 5.636311662664868e-06, "loss": 0.31190185546875, "step": 36630 }, { "epoch": 0.31677201234749375, "grad_norm": 5.9691526143060845, "learning_rate": 5.63621442605058e-06, "loss": 0.21194534301757811, "step": 36635 }, { "epoch": 0.31681524586903703, "grad_norm": 39.99810488755685, "learning_rate": 5.636117177278304e-06, "loss": 0.35436210632324217, "step": 36640 }, { "epoch": 0.31685847939058026, "grad_norm": 0.6135429585966441, "learning_rate": 5.6360199163484886e-06, "loss": 0.09726486206054688, "step": 36645 }, { "epoch": 0.31690171291212355, "grad_norm": 0.891535450161232, "learning_rate": 5.635922643261583e-06, "loss": 0.130206298828125, "step": 36650 }, { "epoch": 0.31694494643366683, "grad_norm": 4.797436722577689, "learning_rate": 5.6358253580180335e-06, "loss": 0.1116241455078125, "step": 36655 }, { "epoch": 0.31698817995521006, "grad_norm": 22.168859884920085, "learning_rate": 5.6357280606182915e-06, "loss": 0.33324851989746096, "step": 36660 }, { "epoch": 0.31703141347675334, "grad_norm": 5.766207484818412, "learning_rate": 5.635630751062804e-06, "loss": 0.08855438232421875, "step": 36665 }, { "epoch": 0.31707464699829657, "grad_norm": 21.774691574787568, "learning_rate": 5.635533429352021e-06, "loss": 0.35531005859375, "step": 36670 }, { "epoch": 0.31711788051983986, "grad_norm": 0.33552872995205063, "learning_rate": 5.6354360954863905e-06, "loss": 0.1425729751586914, "step": 36675 }, { "epoch": 0.31716111404138314, "grad_norm": 4.919450751233194, "learning_rate": 5.635338749466361e-06, "loss": 0.27325849533081054, "step": 36680 }, { "epoch": 0.31720434756292637, "grad_norm": 2.4661594051270894, "learning_rate": 5.635241391292384e-06, "loss": 0.1161041259765625, "step": 36685 }, { "epoch": 0.31724758108446965, "grad_norm": 7.121216371584509, "learning_rate": 5.635144020964904e-06, "loss": 0.18662490844726562, "step": 36690 }, { "epoch": 0.31729081460601294, "grad_norm": 19.650024329324186, "learning_rate": 5.6350466384843735e-06, "loss": 0.36224365234375, "step": 36695 }, { "epoch": 0.31733404812755617, "grad_norm": 15.62762676113056, "learning_rate": 5.634949243851241e-06, "loss": 0.1425537109375, "step": 36700 }, { "epoch": 0.31737728164909945, "grad_norm": 4.530437968341385, "learning_rate": 5.634851837065956e-06, "loss": 0.12965087890625, "step": 36705 }, { "epoch": 0.31742051517064274, "grad_norm": 11.492791905710199, "learning_rate": 5.634754418128965e-06, "loss": 0.11256103515625, "step": 36710 }, { "epoch": 0.31746374869218597, "grad_norm": 9.233240949138612, "learning_rate": 5.6346569870407215e-06, "loss": 0.1558990478515625, "step": 36715 }, { "epoch": 0.31750698221372925, "grad_norm": 20.169105929479628, "learning_rate": 5.634559543801672e-06, "loss": 0.22905426025390624, "step": 36720 }, { "epoch": 0.3175502157352725, "grad_norm": 25.873260760783786, "learning_rate": 5.634462088412268e-06, "loss": 0.17174854278564453, "step": 36725 }, { "epoch": 0.31759344925681576, "grad_norm": 59.511239045765585, "learning_rate": 5.634364620872956e-06, "loss": 0.15613632202148436, "step": 36730 }, { "epoch": 0.31763668277835905, "grad_norm": 24.98088862713765, "learning_rate": 5.634267141184188e-06, "loss": 0.1267780303955078, "step": 36735 }, { "epoch": 0.3176799162999023, "grad_norm": 5.3182865463173155, "learning_rate": 5.6341696493464124e-06, "loss": 0.21318817138671875, "step": 36740 }, { "epoch": 0.31772314982144556, "grad_norm": 25.885580742887306, "learning_rate": 5.634072145360079e-06, "loss": 0.353033447265625, "step": 36745 }, { "epoch": 0.31776638334298885, "grad_norm": 5.155725450176457, "learning_rate": 5.633974629225638e-06, "loss": 0.12541465759277343, "step": 36750 }, { "epoch": 0.3178096168645321, "grad_norm": 7.142083771118982, "learning_rate": 5.633877100943538e-06, "loss": 0.1433746337890625, "step": 36755 }, { "epoch": 0.31785285038607536, "grad_norm": 2.1262306381881597, "learning_rate": 5.63377956051423e-06, "loss": 0.36841659545898436, "step": 36760 }, { "epoch": 0.3178960839076186, "grad_norm": 1.1033009997324612, "learning_rate": 5.633682007938163e-06, "loss": 0.5308364868164063, "step": 36765 }, { "epoch": 0.31793931742916187, "grad_norm": 10.869408397891235, "learning_rate": 5.633584443215788e-06, "loss": 0.293896484375, "step": 36770 }, { "epoch": 0.31798255095070516, "grad_norm": 9.131879162803989, "learning_rate": 5.6334868663475545e-06, "loss": 0.0884796142578125, "step": 36775 }, { "epoch": 0.3180257844722484, "grad_norm": 4.467223514868299, "learning_rate": 5.6333892773339115e-06, "loss": 0.1447540283203125, "step": 36780 }, { "epoch": 0.31806901799379167, "grad_norm": 11.613929914261695, "learning_rate": 5.63329167617531e-06, "loss": 0.18264312744140626, "step": 36785 }, { "epoch": 0.31811225151533495, "grad_norm": 25.555403566434716, "learning_rate": 5.6331940628721995e-06, "loss": 0.27218055725097656, "step": 36790 }, { "epoch": 0.3181554850368782, "grad_norm": 15.303165493263272, "learning_rate": 5.633096437425032e-06, "loss": 0.15397682189941406, "step": 36795 }, { "epoch": 0.31819871855842147, "grad_norm": 11.134537403562357, "learning_rate": 5.632998799834255e-06, "loss": 0.10951461791992187, "step": 36800 }, { "epoch": 0.3182419520799647, "grad_norm": 8.852692080752362, "learning_rate": 5.632901150100322e-06, "loss": 0.2345367431640625, "step": 36805 }, { "epoch": 0.318285185601508, "grad_norm": 19.079152156058246, "learning_rate": 5.63280348822368e-06, "loss": 0.33469467163085936, "step": 36810 }, { "epoch": 0.31832841912305126, "grad_norm": 40.35381489377063, "learning_rate": 5.63270581420478e-06, "loss": 0.283538818359375, "step": 36815 }, { "epoch": 0.3183716526445945, "grad_norm": 27.603859794200332, "learning_rate": 5.632608128044076e-06, "loss": 0.356903076171875, "step": 36820 }, { "epoch": 0.3184148861661378, "grad_norm": 3.203444776383236, "learning_rate": 5.632510429742014e-06, "loss": 0.12334785461425782, "step": 36825 }, { "epoch": 0.31845811968768106, "grad_norm": 4.253573677068963, "learning_rate": 5.632412719299046e-06, "loss": 0.24838495254516602, "step": 36830 }, { "epoch": 0.3185013532092243, "grad_norm": 6.589335979884317, "learning_rate": 5.632314996715625e-06, "loss": 0.22909469604492189, "step": 36835 }, { "epoch": 0.3185445867307676, "grad_norm": 1.6344578106405352, "learning_rate": 5.6322172619922e-06, "loss": 0.158599853515625, "step": 36840 }, { "epoch": 0.3185878202523108, "grad_norm": 21.600549800799318, "learning_rate": 5.63211951512922e-06, "loss": 0.309381103515625, "step": 36845 }, { "epoch": 0.3186310537738541, "grad_norm": 0.7644131648307001, "learning_rate": 5.632021756127138e-06, "loss": 0.12185592651367187, "step": 36850 }, { "epoch": 0.3186742872953974, "grad_norm": 2.7623614022920955, "learning_rate": 5.6319239849864054e-06, "loss": 0.14165267944335938, "step": 36855 }, { "epoch": 0.3187175208169406, "grad_norm": 1.691653939923438, "learning_rate": 5.6318262017074695e-06, "loss": 0.1630859375, "step": 36860 }, { "epoch": 0.3187607543384839, "grad_norm": 4.676151215113081, "learning_rate": 5.631728406290785e-06, "loss": 0.08607101440429688, "step": 36865 }, { "epoch": 0.31880398786002717, "grad_norm": 24.951604643316834, "learning_rate": 5.6316305987368024e-06, "loss": 0.2847023010253906, "step": 36870 }, { "epoch": 0.3188472213815704, "grad_norm": 14.8444227063645, "learning_rate": 5.631532779045971e-06, "loss": 0.16378173828125, "step": 36875 }, { "epoch": 0.3188904549031137, "grad_norm": 1.8840529433232187, "learning_rate": 5.6314349472187435e-06, "loss": 0.1668701171875, "step": 36880 }, { "epoch": 0.31893368842465697, "grad_norm": 21.587605144712324, "learning_rate": 5.63133710325557e-06, "loss": 0.19289703369140626, "step": 36885 }, { "epoch": 0.3189769219462002, "grad_norm": 7.9405050377729935, "learning_rate": 5.631239247156903e-06, "loss": 0.30325927734375, "step": 36890 }, { "epoch": 0.3190201554677435, "grad_norm": 10.36628868777588, "learning_rate": 5.631141378923192e-06, "loss": 0.09338912963867188, "step": 36895 }, { "epoch": 0.3190633889892867, "grad_norm": 2.3755658903269077, "learning_rate": 5.631043498554891e-06, "loss": 0.1418212890625, "step": 36900 }, { "epoch": 0.31910662251083, "grad_norm": 19.572518105170744, "learning_rate": 5.6309456060524485e-06, "loss": 0.2714805603027344, "step": 36905 }, { "epoch": 0.3191498560323733, "grad_norm": 11.63383774067822, "learning_rate": 5.630847701416318e-06, "loss": 0.08260269165039062, "step": 36910 }, { "epoch": 0.3191930895539165, "grad_norm": 4.717672921469144, "learning_rate": 5.6307497846469505e-06, "loss": 0.090966796875, "step": 36915 }, { "epoch": 0.3192363230754598, "grad_norm": 1.8001464339366322, "learning_rate": 5.630651855744797e-06, "loss": 0.32580108642578126, "step": 36920 }, { "epoch": 0.3192795565970031, "grad_norm": 1.3716365406617632, "learning_rate": 5.630553914710309e-06, "loss": 0.330340576171875, "step": 36925 }, { "epoch": 0.3193227901185463, "grad_norm": 29.813325413128926, "learning_rate": 5.6304559615439395e-06, "loss": 0.16233444213867188, "step": 36930 }, { "epoch": 0.3193660236400896, "grad_norm": 14.51782522412921, "learning_rate": 5.63035799624614e-06, "loss": 0.1075439453125, "step": 36935 }, { "epoch": 0.3194092571616328, "grad_norm": 0.983036617546968, "learning_rate": 5.630260018817362e-06, "loss": 0.058428955078125, "step": 36940 }, { "epoch": 0.3194524906831761, "grad_norm": 6.1643526263587605, "learning_rate": 5.630162029258056e-06, "loss": 0.35060958862304686, "step": 36945 }, { "epoch": 0.3194957242047194, "grad_norm": 1.0021904092939988, "learning_rate": 5.630064027568676e-06, "loss": 0.2297882080078125, "step": 36950 }, { "epoch": 0.3195389577262626, "grad_norm": 6.630891989268332, "learning_rate": 5.629966013749672e-06, "loss": 0.09178466796875, "step": 36955 }, { "epoch": 0.3195821912478059, "grad_norm": 4.674322696624779, "learning_rate": 5.629867987801499e-06, "loss": 0.279241943359375, "step": 36960 }, { "epoch": 0.3196254247693492, "grad_norm": 0.8227584108494539, "learning_rate": 5.629769949724606e-06, "loss": 0.35871734619140627, "step": 36965 }, { "epoch": 0.3196686582908924, "grad_norm": 0.5448761297495306, "learning_rate": 5.629671899519446e-06, "loss": 0.25109710693359377, "step": 36970 }, { "epoch": 0.3197118918124357, "grad_norm": 1.1828786564812785, "learning_rate": 5.629573837186472e-06, "loss": 0.101654052734375, "step": 36975 }, { "epoch": 0.31975512533397893, "grad_norm": 17.540543557646128, "learning_rate": 5.629475762726137e-06, "loss": 0.2330780029296875, "step": 36980 }, { "epoch": 0.3197983588555222, "grad_norm": 6.850133228576783, "learning_rate": 5.62937767613889e-06, "loss": 0.5451377868652344, "step": 36985 }, { "epoch": 0.3198415923770655, "grad_norm": 0.2838517035598962, "learning_rate": 5.629279577425187e-06, "loss": 0.1762847900390625, "step": 36990 }, { "epoch": 0.3198848258986087, "grad_norm": 0.7151843894065918, "learning_rate": 5.629181466585478e-06, "loss": 0.10033035278320312, "step": 36995 }, { "epoch": 0.319928059420152, "grad_norm": 6.030095083799543, "learning_rate": 5.629083343620217e-06, "loss": 0.07364501953125, "step": 37000 }, { "epoch": 0.3199712929416953, "grad_norm": 3.0323301283886264, "learning_rate": 5.628985208529857e-06, "loss": 0.1390625, "step": 37005 }, { "epoch": 0.3200145264632385, "grad_norm": 0.49993709421472715, "learning_rate": 5.628887061314848e-06, "loss": 0.0735992431640625, "step": 37010 }, { "epoch": 0.3200577599847818, "grad_norm": 23.506497259488718, "learning_rate": 5.628788901975644e-06, "loss": 0.24877166748046875, "step": 37015 }, { "epoch": 0.32010099350632504, "grad_norm": 18.053558512932458, "learning_rate": 5.628690730512699e-06, "loss": 0.24826087951660156, "step": 37020 }, { "epoch": 0.3201442270278683, "grad_norm": 7.162325142816544, "learning_rate": 5.628592546926463e-06, "loss": 0.1236083984375, "step": 37025 }, { "epoch": 0.3201874605494116, "grad_norm": 8.1548347640601, "learning_rate": 5.628494351217392e-06, "loss": 0.0752960205078125, "step": 37030 }, { "epoch": 0.32023069407095484, "grad_norm": 5.466974795547418, "learning_rate": 5.6283961433859364e-06, "loss": 0.20205078125, "step": 37035 }, { "epoch": 0.3202739275924981, "grad_norm": 63.624485054746096, "learning_rate": 5.62829792343255e-06, "loss": 0.25340576171875, "step": 37040 }, { "epoch": 0.3203171611140414, "grad_norm": 6.721517021769432, "learning_rate": 5.628199691357686e-06, "loss": 0.1582122802734375, "step": 37045 }, { "epoch": 0.32036039463558463, "grad_norm": 3.1151573263016292, "learning_rate": 5.628101447161798e-06, "loss": 0.196099853515625, "step": 37050 }, { "epoch": 0.3204036281571279, "grad_norm": 10.825571069430422, "learning_rate": 5.628003190845337e-06, "loss": 0.3881683349609375, "step": 37055 }, { "epoch": 0.3204468616786712, "grad_norm": 0.9168146481767577, "learning_rate": 5.627904922408758e-06, "loss": 0.204290771484375, "step": 37060 }, { "epoch": 0.32049009520021443, "grad_norm": 25.13647939599445, "learning_rate": 5.6278066418525145e-06, "loss": 0.43499755859375, "step": 37065 }, { "epoch": 0.3205333287217577, "grad_norm": 7.082850651901783, "learning_rate": 5.627708349177058e-06, "loss": 0.14564552307128906, "step": 37070 }, { "epoch": 0.32057656224330094, "grad_norm": 17.08236189644997, "learning_rate": 5.627610044382844e-06, "loss": 0.36529541015625, "step": 37075 }, { "epoch": 0.32061979576484423, "grad_norm": 2.1922301748496036, "learning_rate": 5.627511727470323e-06, "loss": 0.030698394775390624, "step": 37080 }, { "epoch": 0.3206630292863875, "grad_norm": 5.726385178536389, "learning_rate": 5.627413398439952e-06, "loss": 0.22580718994140625, "step": 37085 }, { "epoch": 0.32070626280793074, "grad_norm": 4.097894334202689, "learning_rate": 5.627315057292182e-06, "loss": 0.20719757080078124, "step": 37090 }, { "epoch": 0.320749496329474, "grad_norm": 10.821116099350968, "learning_rate": 5.627216704027466e-06, "loss": 0.21012725830078124, "step": 37095 }, { "epoch": 0.3207927298510173, "grad_norm": 22.649115859381656, "learning_rate": 5.627118338646259e-06, "loss": 0.15503692626953125, "step": 37100 }, { "epoch": 0.32083596337256054, "grad_norm": 17.76285039098355, "learning_rate": 5.627019961149015e-06, "loss": 0.210003662109375, "step": 37105 }, { "epoch": 0.3208791968941038, "grad_norm": 7.8485128867081135, "learning_rate": 5.626921571536187e-06, "loss": 0.2378173828125, "step": 37110 }, { "epoch": 0.32092243041564705, "grad_norm": 5.496271423492937, "learning_rate": 5.6268231698082276e-06, "loss": 0.107720947265625, "step": 37115 }, { "epoch": 0.32096566393719034, "grad_norm": 10.813879192373664, "learning_rate": 5.626724755965594e-06, "loss": 0.34269256591796876, "step": 37120 }, { "epoch": 0.3210088974587336, "grad_norm": 3.058253327056786, "learning_rate": 5.626626330008736e-06, "loss": 0.36908111572265623, "step": 37125 }, { "epoch": 0.32105213098027685, "grad_norm": 1.586544578085161, "learning_rate": 5.62652789193811e-06, "loss": 0.25142593383789064, "step": 37130 }, { "epoch": 0.32109536450182014, "grad_norm": 1.6601617117217287, "learning_rate": 5.62642944175417e-06, "loss": 0.3154327392578125, "step": 37135 }, { "epoch": 0.3211385980233634, "grad_norm": 4.230477248062447, "learning_rate": 5.6263309794573695e-06, "loss": 0.037237548828125, "step": 37140 }, { "epoch": 0.32118183154490665, "grad_norm": 5.768460301267615, "learning_rate": 5.6262325050481615e-06, "loss": 0.2551849365234375, "step": 37145 }, { "epoch": 0.32122506506644993, "grad_norm": 14.220400188420985, "learning_rate": 5.6261340185270025e-06, "loss": 0.07138137817382813, "step": 37150 }, { "epoch": 0.32126829858799316, "grad_norm": 7.503764928886755, "learning_rate": 5.626035519894345e-06, "loss": 0.1155914306640625, "step": 37155 }, { "epoch": 0.32131153210953645, "grad_norm": 13.671169137458303, "learning_rate": 5.625937009150644e-06, "loss": 0.213568115234375, "step": 37160 }, { "epoch": 0.32135476563107973, "grad_norm": 1.4544511364920483, "learning_rate": 5.625838486296353e-06, "loss": 0.2517662048339844, "step": 37165 }, { "epoch": 0.32139799915262296, "grad_norm": 7.614937390098748, "learning_rate": 5.625739951331927e-06, "loss": 0.100103759765625, "step": 37170 }, { "epoch": 0.32144123267416624, "grad_norm": 1.3224307101413106, "learning_rate": 5.6256414042578206e-06, "loss": 0.08468399047851563, "step": 37175 }, { "epoch": 0.32148446619570953, "grad_norm": 27.069028395606225, "learning_rate": 5.625542845074487e-06, "loss": 0.2826568603515625, "step": 37180 }, { "epoch": 0.32152769971725276, "grad_norm": 4.439266350265442, "learning_rate": 5.625444273782383e-06, "loss": 0.288092041015625, "step": 37185 }, { "epoch": 0.32157093323879604, "grad_norm": 1.2315470790570253, "learning_rate": 5.625345690381962e-06, "loss": 0.154864501953125, "step": 37190 }, { "epoch": 0.32161416676033927, "grad_norm": 2.2048183381469615, "learning_rate": 5.625247094873678e-06, "loss": 0.20987281799316407, "step": 37195 }, { "epoch": 0.32165740028188256, "grad_norm": 3.2233579767422693, "learning_rate": 5.625148487257987e-06, "loss": 0.1316131591796875, "step": 37200 }, { "epoch": 0.32170063380342584, "grad_norm": 14.522547654143757, "learning_rate": 5.6250498675353435e-06, "loss": 0.10253238677978516, "step": 37205 }, { "epoch": 0.32174386732496907, "grad_norm": 20.649893101453586, "learning_rate": 5.624951235706201e-06, "loss": 0.24760360717773439, "step": 37210 }, { "epoch": 0.32178710084651235, "grad_norm": 13.791348934052925, "learning_rate": 5.624852591771016e-06, "loss": 0.076580810546875, "step": 37215 }, { "epoch": 0.32183033436805564, "grad_norm": 4.513962416629074, "learning_rate": 5.6247539357302426e-06, "loss": 0.06124114990234375, "step": 37220 }, { "epoch": 0.32187356788959887, "grad_norm": 8.815108308227936, "learning_rate": 5.624655267584336e-06, "loss": 0.216754150390625, "step": 37225 }, { "epoch": 0.32191680141114215, "grad_norm": 1.9887193592830739, "learning_rate": 5.62455658733375e-06, "loss": 0.04993743896484375, "step": 37230 }, { "epoch": 0.3219600349326854, "grad_norm": 49.74323948413851, "learning_rate": 5.624457894978943e-06, "loss": 0.14323348999023439, "step": 37235 }, { "epoch": 0.32200326845422866, "grad_norm": 3.130825241621033, "learning_rate": 5.6243591905203665e-06, "loss": 0.1172332763671875, "step": 37240 }, { "epoch": 0.32204650197577195, "grad_norm": 24.554431504397563, "learning_rate": 5.6242604739584785e-06, "loss": 0.6036376953125, "step": 37245 }, { "epoch": 0.3220897354973152, "grad_norm": 26.344165688892762, "learning_rate": 5.624161745293732e-06, "loss": 0.2160614013671875, "step": 37250 }, { "epoch": 0.32213296901885846, "grad_norm": 6.005614401768338, "learning_rate": 5.624063004526584e-06, "loss": 0.14528045654296876, "step": 37255 }, { "epoch": 0.32217620254040175, "grad_norm": 39.24389837083869, "learning_rate": 5.623964251657489e-06, "loss": 0.064947509765625, "step": 37260 }, { "epoch": 0.322219436061945, "grad_norm": 12.252570458977928, "learning_rate": 5.623865486686903e-06, "loss": 0.1555816650390625, "step": 37265 }, { "epoch": 0.32226266958348826, "grad_norm": 7.7565242893529565, "learning_rate": 5.623766709615281e-06, "loss": 0.3020050048828125, "step": 37270 }, { "epoch": 0.32230590310503154, "grad_norm": 20.890918183492182, "learning_rate": 5.623667920443079e-06, "loss": 0.12254257202148437, "step": 37275 }, { "epoch": 0.3223491366265748, "grad_norm": 15.127587490633722, "learning_rate": 5.623569119170752e-06, "loss": 0.038043212890625, "step": 37280 }, { "epoch": 0.32239237014811806, "grad_norm": 16.677997664993526, "learning_rate": 5.623470305798756e-06, "loss": 0.21441650390625, "step": 37285 }, { "epoch": 0.3224356036696613, "grad_norm": 0.18967123787981413, "learning_rate": 5.623371480327548e-06, "loss": 0.4420166015625, "step": 37290 }, { "epoch": 0.32247883719120457, "grad_norm": 17.937293008637432, "learning_rate": 5.623272642757582e-06, "loss": 0.43361053466796873, "step": 37295 }, { "epoch": 0.32252207071274785, "grad_norm": 2.380646334786009, "learning_rate": 5.623173793089314e-06, "loss": 0.0865478515625, "step": 37300 }, { "epoch": 0.3225653042342911, "grad_norm": 0.8520401250909803, "learning_rate": 5.6230749313232e-06, "loss": 0.32022705078125, "step": 37305 }, { "epoch": 0.32260853775583437, "grad_norm": 2.6506954543247288, "learning_rate": 5.622976057459696e-06, "loss": 0.05918188095092773, "step": 37310 }, { "epoch": 0.32265177127737765, "grad_norm": 43.10321558766843, "learning_rate": 5.6228771714992584e-06, "loss": 0.458685302734375, "step": 37315 }, { "epoch": 0.3226950047989209, "grad_norm": 5.956459195496378, "learning_rate": 5.6227782734423435e-06, "loss": 0.35469207763671873, "step": 37320 }, { "epoch": 0.32273823832046417, "grad_norm": 29.37001794168585, "learning_rate": 5.622679363289407e-06, "loss": 0.1777435302734375, "step": 37325 }, { "epoch": 0.3227814718420074, "grad_norm": 4.050382299635953, "learning_rate": 5.622580441040905e-06, "loss": 0.405255126953125, "step": 37330 }, { "epoch": 0.3228247053635507, "grad_norm": 4.0042027181882585, "learning_rate": 5.622481506697293e-06, "loss": 0.24608840942382812, "step": 37335 }, { "epoch": 0.32286793888509396, "grad_norm": 0.9604911790760061, "learning_rate": 5.6223825602590295e-06, "loss": 0.0486175537109375, "step": 37340 }, { "epoch": 0.3229111724066372, "grad_norm": 1.2078034542988865, "learning_rate": 5.6222836017265676e-06, "loss": 0.08001708984375, "step": 37345 }, { "epoch": 0.3229544059281805, "grad_norm": 0.7227655647261398, "learning_rate": 5.622184631100366e-06, "loss": 0.08999786376953126, "step": 37350 }, { "epoch": 0.32299763944972376, "grad_norm": 5.261403278646845, "learning_rate": 5.6220856483808814e-06, "loss": 0.193951416015625, "step": 37355 }, { "epoch": 0.323040872971267, "grad_norm": 21.151286035016376, "learning_rate": 5.621986653568569e-06, "loss": 0.19678955078125, "step": 37360 }, { "epoch": 0.3230841064928103, "grad_norm": 13.708642913095833, "learning_rate": 5.621887646663885e-06, "loss": 0.24400634765625, "step": 37365 }, { "epoch": 0.3231273400143535, "grad_norm": 12.23987812529214, "learning_rate": 5.621788627667288e-06, "loss": 0.19259185791015626, "step": 37370 }, { "epoch": 0.3231705735358968, "grad_norm": 19.230456438127085, "learning_rate": 5.621689596579233e-06, "loss": 0.464593505859375, "step": 37375 }, { "epoch": 0.3232138070574401, "grad_norm": 4.345070429443906, "learning_rate": 5.621590553400177e-06, "loss": 0.4001220703125, "step": 37380 }, { "epoch": 0.3232570405789833, "grad_norm": 0.1477072580576217, "learning_rate": 5.6214914981305775e-06, "loss": 0.5555755615234375, "step": 37385 }, { "epoch": 0.3233002741005266, "grad_norm": 13.167001463722636, "learning_rate": 5.6213924307708906e-06, "loss": 0.20269927978515626, "step": 37390 }, { "epoch": 0.32334350762206987, "grad_norm": 7.9613664006794576, "learning_rate": 5.6212933513215735e-06, "loss": 0.0721811294555664, "step": 37395 }, { "epoch": 0.3233867411436131, "grad_norm": 5.431215053439904, "learning_rate": 5.621194259783083e-06, "loss": 0.0388336181640625, "step": 37400 }, { "epoch": 0.3234299746651564, "grad_norm": 9.826768977107184, "learning_rate": 5.621095156155877e-06, "loss": 0.3612518310546875, "step": 37405 }, { "epoch": 0.3234732081866996, "grad_norm": 0.5528147629471026, "learning_rate": 5.620996040440411e-06, "loss": 0.0414031982421875, "step": 37410 }, { "epoch": 0.3235164417082429, "grad_norm": 26.235122518132084, "learning_rate": 5.620896912637143e-06, "loss": 0.2106781005859375, "step": 37415 }, { "epoch": 0.3235596752297862, "grad_norm": 1.573701769912241, "learning_rate": 5.6207977727465295e-06, "loss": 0.0773529052734375, "step": 37420 }, { "epoch": 0.3236029087513294, "grad_norm": 1.8972548436641563, "learning_rate": 5.620698620769029e-06, "loss": 0.1374176025390625, "step": 37425 }, { "epoch": 0.3236461422728727, "grad_norm": 0.857558987106072, "learning_rate": 5.620599456705097e-06, "loss": 0.1270050048828125, "step": 37430 }, { "epoch": 0.323689375794416, "grad_norm": 9.881337001236504, "learning_rate": 5.620500280555193e-06, "loss": 0.40493927001953123, "step": 37435 }, { "epoch": 0.3237326093159592, "grad_norm": 66.93108347085128, "learning_rate": 5.6204010923197725e-06, "loss": 0.542669677734375, "step": 37440 }, { "epoch": 0.3237758428375025, "grad_norm": 39.120345916620586, "learning_rate": 5.620301891999294e-06, "loss": 0.150482177734375, "step": 37445 }, { "epoch": 0.3238190763590458, "grad_norm": 7.113556954866094, "learning_rate": 5.620202679594215e-06, "loss": 0.1087371826171875, "step": 37450 }, { "epoch": 0.323862309880589, "grad_norm": 1.3810068543249465, "learning_rate": 5.620103455104992e-06, "loss": 0.1147430419921875, "step": 37455 }, { "epoch": 0.3239055434021323, "grad_norm": 4.966714034582978, "learning_rate": 5.620004218532084e-06, "loss": 0.0931488037109375, "step": 37460 }, { "epoch": 0.3239487769236755, "grad_norm": 10.523251527796218, "learning_rate": 5.619904969875948e-06, "loss": 0.0693115234375, "step": 37465 }, { "epoch": 0.3239920104452188, "grad_norm": 1.8961541134037654, "learning_rate": 5.619805709137041e-06, "loss": 0.2456806182861328, "step": 37470 }, { "epoch": 0.3240352439667621, "grad_norm": 15.27723564179116, "learning_rate": 5.619706436315823e-06, "loss": 0.30880889892578123, "step": 37475 }, { "epoch": 0.3240784774883053, "grad_norm": 20.175467389708235, "learning_rate": 5.6196071514127496e-06, "loss": 0.09768142700195312, "step": 37480 }, { "epoch": 0.3241217110098486, "grad_norm": 0.40135313396765987, "learning_rate": 5.61950785442828e-06, "loss": 0.07455978393554688, "step": 37485 }, { "epoch": 0.3241649445313919, "grad_norm": 23.09813425248671, "learning_rate": 5.6194085453628705e-06, "loss": 0.3204620361328125, "step": 37490 }, { "epoch": 0.3242081780529351, "grad_norm": 7.7356360543725415, "learning_rate": 5.619309224216981e-06, "loss": 0.5575942993164062, "step": 37495 }, { "epoch": 0.3242514115744784, "grad_norm": 1.4090350125349351, "learning_rate": 5.619209890991068e-06, "loss": 0.3174957275390625, "step": 37500 }, { "epoch": 0.3242946450960216, "grad_norm": 3.433282584553204, "learning_rate": 5.619110545685592e-06, "loss": 0.040335464477539065, "step": 37505 }, { "epoch": 0.3243378786175649, "grad_norm": 26.050265990620264, "learning_rate": 5.6190111883010095e-06, "loss": 0.314654541015625, "step": 37510 }, { "epoch": 0.3243811121391082, "grad_norm": 0.7752547261003798, "learning_rate": 5.618911818837777e-06, "loss": 0.14626235961914064, "step": 37515 }, { "epoch": 0.3244243456606514, "grad_norm": 3.4812562651208236, "learning_rate": 5.618812437296356e-06, "loss": 0.1289154052734375, "step": 37520 }, { "epoch": 0.3244675791821947, "grad_norm": 3.5551892525468096, "learning_rate": 5.618713043677203e-06, "loss": 0.5362045288085937, "step": 37525 }, { "epoch": 0.324510812703738, "grad_norm": 0.18964676601151612, "learning_rate": 5.618613637980777e-06, "loss": 0.05323028564453125, "step": 37530 }, { "epoch": 0.3245540462252812, "grad_norm": 30.286318901828267, "learning_rate": 5.618514220207536e-06, "loss": 0.28941650390625, "step": 37535 }, { "epoch": 0.3245972797468245, "grad_norm": 7.765644667901948, "learning_rate": 5.618414790357939e-06, "loss": 0.1314361572265625, "step": 37540 }, { "epoch": 0.32464051326836774, "grad_norm": 3.121678984413102, "learning_rate": 5.6183153484324445e-06, "loss": 0.0374969482421875, "step": 37545 }, { "epoch": 0.324683746789911, "grad_norm": 14.12023787797309, "learning_rate": 5.618215894431511e-06, "loss": 0.0993194580078125, "step": 37550 }, { "epoch": 0.3247269803114543, "grad_norm": 11.370432343729693, "learning_rate": 5.618116428355597e-06, "loss": 0.12915573120117188, "step": 37555 }, { "epoch": 0.32477021383299753, "grad_norm": 8.57467897080112, "learning_rate": 5.618016950205162e-06, "loss": 0.142041015625, "step": 37560 }, { "epoch": 0.3248134473545408, "grad_norm": 3.3555675761141144, "learning_rate": 5.6179174599806625e-06, "loss": 0.15925064086914062, "step": 37565 }, { "epoch": 0.3248566808760841, "grad_norm": 5.749319315004916, "learning_rate": 5.617817957682561e-06, "loss": 0.272308349609375, "step": 37570 }, { "epoch": 0.32489991439762733, "grad_norm": 4.6707393330510305, "learning_rate": 5.617718443311312e-06, "loss": 0.3099212646484375, "step": 37575 }, { "epoch": 0.3249431479191706, "grad_norm": 38.43450995211305, "learning_rate": 5.6176189168673785e-06, "loss": 0.0857940673828125, "step": 37580 }, { "epoch": 0.32498638144071385, "grad_norm": 3.4914246155518627, "learning_rate": 5.617519378351218e-06, "loss": 0.03741741180419922, "step": 37585 }, { "epoch": 0.32502961496225713, "grad_norm": 4.001214302674825, "learning_rate": 5.617419827763289e-06, "loss": 0.20240478515625, "step": 37590 }, { "epoch": 0.3250728484838004, "grad_norm": 28.552335528541168, "learning_rate": 5.617320265104051e-06, "loss": 0.20914764404296876, "step": 37595 }, { "epoch": 0.32511608200534364, "grad_norm": 1.406977953018703, "learning_rate": 5.617220690373963e-06, "loss": 0.08170166015625, "step": 37600 }, { "epoch": 0.3251593155268869, "grad_norm": 43.452336571922324, "learning_rate": 5.617121103573485e-06, "loss": 0.366943359375, "step": 37605 }, { "epoch": 0.3252025490484302, "grad_norm": 20.352297881781283, "learning_rate": 5.617021504703075e-06, "loss": 0.6944969177246094, "step": 37610 }, { "epoch": 0.32524578256997344, "grad_norm": 21.198306422700256, "learning_rate": 5.616921893763194e-06, "loss": 0.5196090698242187, "step": 37615 }, { "epoch": 0.3252890160915167, "grad_norm": 19.362642167065825, "learning_rate": 5.6168222707543e-06, "loss": 0.211444091796875, "step": 37620 }, { "epoch": 0.32533224961306, "grad_norm": 28.33893728823563, "learning_rate": 5.616722635676853e-06, "loss": 0.21119918823242187, "step": 37625 }, { "epoch": 0.32537548313460324, "grad_norm": 52.19543659760939, "learning_rate": 5.616622988531311e-06, "loss": 0.2222442626953125, "step": 37630 }, { "epoch": 0.3254187166561465, "grad_norm": 23.675582384596748, "learning_rate": 5.616523329318137e-06, "loss": 0.22119522094726562, "step": 37635 }, { "epoch": 0.32546195017768975, "grad_norm": 31.397751186289202, "learning_rate": 5.616423658037789e-06, "loss": 0.612689208984375, "step": 37640 }, { "epoch": 0.32550518369923304, "grad_norm": 1.6183373729245616, "learning_rate": 5.616323974690725e-06, "loss": 0.6133386611938476, "step": 37645 }, { "epoch": 0.3255484172207763, "grad_norm": 5.878341080380693, "learning_rate": 5.616224279277406e-06, "loss": 0.2246551513671875, "step": 37650 }, { "epoch": 0.32559165074231955, "grad_norm": 2.838040564869151, "learning_rate": 5.616124571798291e-06, "loss": 0.11103363037109375, "step": 37655 }, { "epoch": 0.32563488426386283, "grad_norm": 4.351187985969005, "learning_rate": 5.616024852253842e-06, "loss": 0.26490478515625, "step": 37660 }, { "epoch": 0.3256781177854061, "grad_norm": 19.33497460097848, "learning_rate": 5.615925120644517e-06, "loss": 0.26226806640625, "step": 37665 }, { "epoch": 0.32572135130694935, "grad_norm": 38.16436868224441, "learning_rate": 5.615825376970776e-06, "loss": 0.45013427734375, "step": 37670 }, { "epoch": 0.32576458482849263, "grad_norm": 12.332146661373327, "learning_rate": 5.615725621233081e-06, "loss": 0.170440673828125, "step": 37675 }, { "epoch": 0.32580781835003586, "grad_norm": 4.863917047655223, "learning_rate": 5.61562585343189e-06, "loss": 0.203302001953125, "step": 37680 }, { "epoch": 0.32585105187157914, "grad_norm": 26.890131048633126, "learning_rate": 5.615526073567663e-06, "loss": 0.19800796508789062, "step": 37685 }, { "epoch": 0.32589428539312243, "grad_norm": 6.315145364278203, "learning_rate": 5.6154262816408605e-06, "loss": 0.2595458984375, "step": 37690 }, { "epoch": 0.32593751891466566, "grad_norm": 3.8736815631072643, "learning_rate": 5.615326477651945e-06, "loss": 0.3333984375, "step": 37695 }, { "epoch": 0.32598075243620894, "grad_norm": 3.838911432546424, "learning_rate": 5.615226661601373e-06, "loss": 0.261883544921875, "step": 37700 }, { "epoch": 0.3260239859577522, "grad_norm": 36.65960553667265, "learning_rate": 5.615126833489607e-06, "loss": 0.17848663330078124, "step": 37705 }, { "epoch": 0.32606721947929546, "grad_norm": 24.844133513143586, "learning_rate": 5.615026993317108e-06, "loss": 0.1850067138671875, "step": 37710 }, { "epoch": 0.32611045300083874, "grad_norm": 5.8514574141295155, "learning_rate": 5.614927141084335e-06, "loss": 0.09763031005859375, "step": 37715 }, { "epoch": 0.32615368652238197, "grad_norm": 0.2678208633980899, "learning_rate": 5.614827276791749e-06, "loss": 0.03686676025390625, "step": 37720 }, { "epoch": 0.32619692004392525, "grad_norm": 30.396335605744646, "learning_rate": 5.614727400439811e-06, "loss": 0.2667816162109375, "step": 37725 }, { "epoch": 0.32624015356546854, "grad_norm": 5.658361934101215, "learning_rate": 5.6146275120289815e-06, "loss": 0.06754150390625, "step": 37730 }, { "epoch": 0.32628338708701177, "grad_norm": 2.7211195846211016, "learning_rate": 5.614527611559721e-06, "loss": 0.0559906005859375, "step": 37735 }, { "epoch": 0.32632662060855505, "grad_norm": 0.8880698299565748, "learning_rate": 5.6144276990324886e-06, "loss": 0.23241958618164063, "step": 37740 }, { "epoch": 0.32636985413009834, "grad_norm": 4.6027237713850795, "learning_rate": 5.614327774447748e-06, "loss": 0.0743072509765625, "step": 37745 }, { "epoch": 0.32641308765164156, "grad_norm": 39.06238572753609, "learning_rate": 5.614227837805959e-06, "loss": 0.1067474365234375, "step": 37750 }, { "epoch": 0.32645632117318485, "grad_norm": 1.1962450469180181, "learning_rate": 5.614127889107582e-06, "loss": 0.045688629150390625, "step": 37755 }, { "epoch": 0.3264995546947281, "grad_norm": 29.86143926036908, "learning_rate": 5.614027928353078e-06, "loss": 0.1770050048828125, "step": 37760 }, { "epoch": 0.32654278821627136, "grad_norm": 19.439573604915896, "learning_rate": 5.613927955542908e-06, "loss": 0.19559326171875, "step": 37765 }, { "epoch": 0.32658602173781465, "grad_norm": 13.268822653725067, "learning_rate": 5.613827970677533e-06, "loss": 0.6003631591796875, "step": 37770 }, { "epoch": 0.3266292552593579, "grad_norm": 1.4093126325227212, "learning_rate": 5.613727973757414e-06, "loss": 0.14278945922851563, "step": 37775 }, { "epoch": 0.32667248878090116, "grad_norm": 4.165609214143688, "learning_rate": 5.6136279647830135e-06, "loss": 0.1238983154296875, "step": 37780 }, { "epoch": 0.32671572230244444, "grad_norm": 1.402347390344944, "learning_rate": 5.613527943754792e-06, "loss": 0.27465362548828126, "step": 37785 }, { "epoch": 0.3267589558239877, "grad_norm": 2.4970399497320184, "learning_rate": 5.61342791067321e-06, "loss": 0.06112823486328125, "step": 37790 }, { "epoch": 0.32680218934553096, "grad_norm": 3.700953556414398, "learning_rate": 5.613327865538729e-06, "loss": 0.10537261962890625, "step": 37795 }, { "epoch": 0.32684542286707424, "grad_norm": 2.186864835927223, "learning_rate": 5.613227808351812e-06, "loss": 0.08505706787109375, "step": 37800 }, { "epoch": 0.32688865638861747, "grad_norm": 9.028250280636414, "learning_rate": 5.613127739112918e-06, "loss": 0.5192619323730469, "step": 37805 }, { "epoch": 0.32693188991016076, "grad_norm": 3.7719651433310393, "learning_rate": 5.61302765782251e-06, "loss": 0.0544647216796875, "step": 37810 }, { "epoch": 0.326975123431704, "grad_norm": 37.49363276951319, "learning_rate": 5.61292756448105e-06, "loss": 0.313104248046875, "step": 37815 }, { "epoch": 0.32701835695324727, "grad_norm": 8.805176236931267, "learning_rate": 5.612827459088999e-06, "loss": 0.05970001220703125, "step": 37820 }, { "epoch": 0.32706159047479055, "grad_norm": 14.14319899220817, "learning_rate": 5.612727341646818e-06, "loss": 0.24501266479492187, "step": 37825 }, { "epoch": 0.3271048239963338, "grad_norm": 5.494363220350285, "learning_rate": 5.612627212154969e-06, "loss": 0.12558135986328126, "step": 37830 }, { "epoch": 0.32714805751787707, "grad_norm": 5.476868508092531, "learning_rate": 5.6125270706139144e-06, "loss": 0.13477783203125, "step": 37835 }, { "epoch": 0.32719129103942035, "grad_norm": 33.634925716528656, "learning_rate": 5.612426917024117e-06, "loss": 0.14063720703125, "step": 37840 }, { "epoch": 0.3272345245609636, "grad_norm": 0.6185941436473318, "learning_rate": 5.612326751386036e-06, "loss": 0.126953125, "step": 37845 }, { "epoch": 0.32727775808250686, "grad_norm": 36.513327617766656, "learning_rate": 5.612226573700135e-06, "loss": 0.5211181640625, "step": 37850 }, { "epoch": 0.3273209916040501, "grad_norm": 19.94161460449619, "learning_rate": 5.612126383966877e-06, "loss": 0.123974609375, "step": 37855 }, { "epoch": 0.3273642251255934, "grad_norm": 22.360511821770466, "learning_rate": 5.612026182186722e-06, "loss": 0.1098388671875, "step": 37860 }, { "epoch": 0.32740745864713666, "grad_norm": 4.264160815508514, "learning_rate": 5.611925968360133e-06, "loss": 0.19434432983398436, "step": 37865 }, { "epoch": 0.3274506921686799, "grad_norm": 11.575474421589822, "learning_rate": 5.611825742487572e-06, "loss": 0.176068115234375, "step": 37870 }, { "epoch": 0.3274939256902232, "grad_norm": 23.38840436237618, "learning_rate": 5.611725504569503e-06, "loss": 0.21048393249511718, "step": 37875 }, { "epoch": 0.32753715921176646, "grad_norm": 34.163782360300964, "learning_rate": 5.611625254606385e-06, "loss": 0.3269775390625, "step": 37880 }, { "epoch": 0.3275803927333097, "grad_norm": 1.496926315544175, "learning_rate": 5.611524992598683e-06, "loss": 0.07734184265136719, "step": 37885 }, { "epoch": 0.327623626254853, "grad_norm": 20.07365701337901, "learning_rate": 5.611424718546858e-06, "loss": 0.1415252685546875, "step": 37890 }, { "epoch": 0.3276668597763962, "grad_norm": 19.39430425443837, "learning_rate": 5.611324432451373e-06, "loss": 0.21524200439453126, "step": 37895 }, { "epoch": 0.3277100932979395, "grad_norm": 29.85924039314617, "learning_rate": 5.611224134312691e-06, "loss": 0.333477783203125, "step": 37900 }, { "epoch": 0.32775332681948277, "grad_norm": 21.886450488951574, "learning_rate": 5.611123824131274e-06, "loss": 0.22770538330078124, "step": 37905 }, { "epoch": 0.327796560341026, "grad_norm": 3.3322600498355053, "learning_rate": 5.6110235019075845e-06, "loss": 0.070025634765625, "step": 37910 }, { "epoch": 0.3278397938625693, "grad_norm": 2.3651604865844336, "learning_rate": 5.6109231676420855e-06, "loss": 0.02249908447265625, "step": 37915 }, { "epoch": 0.32788302738411257, "grad_norm": 18.187956638446337, "learning_rate": 5.610822821335239e-06, "loss": 0.056463623046875, "step": 37920 }, { "epoch": 0.3279262609056558, "grad_norm": 0.8160300922845001, "learning_rate": 5.610722462987508e-06, "loss": 0.155889892578125, "step": 37925 }, { "epoch": 0.3279694944271991, "grad_norm": 8.641447157447741, "learning_rate": 5.610622092599357e-06, "loss": 0.1099884033203125, "step": 37930 }, { "epoch": 0.3280127279487423, "grad_norm": 0.7183657623522692, "learning_rate": 5.610521710171247e-06, "loss": 0.0771148681640625, "step": 37935 }, { "epoch": 0.3280559614702856, "grad_norm": 6.08353336832198, "learning_rate": 5.610421315703641e-06, "loss": 0.07274665832519531, "step": 37940 }, { "epoch": 0.3280991949918289, "grad_norm": 5.612844994968761, "learning_rate": 5.610320909197003e-06, "loss": 0.112164306640625, "step": 37945 }, { "epoch": 0.3281424285133721, "grad_norm": 28.397378052973693, "learning_rate": 5.610220490651796e-06, "loss": 0.24570541381835936, "step": 37950 }, { "epoch": 0.3281856620349154, "grad_norm": 48.25345673816122, "learning_rate": 5.610120060068482e-06, "loss": 0.196282958984375, "step": 37955 }, { "epoch": 0.3282288955564587, "grad_norm": 16.219464205121973, "learning_rate": 5.610019617447526e-06, "loss": 0.12397184371948242, "step": 37960 }, { "epoch": 0.3282721290780019, "grad_norm": 0.9015148744340301, "learning_rate": 5.609919162789388e-06, "loss": 0.10326461791992188, "step": 37965 }, { "epoch": 0.3283153625995452, "grad_norm": 26.223293120483763, "learning_rate": 5.609818696094536e-06, "loss": 0.256951904296875, "step": 37970 }, { "epoch": 0.3283585961210884, "grad_norm": 9.447119393024689, "learning_rate": 5.60971821736343e-06, "loss": 0.09398193359375, "step": 37975 }, { "epoch": 0.3284018296426317, "grad_norm": 6.069111400329955, "learning_rate": 5.609617726596533e-06, "loss": 0.5559722900390625, "step": 37980 }, { "epoch": 0.328445063164175, "grad_norm": 2.2426253153815825, "learning_rate": 5.609517223794311e-06, "loss": 0.0354522705078125, "step": 37985 }, { "epoch": 0.3284882966857182, "grad_norm": 3.05565396905186, "learning_rate": 5.609416708957225e-06, "loss": 0.04483108520507813, "step": 37990 }, { "epoch": 0.3285315302072615, "grad_norm": 20.423779498446645, "learning_rate": 5.60931618208574e-06, "loss": 0.147625732421875, "step": 37995 }, { "epoch": 0.3285747637288048, "grad_norm": 10.234315931624227, "learning_rate": 5.6092156431803195e-06, "loss": 0.1276763916015625, "step": 38000 }, { "epoch": 0.328617997250348, "grad_norm": 24.27801906768439, "learning_rate": 5.609115092241427e-06, "loss": 0.2525520324707031, "step": 38005 }, { "epoch": 0.3286612307718913, "grad_norm": 45.003376801452276, "learning_rate": 5.609014529269526e-06, "loss": 0.4483001708984375, "step": 38010 }, { "epoch": 0.3287044642934346, "grad_norm": 11.387973301540423, "learning_rate": 5.608913954265082e-06, "loss": 0.14645614624023437, "step": 38015 }, { "epoch": 0.3287476978149778, "grad_norm": 0.6524681305444504, "learning_rate": 5.608813367228556e-06, "loss": 0.29562911987304685, "step": 38020 }, { "epoch": 0.3287909313365211, "grad_norm": 8.417172085143244, "learning_rate": 5.6087127681604136e-06, "loss": 0.0958709716796875, "step": 38025 }, { "epoch": 0.3288341648580643, "grad_norm": 1.8107558634493008, "learning_rate": 5.6086121570611184e-06, "loss": 0.1254364013671875, "step": 38030 }, { "epoch": 0.3288773983796076, "grad_norm": 14.995190156747963, "learning_rate": 5.608511533931134e-06, "loss": 0.07496871948242187, "step": 38035 }, { "epoch": 0.3289206319011509, "grad_norm": 0.7596559981456072, "learning_rate": 5.608410898770925e-06, "loss": 0.2003509521484375, "step": 38040 }, { "epoch": 0.3289638654226941, "grad_norm": 4.207865708941754, "learning_rate": 5.608310251580956e-06, "loss": 0.4789581298828125, "step": 38045 }, { "epoch": 0.3290070989442374, "grad_norm": 0.7120858398534542, "learning_rate": 5.608209592361691e-06, "loss": 0.1657470703125, "step": 38050 }, { "epoch": 0.3290503324657807, "grad_norm": 10.562959691982181, "learning_rate": 5.608108921113592e-06, "loss": 0.20445947647094725, "step": 38055 }, { "epoch": 0.3290935659873239, "grad_norm": 4.093798930421912, "learning_rate": 5.6080082378371265e-06, "loss": 0.11235809326171875, "step": 38060 }, { "epoch": 0.3291367995088672, "grad_norm": 6.521531671748213, "learning_rate": 5.607907542532757e-06, "loss": 0.11488304138183594, "step": 38065 }, { "epoch": 0.32918003303041043, "grad_norm": 1.656692885324622, "learning_rate": 5.607806835200949e-06, "loss": 0.089801025390625, "step": 38070 }, { "epoch": 0.3292232665519537, "grad_norm": 5.291803191241972, "learning_rate": 5.607706115842166e-06, "loss": 0.2637847900390625, "step": 38075 }, { "epoch": 0.329266500073497, "grad_norm": 2.6033358469496326, "learning_rate": 5.607605384456872e-06, "loss": 0.051100540161132815, "step": 38080 }, { "epoch": 0.32930973359504023, "grad_norm": 26.116429157778963, "learning_rate": 5.607504641045533e-06, "loss": 0.3741546630859375, "step": 38085 }, { "epoch": 0.3293529671165835, "grad_norm": 2.7363023352477827, "learning_rate": 5.607403885608614e-06, "loss": 0.049716758728027347, "step": 38090 }, { "epoch": 0.3293962006381268, "grad_norm": 2.1405011845698554, "learning_rate": 5.607303118146577e-06, "loss": 0.048204803466796876, "step": 38095 }, { "epoch": 0.32943943415967003, "grad_norm": 23.193488407486985, "learning_rate": 5.6072023386598896e-06, "loss": 0.15914154052734375, "step": 38100 }, { "epoch": 0.3294826676812133, "grad_norm": 1.256997146897094, "learning_rate": 5.607101547149014e-06, "loss": 0.05874710083007813, "step": 38105 }, { "epoch": 0.32952590120275654, "grad_norm": 16.997469995446544, "learning_rate": 5.607000743614418e-06, "loss": 0.490826416015625, "step": 38110 }, { "epoch": 0.32956913472429983, "grad_norm": 2.536542052192595, "learning_rate": 5.606899928056564e-06, "loss": 0.08173828125, "step": 38115 }, { "epoch": 0.3296123682458431, "grad_norm": 0.5422481475251945, "learning_rate": 5.606799100475918e-06, "loss": 0.1930908203125, "step": 38120 }, { "epoch": 0.32965560176738634, "grad_norm": 0.7975993844185755, "learning_rate": 5.6066982608729455e-06, "loss": 0.06838760375976563, "step": 38125 }, { "epoch": 0.3296988352889296, "grad_norm": 16.92134761671956, "learning_rate": 5.60659740924811e-06, "loss": 0.09665145874023437, "step": 38130 }, { "epoch": 0.3297420688104729, "grad_norm": 11.113142685526107, "learning_rate": 5.606496545601878e-06, "loss": 0.163067626953125, "step": 38135 }, { "epoch": 0.32978530233201614, "grad_norm": 1.7030894177719074, "learning_rate": 5.606395669934715e-06, "loss": 0.0126129150390625, "step": 38140 }, { "epoch": 0.3298285358535594, "grad_norm": 75.86787920530816, "learning_rate": 5.6062947822470845e-06, "loss": 0.36470794677734375, "step": 38145 }, { "epoch": 0.32987176937510265, "grad_norm": 6.40181828407473, "learning_rate": 5.6061938825394535e-06, "loss": 0.2074127197265625, "step": 38150 }, { "epoch": 0.32991500289664594, "grad_norm": 6.997173219905954, "learning_rate": 5.606092970812286e-06, "loss": 0.3090728759765625, "step": 38155 }, { "epoch": 0.3299582364181892, "grad_norm": 3.3491408705250625, "learning_rate": 5.605992047066047e-06, "loss": 0.10818061828613282, "step": 38160 }, { "epoch": 0.33000146993973245, "grad_norm": 1.522536794760129, "learning_rate": 5.6058911113012045e-06, "loss": 0.12070770263671875, "step": 38165 }, { "epoch": 0.33004470346127573, "grad_norm": 5.003320087297679, "learning_rate": 5.605790163518222e-06, "loss": 0.16214828491210936, "step": 38170 }, { "epoch": 0.330087936982819, "grad_norm": 5.160809786873963, "learning_rate": 5.605689203717566e-06, "loss": 0.061932373046875, "step": 38175 }, { "epoch": 0.33013117050436225, "grad_norm": 1.246503031424561, "learning_rate": 5.605588231899701e-06, "loss": 0.0829803466796875, "step": 38180 }, { "epoch": 0.33017440402590553, "grad_norm": 3.4472656014438603, "learning_rate": 5.605487248065094e-06, "loss": 0.12843704223632812, "step": 38185 }, { "epoch": 0.3302176375474488, "grad_norm": 25.906123396372646, "learning_rate": 5.605386252214208e-06, "loss": 0.24125289916992188, "step": 38190 }, { "epoch": 0.33026087106899205, "grad_norm": 1.5703142371014143, "learning_rate": 5.605285244347514e-06, "loss": 0.049755287170410153, "step": 38195 }, { "epoch": 0.33030410459053533, "grad_norm": 9.73540031207226, "learning_rate": 5.605184224465472e-06, "loss": 0.1035430908203125, "step": 38200 }, { "epoch": 0.33034733811207856, "grad_norm": 0.625786520275332, "learning_rate": 5.605083192568552e-06, "loss": 0.09929065704345703, "step": 38205 }, { "epoch": 0.33039057163362184, "grad_norm": 26.392425284175456, "learning_rate": 5.6049821486572185e-06, "loss": 0.12117166519165039, "step": 38210 }, { "epoch": 0.33043380515516513, "grad_norm": 17.223566174645413, "learning_rate": 5.604881092731936e-06, "loss": 0.36929664611816404, "step": 38215 }, { "epoch": 0.33047703867670836, "grad_norm": 2.5142073934762217, "learning_rate": 5.604780024793174e-06, "loss": 0.08039093017578125, "step": 38220 }, { "epoch": 0.33052027219825164, "grad_norm": 4.363548245664671, "learning_rate": 5.604678944841396e-06, "loss": 0.11904182434082031, "step": 38225 }, { "epoch": 0.3305635057197949, "grad_norm": 46.54928248002371, "learning_rate": 5.604577852877068e-06, "loss": 0.046643829345703124, "step": 38230 }, { "epoch": 0.33060673924133815, "grad_norm": 1.094922074112609, "learning_rate": 5.604476748900659e-06, "loss": 0.1191925048828125, "step": 38235 }, { "epoch": 0.33064997276288144, "grad_norm": 28.62811240650272, "learning_rate": 5.60437563291263e-06, "loss": 0.2037200927734375, "step": 38240 }, { "epoch": 0.33069320628442467, "grad_norm": 8.389190865720073, "learning_rate": 5.6042745049134544e-06, "loss": 0.34393796920776365, "step": 38245 }, { "epoch": 0.33073643980596795, "grad_norm": 14.025543145034773, "learning_rate": 5.604173364903593e-06, "loss": 0.1658926010131836, "step": 38250 }, { "epoch": 0.33077967332751124, "grad_norm": 19.80617960484152, "learning_rate": 5.604072212883514e-06, "loss": 0.13936538696289064, "step": 38255 }, { "epoch": 0.33082290684905447, "grad_norm": 36.89738540354014, "learning_rate": 5.603971048853685e-06, "loss": 0.35893096923828127, "step": 38260 }, { "epoch": 0.33086614037059775, "grad_norm": 13.97352254957545, "learning_rate": 5.603869872814571e-06, "loss": 0.23505706787109376, "step": 38265 }, { "epoch": 0.33090937389214103, "grad_norm": 8.37470258082947, "learning_rate": 5.603768684766639e-06, "loss": 0.20770263671875, "step": 38270 }, { "epoch": 0.33095260741368426, "grad_norm": 2.8005936676279437, "learning_rate": 5.603667484710357e-06, "loss": 0.1596038818359375, "step": 38275 }, { "epoch": 0.33099584093522755, "grad_norm": 1.3269183817638044, "learning_rate": 5.60356627264619e-06, "loss": 0.1489105224609375, "step": 38280 }, { "epoch": 0.3310390744567708, "grad_norm": 2.593896048889864, "learning_rate": 5.603465048574605e-06, "loss": 0.161846923828125, "step": 38285 }, { "epoch": 0.33108230797831406, "grad_norm": 3.7272860025696883, "learning_rate": 5.60336381249607e-06, "loss": 0.171990966796875, "step": 38290 }, { "epoch": 0.33112554149985735, "grad_norm": 5.568867709252485, "learning_rate": 5.60326256441105e-06, "loss": 0.2724884033203125, "step": 38295 }, { "epoch": 0.3311687750214006, "grad_norm": 18.13805735832987, "learning_rate": 5.603161304320014e-06, "loss": 0.25483551025390627, "step": 38300 }, { "epoch": 0.33121200854294386, "grad_norm": 1.9055956903260178, "learning_rate": 5.603060032223428e-06, "loss": 0.0940460205078125, "step": 38305 }, { "epoch": 0.33125524206448714, "grad_norm": 31.166646441522307, "learning_rate": 5.602958748121759e-06, "loss": 0.23806610107421874, "step": 38310 }, { "epoch": 0.33129847558603037, "grad_norm": 30.464050498470417, "learning_rate": 5.602857452015474e-06, "loss": 0.3272418975830078, "step": 38315 }, { "epoch": 0.33134170910757366, "grad_norm": 10.414099057090727, "learning_rate": 5.602756143905041e-06, "loss": 0.24360599517822265, "step": 38320 }, { "epoch": 0.3313849426291169, "grad_norm": 79.68942545691324, "learning_rate": 5.602654823790926e-06, "loss": 0.1801727294921875, "step": 38325 }, { "epoch": 0.33142817615066017, "grad_norm": 7.506952076209827, "learning_rate": 5.602553491673598e-06, "loss": 0.0925201416015625, "step": 38330 }, { "epoch": 0.33147140967220345, "grad_norm": 3.6642993764891973, "learning_rate": 5.602452147553522e-06, "loss": 0.1900970458984375, "step": 38335 }, { "epoch": 0.3315146431937467, "grad_norm": 24.03140274983541, "learning_rate": 5.602350791431167e-06, "loss": 0.16298370361328124, "step": 38340 }, { "epoch": 0.33155787671528997, "grad_norm": 25.178937115462535, "learning_rate": 5.602249423307e-06, "loss": 0.38961639404296877, "step": 38345 }, { "epoch": 0.33160111023683325, "grad_norm": 2.525046895622453, "learning_rate": 5.6021480431814895e-06, "loss": 0.07570953369140625, "step": 38350 }, { "epoch": 0.3316443437583765, "grad_norm": 7.44534203406766, "learning_rate": 5.602046651055102e-06, "loss": 0.33398590087890623, "step": 38355 }, { "epoch": 0.33168757727991977, "grad_norm": 7.682470548330904, "learning_rate": 5.601945246928304e-06, "loss": 0.0465423583984375, "step": 38360 }, { "epoch": 0.33173081080146305, "grad_norm": 4.616622657144741, "learning_rate": 5.601843830801565e-06, "loss": 0.160400390625, "step": 38365 }, { "epoch": 0.3317740443230063, "grad_norm": 4.648723153687763, "learning_rate": 5.601742402675353e-06, "loss": 0.07884063720703124, "step": 38370 }, { "epoch": 0.33181727784454956, "grad_norm": 1.0431663100125632, "learning_rate": 5.601640962550134e-06, "loss": 0.19973907470703126, "step": 38375 }, { "epoch": 0.3318605113660928, "grad_norm": 26.888230956833766, "learning_rate": 5.601539510426377e-06, "loss": 0.1183746337890625, "step": 38380 }, { "epoch": 0.3319037448876361, "grad_norm": 34.1688039527553, "learning_rate": 5.6014380463045494e-06, "loss": 0.4886932373046875, "step": 38385 }, { "epoch": 0.33194697840917936, "grad_norm": 58.7182192727584, "learning_rate": 5.60133657018512e-06, "loss": 0.18527641296386718, "step": 38390 }, { "epoch": 0.3319902119307226, "grad_norm": 1.479367688088509, "learning_rate": 5.601235082068556e-06, "loss": 0.1419036865234375, "step": 38395 }, { "epoch": 0.3320334454522659, "grad_norm": 45.254403192823226, "learning_rate": 5.6011335819553255e-06, "loss": 0.2336090087890625, "step": 38400 }, { "epoch": 0.33207667897380916, "grad_norm": 1.3025691720682877, "learning_rate": 5.601032069845897e-06, "loss": 0.52103271484375, "step": 38405 }, { "epoch": 0.3321199124953524, "grad_norm": 17.312187440686447, "learning_rate": 5.600930545740738e-06, "loss": 0.10012760162353515, "step": 38410 }, { "epoch": 0.33216314601689567, "grad_norm": 15.443560419168158, "learning_rate": 5.600829009640318e-06, "loss": 0.11622848510742187, "step": 38415 }, { "epoch": 0.3322063795384389, "grad_norm": 44.41191616024868, "learning_rate": 5.600727461545104e-06, "loss": 0.3395538330078125, "step": 38420 }, { "epoch": 0.3322496130599822, "grad_norm": 9.538598434324113, "learning_rate": 5.600625901455565e-06, "loss": 0.06323699951171875, "step": 38425 }, { "epoch": 0.33229284658152547, "grad_norm": 0.14327829847940818, "learning_rate": 5.600524329372168e-06, "loss": 0.19117889404296876, "step": 38430 }, { "epoch": 0.3323360801030687, "grad_norm": 30.624837327460817, "learning_rate": 5.600422745295384e-06, "loss": 0.21286487579345703, "step": 38435 }, { "epoch": 0.332379313624612, "grad_norm": 21.296288933312322, "learning_rate": 5.600321149225678e-06, "loss": 0.32816925048828127, "step": 38440 }, { "epoch": 0.33242254714615527, "grad_norm": 0.6767825910948475, "learning_rate": 5.600219541163522e-06, "loss": 0.28645763397216795, "step": 38445 }, { "epoch": 0.3324657806676985, "grad_norm": 2.5578633104471065, "learning_rate": 5.600117921109384e-06, "loss": 0.06978836059570312, "step": 38450 }, { "epoch": 0.3325090141892418, "grad_norm": 1.117448186074832, "learning_rate": 5.600016289063731e-06, "loss": 0.18756103515625, "step": 38455 }, { "epoch": 0.332552247710785, "grad_norm": 21.317374091307922, "learning_rate": 5.599914645027032e-06, "loss": 0.1900959014892578, "step": 38460 }, { "epoch": 0.3325954812323283, "grad_norm": 39.548322361997016, "learning_rate": 5.5998129889997575e-06, "loss": 0.312261962890625, "step": 38465 }, { "epoch": 0.3326387147538716, "grad_norm": 18.687501541412956, "learning_rate": 5.599711320982375e-06, "loss": 0.06821212768554688, "step": 38470 }, { "epoch": 0.3326819482754148, "grad_norm": 32.2413825350389, "learning_rate": 5.599609640975353e-06, "loss": 0.3309906005859375, "step": 38475 }, { "epoch": 0.3327251817969581, "grad_norm": 9.182485462114887, "learning_rate": 5.5995079489791616e-06, "loss": 0.06485481262207031, "step": 38480 }, { "epoch": 0.3327684153185014, "grad_norm": 4.052374553123705, "learning_rate": 5.59940624499427e-06, "loss": 0.07924346923828125, "step": 38485 }, { "epoch": 0.3328116488400446, "grad_norm": 21.1815399552894, "learning_rate": 5.599304529021145e-06, "loss": 0.18089599609375, "step": 38490 }, { "epoch": 0.3328548823615879, "grad_norm": 14.83915674790512, "learning_rate": 5.599202801060257e-06, "loss": 0.2208892822265625, "step": 38495 }, { "epoch": 0.3328981158831311, "grad_norm": 9.444122274171937, "learning_rate": 5.5991010611120775e-06, "loss": 0.21294097900390624, "step": 38500 }, { "epoch": 0.3329413494046744, "grad_norm": 6.70529723954426, "learning_rate": 5.598999309177072e-06, "loss": 0.21156463623046876, "step": 38505 }, { "epoch": 0.3329845829262177, "grad_norm": 7.985832266935573, "learning_rate": 5.598897545255711e-06, "loss": 0.39378662109375, "step": 38510 }, { "epoch": 0.3330278164477609, "grad_norm": 11.691982979917745, "learning_rate": 5.598795769348465e-06, "loss": 0.35438232421875, "step": 38515 }, { "epoch": 0.3330710499693042, "grad_norm": 1.326277017181044, "learning_rate": 5.598693981455802e-06, "loss": 0.13685531616210939, "step": 38520 }, { "epoch": 0.3331142834908475, "grad_norm": 7.278539385474268, "learning_rate": 5.598592181578193e-06, "loss": 0.11130867004394532, "step": 38525 }, { "epoch": 0.3331575170123907, "grad_norm": 6.632511063752761, "learning_rate": 5.598490369716105e-06, "loss": 0.1572662353515625, "step": 38530 }, { "epoch": 0.333200750533934, "grad_norm": 21.135002105560684, "learning_rate": 5.59838854587001e-06, "loss": 0.0897003173828125, "step": 38535 }, { "epoch": 0.3332439840554773, "grad_norm": 5.947344363007835, "learning_rate": 5.598286710040376e-06, "loss": 0.061602783203125, "step": 38540 }, { "epoch": 0.3332872175770205, "grad_norm": 13.914978168246035, "learning_rate": 5.598184862227674e-06, "loss": 0.13011932373046875, "step": 38545 }, { "epoch": 0.3333304510985638, "grad_norm": 6.904078823631049, "learning_rate": 5.598083002432373e-06, "loss": 0.18043212890625, "step": 38550 }, { "epoch": 0.333373684620107, "grad_norm": 1.3127014302054434, "learning_rate": 5.597981130654942e-06, "loss": 0.0871246337890625, "step": 38555 }, { "epoch": 0.3334169181416503, "grad_norm": 11.138779008852548, "learning_rate": 5.597879246895852e-06, "loss": 0.07795238494873047, "step": 38560 }, { "epoch": 0.3334601516631936, "grad_norm": 60.6065135647808, "learning_rate": 5.5977773511555725e-06, "loss": 0.37574462890625, "step": 38565 }, { "epoch": 0.3335033851847368, "grad_norm": 2.6426288401373377, "learning_rate": 5.597675443434573e-06, "loss": 0.04073944091796875, "step": 38570 }, { "epoch": 0.3335466187062801, "grad_norm": 5.6372871503975, "learning_rate": 5.597573523733325e-06, "loss": 0.11834516525268554, "step": 38575 }, { "epoch": 0.3335898522278234, "grad_norm": 4.855118953128878, "learning_rate": 5.597471592052296e-06, "loss": 0.21749191284179686, "step": 38580 }, { "epoch": 0.3336330857493666, "grad_norm": 3.039122944672827, "learning_rate": 5.5973696483919586e-06, "loss": 0.4734977722167969, "step": 38585 }, { "epoch": 0.3336763192709099, "grad_norm": 39.37454654863667, "learning_rate": 5.597267692752781e-06, "loss": 0.2378387451171875, "step": 38590 }, { "epoch": 0.33371955279245313, "grad_norm": 4.855644983972508, "learning_rate": 5.597165725135235e-06, "loss": 0.2326873779296875, "step": 38595 }, { "epoch": 0.3337627863139964, "grad_norm": 4.353323431549696, "learning_rate": 5.59706374553979e-06, "loss": 0.224969482421875, "step": 38600 }, { "epoch": 0.3338060198355397, "grad_norm": 0.25626554992590783, "learning_rate": 5.5969617539669165e-06, "loss": 0.12534637451171876, "step": 38605 }, { "epoch": 0.33384925335708293, "grad_norm": 6.399929775670022, "learning_rate": 5.596859750417085e-06, "loss": 0.1400846481323242, "step": 38610 }, { "epoch": 0.3338924868786262, "grad_norm": 3.2616119928783474, "learning_rate": 5.596757734890766e-06, "loss": 0.132757568359375, "step": 38615 }, { "epoch": 0.3339357204001695, "grad_norm": 36.66735031534822, "learning_rate": 5.596655707388429e-06, "loss": 0.2263408660888672, "step": 38620 }, { "epoch": 0.33397895392171273, "grad_norm": 51.420509726972384, "learning_rate": 5.596553667910546e-06, "loss": 0.1976184844970703, "step": 38625 }, { "epoch": 0.334022187443256, "grad_norm": 6.460959468883038, "learning_rate": 5.596451616457586e-06, "loss": 0.09296875, "step": 38630 }, { "epoch": 0.33406542096479924, "grad_norm": 1.74527907423086, "learning_rate": 5.596349553030022e-06, "loss": 0.3911773681640625, "step": 38635 }, { "epoch": 0.3341086544863425, "grad_norm": 4.50325140513256, "learning_rate": 5.5962474776283215e-06, "loss": 0.1156646728515625, "step": 38640 }, { "epoch": 0.3341518880078858, "grad_norm": 4.596782148635876, "learning_rate": 5.596145390252959e-06, "loss": 0.08562164306640625, "step": 38645 }, { "epoch": 0.33419512152942904, "grad_norm": 8.293972170080545, "learning_rate": 5.596043290904402e-06, "loss": 0.22874908447265624, "step": 38650 }, { "epoch": 0.3342383550509723, "grad_norm": 3.1884206215325293, "learning_rate": 5.5959411795831225e-06, "loss": 0.3980224609375, "step": 38655 }, { "epoch": 0.3342815885725156, "grad_norm": 18.846032855443116, "learning_rate": 5.595839056289592e-06, "loss": 0.13489303588867188, "step": 38660 }, { "epoch": 0.33432482209405884, "grad_norm": 26.07378378194812, "learning_rate": 5.595736921024282e-06, "loss": 0.1745147705078125, "step": 38665 }, { "epoch": 0.3343680556156021, "grad_norm": 22.82393671558354, "learning_rate": 5.595634773787662e-06, "loss": 0.22628555297851563, "step": 38670 }, { "epoch": 0.33441128913714535, "grad_norm": 0.09241460163355086, "learning_rate": 5.595532614580204e-06, "loss": 0.19213504791259767, "step": 38675 }, { "epoch": 0.33445452265868864, "grad_norm": 16.155051949212748, "learning_rate": 5.595430443402378e-06, "loss": 0.440069580078125, "step": 38680 }, { "epoch": 0.3344977561802319, "grad_norm": 6.926396834303928, "learning_rate": 5.595328260254658e-06, "loss": 0.2227691650390625, "step": 38685 }, { "epoch": 0.33454098970177515, "grad_norm": 9.509317590316977, "learning_rate": 5.595226065137512e-06, "loss": 0.2951568603515625, "step": 38690 }, { "epoch": 0.33458422322331843, "grad_norm": 25.3451556757118, "learning_rate": 5.595123858051413e-06, "loss": 0.3104156494140625, "step": 38695 }, { "epoch": 0.3346274567448617, "grad_norm": 1.806526631526786, "learning_rate": 5.595021638996832e-06, "loss": 0.3644828796386719, "step": 38700 }, { "epoch": 0.33467069026640495, "grad_norm": 0.8854379365882084, "learning_rate": 5.594919407974241e-06, "loss": 0.30990028381347656, "step": 38705 }, { "epoch": 0.33471392378794823, "grad_norm": 1.0827301729945111, "learning_rate": 5.59481716498411e-06, "loss": 0.0995330810546875, "step": 38710 }, { "epoch": 0.33475715730949146, "grad_norm": 8.084750837103531, "learning_rate": 5.594714910026913e-06, "loss": 0.11448516845703124, "step": 38715 }, { "epoch": 0.33480039083103474, "grad_norm": 34.32187732313767, "learning_rate": 5.594612643103119e-06, "loss": 0.44413604736328127, "step": 38720 }, { "epoch": 0.33484362435257803, "grad_norm": 3.7924481300318633, "learning_rate": 5.5945103642132e-06, "loss": 0.0792236328125, "step": 38725 }, { "epoch": 0.33488685787412126, "grad_norm": 8.245293038289198, "learning_rate": 5.59440807335763e-06, "loss": 0.15243377685546874, "step": 38730 }, { "epoch": 0.33493009139566454, "grad_norm": 40.37470727515944, "learning_rate": 5.594305770536879e-06, "loss": 0.4286468505859375, "step": 38735 }, { "epoch": 0.3349733249172078, "grad_norm": 7.9790006011592975, "learning_rate": 5.594203455751418e-06, "loss": 0.15291938781738282, "step": 38740 }, { "epoch": 0.33501655843875106, "grad_norm": 3.5956125649668, "learning_rate": 5.594101129001721e-06, "loss": 0.022078227996826173, "step": 38745 }, { "epoch": 0.33505979196029434, "grad_norm": 0.8318893348585072, "learning_rate": 5.593998790288258e-06, "loss": 0.29990234375, "step": 38750 }, { "epoch": 0.3351030254818376, "grad_norm": 6.01650791575793, "learning_rate": 5.593896439611502e-06, "loss": 0.2156646728515625, "step": 38755 }, { "epoch": 0.33514625900338085, "grad_norm": 5.9598037405913065, "learning_rate": 5.593794076971925e-06, "loss": 0.0686981201171875, "step": 38760 }, { "epoch": 0.33518949252492414, "grad_norm": 14.530739883474274, "learning_rate": 5.593691702369999e-06, "loss": 0.31765403747558596, "step": 38765 }, { "epoch": 0.33523272604646737, "grad_norm": 1.970991155481272, "learning_rate": 5.593589315806196e-06, "loss": 0.13528709411621093, "step": 38770 }, { "epoch": 0.33527595956801065, "grad_norm": 1.0431275541636895, "learning_rate": 5.5934869172809885e-06, "loss": 0.1916015625, "step": 38775 }, { "epoch": 0.33531919308955394, "grad_norm": 6.569647918036614, "learning_rate": 5.593384506794847e-06, "loss": 0.05855255126953125, "step": 38780 }, { "epoch": 0.33536242661109716, "grad_norm": 0.12752058493937457, "learning_rate": 5.5932820843482475e-06, "loss": 0.04552650451660156, "step": 38785 }, { "epoch": 0.33540566013264045, "grad_norm": 12.384255649591974, "learning_rate": 5.5931796499416595e-06, "loss": 0.16180496215820311, "step": 38790 }, { "epoch": 0.33544889365418373, "grad_norm": 74.87765360932364, "learning_rate": 5.593077203575555e-06, "loss": 0.47051467895507815, "step": 38795 }, { "epoch": 0.33549212717572696, "grad_norm": 27.68994342630397, "learning_rate": 5.592974745250409e-06, "loss": 0.08906402587890624, "step": 38800 }, { "epoch": 0.33553536069727025, "grad_norm": 8.128778569140788, "learning_rate": 5.592872274966691e-06, "loss": 0.13731231689453124, "step": 38805 }, { "epoch": 0.3355785942188135, "grad_norm": 4.92044245240729, "learning_rate": 5.592769792724877e-06, "loss": 0.28147430419921876, "step": 38810 }, { "epoch": 0.33562182774035676, "grad_norm": 24.20511167385198, "learning_rate": 5.592667298525436e-06, "loss": 0.19701995849609374, "step": 38815 }, { "epoch": 0.33566506126190004, "grad_norm": 9.265824714743603, "learning_rate": 5.592564792368844e-06, "loss": 0.17382965087890626, "step": 38820 }, { "epoch": 0.3357082947834433, "grad_norm": 6.313081863958737, "learning_rate": 5.592462274255572e-06, "loss": 0.27777786254882814, "step": 38825 }, { "epoch": 0.33575152830498656, "grad_norm": 7.247633242658407, "learning_rate": 5.592359744186092e-06, "loss": 0.3016014099121094, "step": 38830 }, { "epoch": 0.33579476182652984, "grad_norm": 12.02489996546834, "learning_rate": 5.592257202160879e-06, "loss": 0.18936920166015625, "step": 38835 }, { "epoch": 0.33583799534807307, "grad_norm": 0.3968824262225669, "learning_rate": 5.592154648180405e-06, "loss": 0.11562271118164062, "step": 38840 }, { "epoch": 0.33588122886961636, "grad_norm": 0.5940252663148824, "learning_rate": 5.592052082245142e-06, "loss": 0.09776153564453124, "step": 38845 }, { "epoch": 0.3359244623911596, "grad_norm": 6.026737746672777, "learning_rate": 5.591949504355564e-06, "loss": 0.3493377685546875, "step": 38850 }, { "epoch": 0.33596769591270287, "grad_norm": 17.88978646136788, "learning_rate": 5.591846914512144e-06, "loss": 0.06319656372070312, "step": 38855 }, { "epoch": 0.33601092943424615, "grad_norm": 0.5295035889641082, "learning_rate": 5.591744312715355e-06, "loss": 0.109014892578125, "step": 38860 }, { "epoch": 0.3360541629557894, "grad_norm": 19.187076439643278, "learning_rate": 5.591641698965671e-06, "loss": 0.1744609832763672, "step": 38865 }, { "epoch": 0.33609739647733267, "grad_norm": 2.8704424565664035, "learning_rate": 5.591539073263563e-06, "loss": 0.2770660400390625, "step": 38870 }, { "epoch": 0.33614062999887595, "grad_norm": 6.19580082787114, "learning_rate": 5.591436435609507e-06, "loss": 0.05477752685546875, "step": 38875 }, { "epoch": 0.3361838635204192, "grad_norm": 0.7262594193223132, "learning_rate": 5.591333786003975e-06, "loss": 0.15220794677734376, "step": 38880 }, { "epoch": 0.33622709704196246, "grad_norm": 18.587807634840935, "learning_rate": 5.59123112444744e-06, "loss": 0.4241767883300781, "step": 38885 }, { "epoch": 0.3362703305635057, "grad_norm": 19.247491636144698, "learning_rate": 5.591128450940376e-06, "loss": 0.07494964599609374, "step": 38890 }, { "epoch": 0.336313564085049, "grad_norm": 4.451420761449283, "learning_rate": 5.591025765483257e-06, "loss": 0.2704833984375, "step": 38895 }, { "epoch": 0.33635679760659226, "grad_norm": 26.896943971567048, "learning_rate": 5.590923068076556e-06, "loss": 0.2140625, "step": 38900 }, { "epoch": 0.3364000311281355, "grad_norm": 40.924111744283245, "learning_rate": 5.5908203587207465e-06, "loss": 0.14511566162109374, "step": 38905 }, { "epoch": 0.3364432646496788, "grad_norm": 0.12723821715826636, "learning_rate": 5.590717637416302e-06, "loss": 0.0375518798828125, "step": 38910 }, { "epoch": 0.33648649817122206, "grad_norm": 16.917894626882514, "learning_rate": 5.590614904163699e-06, "loss": 0.43442306518554685, "step": 38915 }, { "epoch": 0.3365297316927653, "grad_norm": 23.948979191434375, "learning_rate": 5.590512158963406e-06, "loss": 0.14287757873535156, "step": 38920 }, { "epoch": 0.3365729652143086, "grad_norm": 2.504045770287656, "learning_rate": 5.5904094018159015e-06, "loss": 0.04703521728515625, "step": 38925 }, { "epoch": 0.33661619873585186, "grad_norm": 9.082225624326673, "learning_rate": 5.590306632721658e-06, "loss": 0.1503753662109375, "step": 38930 }, { "epoch": 0.3366594322573951, "grad_norm": 22.968237866978654, "learning_rate": 5.590203851681148e-06, "loss": 0.23300323486328126, "step": 38935 }, { "epoch": 0.33670266577893837, "grad_norm": 18.863475083890737, "learning_rate": 5.590101058694847e-06, "loss": 0.242724609375, "step": 38940 }, { "epoch": 0.3367458993004816, "grad_norm": 38.14616885996851, "learning_rate": 5.589998253763229e-06, "loss": 0.31429100036621094, "step": 38945 }, { "epoch": 0.3367891328220249, "grad_norm": 3.1245011643442946, "learning_rate": 5.589895436886768e-06, "loss": 0.2663482666015625, "step": 38950 }, { "epoch": 0.33683236634356817, "grad_norm": 9.11638548438506, "learning_rate": 5.589792608065939e-06, "loss": 0.2225931167602539, "step": 38955 }, { "epoch": 0.3368755998651114, "grad_norm": 4.99290805015899, "learning_rate": 5.589689767301214e-06, "loss": 0.13152503967285156, "step": 38960 }, { "epoch": 0.3369188333866547, "grad_norm": 58.42995460892253, "learning_rate": 5.589586914593068e-06, "loss": 0.15989990234375, "step": 38965 }, { "epoch": 0.33696206690819797, "grad_norm": 6.579366358932836, "learning_rate": 5.589484049941978e-06, "loss": 0.1149169921875, "step": 38970 }, { "epoch": 0.3370053004297412, "grad_norm": 5.032234760905874, "learning_rate": 5.5893811733484156e-06, "loss": 0.23477783203125, "step": 38975 }, { "epoch": 0.3370485339512845, "grad_norm": 12.019819156021674, "learning_rate": 5.589278284812855e-06, "loss": 0.3436859130859375, "step": 38980 }, { "epoch": 0.3370917674728277, "grad_norm": 5.843348107133733, "learning_rate": 5.5891753843357735e-06, "loss": 0.11724853515625, "step": 38985 }, { "epoch": 0.337135000994371, "grad_norm": 10.409238411119043, "learning_rate": 5.589072471917643e-06, "loss": 0.10977249145507813, "step": 38990 }, { "epoch": 0.3371782345159143, "grad_norm": 0.961817303159787, "learning_rate": 5.588969547558938e-06, "loss": 0.10978546142578124, "step": 38995 }, { "epoch": 0.3372214680374575, "grad_norm": 0.11679727773515769, "learning_rate": 5.588866611260136e-06, "loss": 0.11211471557617188, "step": 39000 }, { "epoch": 0.3372647015590008, "grad_norm": 12.972862504157053, "learning_rate": 5.588763663021708e-06, "loss": 0.3537628173828125, "step": 39005 }, { "epoch": 0.3373079350805441, "grad_norm": 6.83733525678001, "learning_rate": 5.588660702844133e-06, "loss": 0.33043212890625, "step": 39010 }, { "epoch": 0.3373511686020873, "grad_norm": 6.562121192836974, "learning_rate": 5.588557730727882e-06, "loss": 0.03593730926513672, "step": 39015 }, { "epoch": 0.3373944021236306, "grad_norm": 0.8183104007134755, "learning_rate": 5.588454746673432e-06, "loss": 0.203125, "step": 39020 }, { "epoch": 0.3374376356451738, "grad_norm": 2.691171244833585, "learning_rate": 5.588351750681257e-06, "loss": 0.34832305908203126, "step": 39025 }, { "epoch": 0.3374808691667171, "grad_norm": 1.6155974010696825, "learning_rate": 5.588248742751833e-06, "loss": 0.11386566162109375, "step": 39030 }, { "epoch": 0.3375241026882604, "grad_norm": 3.9145373974084468, "learning_rate": 5.588145722885634e-06, "loss": 0.07005767822265625, "step": 39035 }, { "epoch": 0.3375673362098036, "grad_norm": 1.2571219817204153, "learning_rate": 5.588042691083136e-06, "loss": 0.133953857421875, "step": 39040 }, { "epoch": 0.3376105697313469, "grad_norm": 8.321758197198363, "learning_rate": 5.587939647344813e-06, "loss": 0.139727783203125, "step": 39045 }, { "epoch": 0.3376538032528902, "grad_norm": 2.249425246848257, "learning_rate": 5.587836591671143e-06, "loss": 0.10113525390625, "step": 39050 }, { "epoch": 0.3376970367744334, "grad_norm": 7.005230781394162, "learning_rate": 5.587733524062597e-06, "loss": 0.1928863525390625, "step": 39055 }, { "epoch": 0.3377402702959767, "grad_norm": 74.90519552316783, "learning_rate": 5.5876304445196534e-06, "loss": 0.5722648620605468, "step": 39060 }, { "epoch": 0.3377835038175199, "grad_norm": 36.2496883333039, "learning_rate": 5.587527353042788e-06, "loss": 0.46313323974609377, "step": 39065 }, { "epoch": 0.3378267373390632, "grad_norm": 6.487519253748086, "learning_rate": 5.587424249632475e-06, "loss": 0.2728710174560547, "step": 39070 }, { "epoch": 0.3378699708606065, "grad_norm": 3.6589853516166424, "learning_rate": 5.587321134289188e-06, "loss": 0.2002349853515625, "step": 39075 }, { "epoch": 0.3379132043821497, "grad_norm": 1.1129582930376407, "learning_rate": 5.587218007013406e-06, "loss": 0.1161773681640625, "step": 39080 }, { "epoch": 0.337956437903693, "grad_norm": 13.755056939734896, "learning_rate": 5.587114867805602e-06, "loss": 0.11051025390625, "step": 39085 }, { "epoch": 0.3379996714252363, "grad_norm": 18.584734662208895, "learning_rate": 5.587011716666254e-06, "loss": 0.07772674560546874, "step": 39090 }, { "epoch": 0.3380429049467795, "grad_norm": 1.219033354436764, "learning_rate": 5.586908553595837e-06, "loss": 0.1608367919921875, "step": 39095 }, { "epoch": 0.3380861384683228, "grad_norm": 5.507075157366722, "learning_rate": 5.586805378594825e-06, "loss": 0.28812255859375, "step": 39100 }, { "epoch": 0.3381293719898661, "grad_norm": 5.849615493434502, "learning_rate": 5.5867021916636954e-06, "loss": 0.19501953125, "step": 39105 }, { "epoch": 0.3381726055114093, "grad_norm": 1.666894159519057, "learning_rate": 5.586598992802925e-06, "loss": 0.14294891357421874, "step": 39110 }, { "epoch": 0.3382158390329526, "grad_norm": 9.283142892508776, "learning_rate": 5.5864957820129875e-06, "loss": 0.3341705322265625, "step": 39115 }, { "epoch": 0.33825907255449583, "grad_norm": 3.302564186033169, "learning_rate": 5.586392559294361e-06, "loss": 0.17882976531982422, "step": 39120 }, { "epoch": 0.3383023060760391, "grad_norm": 5.799785629568706, "learning_rate": 5.5862893246475194e-06, "loss": 0.09722690582275391, "step": 39125 }, { "epoch": 0.3383455395975824, "grad_norm": 11.954957088038508, "learning_rate": 5.58618607807294e-06, "loss": 0.235491943359375, "step": 39130 }, { "epoch": 0.33838877311912563, "grad_norm": 8.375045716999084, "learning_rate": 5.5860828195710995e-06, "loss": 0.2035594940185547, "step": 39135 }, { "epoch": 0.3384320066406689, "grad_norm": 1.0203129822395147, "learning_rate": 5.585979549142473e-06, "loss": 0.17363433837890624, "step": 39140 }, { "epoch": 0.3384752401622122, "grad_norm": 3.036214052506606, "learning_rate": 5.585876266787538e-06, "loss": 0.17621078491210937, "step": 39145 }, { "epoch": 0.3385184736837554, "grad_norm": 13.357768402239357, "learning_rate": 5.58577297250677e-06, "loss": 0.34703598022460935, "step": 39150 }, { "epoch": 0.3385617072052987, "grad_norm": 15.141883322091992, "learning_rate": 5.5856696663006455e-06, "loss": 0.26330413818359377, "step": 39155 }, { "epoch": 0.33860494072684194, "grad_norm": 2.48712151379179, "learning_rate": 5.585566348169641e-06, "loss": 0.11305084228515624, "step": 39160 }, { "epoch": 0.3386481742483852, "grad_norm": 45.26663742970875, "learning_rate": 5.585463018114233e-06, "loss": 0.21888427734375, "step": 39165 }, { "epoch": 0.3386914077699285, "grad_norm": 5.293583685329531, "learning_rate": 5.585359676134897e-06, "loss": 0.2195068359375, "step": 39170 }, { "epoch": 0.33873464129147174, "grad_norm": 10.41170721697537, "learning_rate": 5.585256322232112e-06, "loss": 0.12551460266113282, "step": 39175 }, { "epoch": 0.338777874813015, "grad_norm": 6.736578807109876, "learning_rate": 5.585152956406353e-06, "loss": 0.4619659423828125, "step": 39180 }, { "epoch": 0.3388211083345583, "grad_norm": 10.508813082631235, "learning_rate": 5.585049578658097e-06, "loss": 0.11919898986816406, "step": 39185 }, { "epoch": 0.33886434185610154, "grad_norm": 0.9709510252260757, "learning_rate": 5.58494618898782e-06, "loss": 0.15824432373046876, "step": 39190 }, { "epoch": 0.3389075753776448, "grad_norm": 8.552788734779087, "learning_rate": 5.584842787396e-06, "loss": 0.14878311157226562, "step": 39195 }, { "epoch": 0.33895080889918805, "grad_norm": 30.549221124557707, "learning_rate": 5.584739373883114e-06, "loss": 0.10511703491210937, "step": 39200 }, { "epoch": 0.33899404242073133, "grad_norm": 6.157089746605243, "learning_rate": 5.5846359484496375e-06, "loss": 0.062221527099609375, "step": 39205 }, { "epoch": 0.3390372759422746, "grad_norm": 18.83730248796578, "learning_rate": 5.584532511096049e-06, "loss": 0.16837921142578124, "step": 39210 }, { "epoch": 0.33908050946381785, "grad_norm": 5.230289067916854, "learning_rate": 5.5844290618228244e-06, "loss": 0.1141754150390625, "step": 39215 }, { "epoch": 0.33912374298536113, "grad_norm": 3.418833958072013, "learning_rate": 5.584325600630442e-06, "loss": 0.21103057861328126, "step": 39220 }, { "epoch": 0.3391669765069044, "grad_norm": 23.610393481220363, "learning_rate": 5.584222127519378e-06, "loss": 0.212060546875, "step": 39225 }, { "epoch": 0.33921021002844765, "grad_norm": 11.487172943434215, "learning_rate": 5.58411864249011e-06, "loss": 0.1665191650390625, "step": 39230 }, { "epoch": 0.33925344354999093, "grad_norm": 1.0825478477594423, "learning_rate": 5.5840151455431146e-06, "loss": 0.5079750061035156, "step": 39235 }, { "epoch": 0.33929667707153416, "grad_norm": 0.7473038635155165, "learning_rate": 5.58391163667887e-06, "loss": 0.14243431091308595, "step": 39240 }, { "epoch": 0.33933991059307744, "grad_norm": 4.93815828550098, "learning_rate": 5.583808115897854e-06, "loss": 0.10415267944335938, "step": 39245 }, { "epoch": 0.3393831441146207, "grad_norm": 15.257038624946384, "learning_rate": 5.5837045832005425e-06, "loss": 0.4683441162109375, "step": 39250 }, { "epoch": 0.33942637763616396, "grad_norm": 9.94746617814544, "learning_rate": 5.583601038587414e-06, "loss": 0.32679290771484376, "step": 39255 }, { "epoch": 0.33946961115770724, "grad_norm": 23.467935754334352, "learning_rate": 5.583497482058946e-06, "loss": 0.40980224609375, "step": 39260 }, { "epoch": 0.3395128446792505, "grad_norm": 10.803573803651021, "learning_rate": 5.583393913615615e-06, "loss": 0.10158767700195312, "step": 39265 }, { "epoch": 0.33955607820079375, "grad_norm": 5.7946086471644875, "learning_rate": 5.5832903332579e-06, "loss": 0.176995849609375, "step": 39270 }, { "epoch": 0.33959931172233704, "grad_norm": 2.4699524678229854, "learning_rate": 5.583186740986279e-06, "loss": 0.03810043334960937, "step": 39275 }, { "epoch": 0.3396425452438803, "grad_norm": 1.4947887772822226, "learning_rate": 5.583083136801228e-06, "loss": 0.12328872680664063, "step": 39280 }, { "epoch": 0.33968577876542355, "grad_norm": 3.117239815464888, "learning_rate": 5.582979520703226e-06, "loss": 0.10001335144042969, "step": 39285 }, { "epoch": 0.33972901228696684, "grad_norm": 33.14219071453262, "learning_rate": 5.582875892692751e-06, "loss": 0.21597137451171874, "step": 39290 }, { "epoch": 0.33977224580851006, "grad_norm": 1.2420169398641012, "learning_rate": 5.582772252770281e-06, "loss": 0.084271240234375, "step": 39295 }, { "epoch": 0.33981547933005335, "grad_norm": 0.7055656819246562, "learning_rate": 5.582668600936293e-06, "loss": 0.22363433837890626, "step": 39300 }, { "epoch": 0.33985871285159663, "grad_norm": 2.72572657508497, "learning_rate": 5.582564937191266e-06, "loss": 0.1772705078125, "step": 39305 }, { "epoch": 0.33990194637313986, "grad_norm": 8.033320836800714, "learning_rate": 5.5824612615356775e-06, "loss": 0.12204513549804688, "step": 39310 }, { "epoch": 0.33994517989468315, "grad_norm": 15.400673203490383, "learning_rate": 5.582357573970005e-06, "loss": 0.1044778823852539, "step": 39315 }, { "epoch": 0.33998841341622643, "grad_norm": 6.109172595151102, "learning_rate": 5.582253874494729e-06, "loss": 0.15660629272460938, "step": 39320 }, { "epoch": 0.34003164693776966, "grad_norm": 20.36823585586223, "learning_rate": 5.5821501631103255e-06, "loss": 0.4984130859375, "step": 39325 }, { "epoch": 0.34007488045931294, "grad_norm": 4.632364025827549, "learning_rate": 5.582046439817274e-06, "loss": 0.059247589111328124, "step": 39330 }, { "epoch": 0.3401181139808562, "grad_norm": 8.816589217957693, "learning_rate": 5.581942704616052e-06, "loss": 0.2874420166015625, "step": 39335 }, { "epoch": 0.34016134750239946, "grad_norm": 3.5252331160504986, "learning_rate": 5.581838957507138e-06, "loss": 0.06708831787109375, "step": 39340 }, { "epoch": 0.34020458102394274, "grad_norm": 6.197137032799022, "learning_rate": 5.581735198491012e-06, "loss": 0.06541595458984376, "step": 39345 }, { "epoch": 0.34024781454548597, "grad_norm": 17.47418218157359, "learning_rate": 5.5816314275681506e-06, "loss": 0.0921478271484375, "step": 39350 }, { "epoch": 0.34029104806702926, "grad_norm": 25.690957713146457, "learning_rate": 5.581527644739033e-06, "loss": 0.12690811157226561, "step": 39355 }, { "epoch": 0.34033428158857254, "grad_norm": 2.783073493182998, "learning_rate": 5.581423850004139e-06, "loss": 0.132598876953125, "step": 39360 }, { "epoch": 0.34037751511011577, "grad_norm": 18.96339565548106, "learning_rate": 5.5813200433639466e-06, "loss": 0.2615486145019531, "step": 39365 }, { "epoch": 0.34042074863165905, "grad_norm": 6.619874273943353, "learning_rate": 5.581216224818932e-06, "loss": 0.554815673828125, "step": 39370 }, { "epoch": 0.3404639821532023, "grad_norm": 2.366804439483656, "learning_rate": 5.581112394369578e-06, "loss": 0.06797637939453124, "step": 39375 }, { "epoch": 0.34050721567474557, "grad_norm": 14.57148731939594, "learning_rate": 5.581008552016361e-06, "loss": 0.15496826171875, "step": 39380 }, { "epoch": 0.34055044919628885, "grad_norm": 3.265871410782355, "learning_rate": 5.580904697759761e-06, "loss": 0.1054412841796875, "step": 39385 }, { "epoch": 0.3405936827178321, "grad_norm": 25.483720368478036, "learning_rate": 5.580800831600257e-06, "loss": 0.3944305419921875, "step": 39390 }, { "epoch": 0.34063691623937536, "grad_norm": 7.210158782811769, "learning_rate": 5.580696953538327e-06, "loss": 0.09812774658203124, "step": 39395 }, { "epoch": 0.34068014976091865, "grad_norm": 6.311685654460721, "learning_rate": 5.580593063574451e-06, "loss": 0.210992431640625, "step": 39400 }, { "epoch": 0.3407233832824619, "grad_norm": 7.38619715256654, "learning_rate": 5.580489161709107e-06, "loss": 0.10878143310546876, "step": 39405 }, { "epoch": 0.34076661680400516, "grad_norm": 32.159425931943474, "learning_rate": 5.580385247942776e-06, "loss": 0.644970703125, "step": 39410 }, { "epoch": 0.3408098503255484, "grad_norm": 7.6253783879350525, "learning_rate": 5.580281322275937e-06, "loss": 0.0857177734375, "step": 39415 }, { "epoch": 0.3408530838470917, "grad_norm": 14.947019923844886, "learning_rate": 5.580177384709066e-06, "loss": 0.12778549194335936, "step": 39420 }, { "epoch": 0.34089631736863496, "grad_norm": 50.42885529857966, "learning_rate": 5.580073435242647e-06, "loss": 0.448797607421875, "step": 39425 }, { "epoch": 0.3409395508901782, "grad_norm": 6.637341599779554, "learning_rate": 5.579969473877157e-06, "loss": 0.13455734252929688, "step": 39430 }, { "epoch": 0.3409827844117215, "grad_norm": 10.969617065211654, "learning_rate": 5.579865500613074e-06, "loss": 0.2954437255859375, "step": 39435 }, { "epoch": 0.34102601793326476, "grad_norm": 6.35577816237078, "learning_rate": 5.579761515450881e-06, "loss": 0.28125, "step": 39440 }, { "epoch": 0.341069251454808, "grad_norm": 0.6616206398872869, "learning_rate": 5.579657518391057e-06, "loss": 0.08776016235351562, "step": 39445 }, { "epoch": 0.34111248497635127, "grad_norm": 0.3148185750609605, "learning_rate": 5.579553509434078e-06, "loss": 0.038037109375, "step": 39450 }, { "epoch": 0.3411557184978945, "grad_norm": 30.59058468503109, "learning_rate": 5.579449488580427e-06, "loss": 0.4466880798339844, "step": 39455 }, { "epoch": 0.3411989520194378, "grad_norm": 1.7789850834694492, "learning_rate": 5.579345455830583e-06, "loss": 0.06448745727539062, "step": 39460 }, { "epoch": 0.34124218554098107, "grad_norm": 25.320341798319383, "learning_rate": 5.579241411185026e-06, "loss": 0.3578277587890625, "step": 39465 }, { "epoch": 0.3412854190625243, "grad_norm": 0.8278357063109144, "learning_rate": 5.579137354644235e-06, "loss": 0.238006591796875, "step": 39470 }, { "epoch": 0.3413286525840676, "grad_norm": 2.9347967589905735, "learning_rate": 5.57903328620869e-06, "loss": 0.36219024658203125, "step": 39475 }, { "epoch": 0.34137188610561087, "grad_norm": 2.0201519612294487, "learning_rate": 5.578929205878873e-06, "loss": 0.10355644226074219, "step": 39480 }, { "epoch": 0.3414151196271541, "grad_norm": 2.791866002313071, "learning_rate": 5.57882511365526e-06, "loss": 0.1808074951171875, "step": 39485 }, { "epoch": 0.3414583531486974, "grad_norm": 43.88163324494068, "learning_rate": 5.578721009538336e-06, "loss": 0.3568572998046875, "step": 39490 }, { "epoch": 0.34150158667024066, "grad_norm": 1.9716863886164193, "learning_rate": 5.578616893528577e-06, "loss": 0.36788177490234375, "step": 39495 }, { "epoch": 0.3415448201917839, "grad_norm": 27.304695561840727, "learning_rate": 5.578512765626465e-06, "loss": 0.128399658203125, "step": 39500 }, { "epoch": 0.3415880537133272, "grad_norm": 21.912245900319345, "learning_rate": 5.57840862583248e-06, "loss": 0.13150100708007811, "step": 39505 }, { "epoch": 0.3416312872348704, "grad_norm": 26.685546928422074, "learning_rate": 5.578304474147103e-06, "loss": 0.10449981689453125, "step": 39510 }, { "epoch": 0.3416745207564137, "grad_norm": 2.5888025010591287, "learning_rate": 5.578200310570812e-06, "loss": 0.20458984375, "step": 39515 }, { "epoch": 0.341717754277957, "grad_norm": 26.573923340695234, "learning_rate": 5.57809613510409e-06, "loss": 0.31207275390625, "step": 39520 }, { "epoch": 0.3417609877995002, "grad_norm": 5.474028627142167, "learning_rate": 5.577991947747416e-06, "loss": 0.2110748291015625, "step": 39525 }, { "epoch": 0.3418042213210435, "grad_norm": 0.4792016822422337, "learning_rate": 5.577887748501272e-06, "loss": 0.32146148681640624, "step": 39530 }, { "epoch": 0.3418474548425868, "grad_norm": 23.61897964846635, "learning_rate": 5.577783537366137e-06, "loss": 0.1196807861328125, "step": 39535 }, { "epoch": 0.34189068836413, "grad_norm": 7.642098833014208, "learning_rate": 5.577679314342491e-06, "loss": 0.300067138671875, "step": 39540 }, { "epoch": 0.3419339218856733, "grad_norm": 4.007308259072301, "learning_rate": 5.577575079430817e-06, "loss": 0.0908447265625, "step": 39545 }, { "epoch": 0.3419771554072165, "grad_norm": 0.9768444545463106, "learning_rate": 5.577470832631594e-06, "loss": 0.24942054748535156, "step": 39550 }, { "epoch": 0.3420203889287598, "grad_norm": 1.3267667897016489, "learning_rate": 5.577366573945304e-06, "loss": 0.370611572265625, "step": 39555 }, { "epoch": 0.3420636224503031, "grad_norm": 2.7491994317827966, "learning_rate": 5.577262303372426e-06, "loss": 0.35213584899902345, "step": 39560 }, { "epoch": 0.3421068559718463, "grad_norm": 23.67834373177465, "learning_rate": 5.577158020913442e-06, "loss": 0.16895370483398436, "step": 39565 }, { "epoch": 0.3421500894933896, "grad_norm": 22.223055958269672, "learning_rate": 5.577053726568834e-06, "loss": 0.21045303344726562, "step": 39570 }, { "epoch": 0.3421933230149329, "grad_norm": 9.701484966123846, "learning_rate": 5.576949420339081e-06, "loss": 0.1272754669189453, "step": 39575 }, { "epoch": 0.3422365565364761, "grad_norm": 12.055343786910377, "learning_rate": 5.576845102224666e-06, "loss": 0.14468464851379395, "step": 39580 }, { "epoch": 0.3422797900580194, "grad_norm": 4.779179401032781, "learning_rate": 5.5767407722260684e-06, "loss": 0.29741973876953126, "step": 39585 }, { "epoch": 0.3423230235795626, "grad_norm": 1.1959699651837603, "learning_rate": 5.5766364303437695e-06, "loss": 0.030889892578125, "step": 39590 }, { "epoch": 0.3423662571011059, "grad_norm": 2.062029994588209, "learning_rate": 5.576532076578251e-06, "loss": 0.1360992431640625, "step": 39595 }, { "epoch": 0.3424094906226492, "grad_norm": 11.738278664361996, "learning_rate": 5.576427710929995e-06, "loss": 0.16116943359375, "step": 39600 }, { "epoch": 0.3424527241441924, "grad_norm": 5.65261362717791, "learning_rate": 5.576323333399482e-06, "loss": 0.09991531372070313, "step": 39605 }, { "epoch": 0.3424959576657357, "grad_norm": 2.369848573212097, "learning_rate": 5.576218943987194e-06, "loss": 0.4870147705078125, "step": 39610 }, { "epoch": 0.342539191187279, "grad_norm": 12.25621904065792, "learning_rate": 5.57611454269361e-06, "loss": 0.09585418701171874, "step": 39615 }, { "epoch": 0.3425824247088222, "grad_norm": 115.1175203307774, "learning_rate": 5.576010129519215e-06, "loss": 0.41444091796875, "step": 39620 }, { "epoch": 0.3426256582303655, "grad_norm": 35.86661558827953, "learning_rate": 5.575905704464488e-06, "loss": 0.4092254638671875, "step": 39625 }, { "epoch": 0.34266889175190873, "grad_norm": 0.4863660178650895, "learning_rate": 5.575801267529913e-06, "loss": 0.2341094970703125, "step": 39630 }, { "epoch": 0.342712125273452, "grad_norm": 17.225116920950427, "learning_rate": 5.575696818715969e-06, "loss": 0.18798828125, "step": 39635 }, { "epoch": 0.3427553587949953, "grad_norm": 1.8231811974353882, "learning_rate": 5.5755923580231384e-06, "loss": 0.09254989624023438, "step": 39640 }, { "epoch": 0.34279859231653853, "grad_norm": 40.81176357377374, "learning_rate": 5.575487885451904e-06, "loss": 0.182098388671875, "step": 39645 }, { "epoch": 0.3428418258380818, "grad_norm": 16.426443218809162, "learning_rate": 5.575383401002747e-06, "loss": 0.14018707275390624, "step": 39650 }, { "epoch": 0.3428850593596251, "grad_norm": 15.097743462196643, "learning_rate": 5.5752789046761495e-06, "loss": 0.16885833740234374, "step": 39655 }, { "epoch": 0.34292829288116833, "grad_norm": 3.0263368290526613, "learning_rate": 5.575174396472593e-06, "loss": 0.08926963806152344, "step": 39660 }, { "epoch": 0.3429715264027116, "grad_norm": 18.968903538606106, "learning_rate": 5.575069876392559e-06, "loss": 0.20161590576171876, "step": 39665 }, { "epoch": 0.3430147599242549, "grad_norm": 13.765116983229944, "learning_rate": 5.5749653444365325e-06, "loss": 0.082159423828125, "step": 39670 }, { "epoch": 0.3430579934457981, "grad_norm": 3.9302161092360617, "learning_rate": 5.5748608006049915e-06, "loss": 0.26551666259765627, "step": 39675 }, { "epoch": 0.3431012269673414, "grad_norm": 31.639956213787457, "learning_rate": 5.5747562448984195e-06, "loss": 0.36536865234375, "step": 39680 }, { "epoch": 0.34314446048888464, "grad_norm": 17.605641524868066, "learning_rate": 5.5746516773173e-06, "loss": 0.263385009765625, "step": 39685 }, { "epoch": 0.3431876940104279, "grad_norm": 0.8371477092150298, "learning_rate": 5.574547097862115e-06, "loss": 0.16993560791015624, "step": 39690 }, { "epoch": 0.3432309275319712, "grad_norm": 0.8763063937937942, "learning_rate": 5.574442506533346e-06, "loss": 0.04871826171875, "step": 39695 }, { "epoch": 0.34327416105351444, "grad_norm": 34.8769885533571, "learning_rate": 5.574337903331476e-06, "loss": 0.40032958984375, "step": 39700 }, { "epoch": 0.3433173945750577, "grad_norm": 8.451491253720869, "learning_rate": 5.574233288256986e-06, "loss": 0.1175445556640625, "step": 39705 }, { "epoch": 0.343360628096601, "grad_norm": 4.253278650393212, "learning_rate": 5.574128661310361e-06, "loss": 0.19094390869140626, "step": 39710 }, { "epoch": 0.34340386161814423, "grad_norm": 1.4820362005261092, "learning_rate": 5.574024022492081e-06, "loss": 0.23867416381835938, "step": 39715 }, { "epoch": 0.3434470951396875, "grad_norm": 28.358880969166314, "learning_rate": 5.57391937180263e-06, "loss": 0.396575927734375, "step": 39720 }, { "epoch": 0.34349032866123075, "grad_norm": 5.90700264046569, "learning_rate": 5.57381470924249e-06, "loss": 0.21166152954101564, "step": 39725 }, { "epoch": 0.34353356218277403, "grad_norm": 1.142700791561249, "learning_rate": 5.573710034812144e-06, "loss": 0.089501953125, "step": 39730 }, { "epoch": 0.3435767957043173, "grad_norm": 4.533609065458505, "learning_rate": 5.573605348512075e-06, "loss": 0.2835723876953125, "step": 39735 }, { "epoch": 0.34362002922586055, "grad_norm": 6.526352615807778, "learning_rate": 5.573500650342765e-06, "loss": 0.06731491088867188, "step": 39740 }, { "epoch": 0.34366326274740383, "grad_norm": 4.324466710159785, "learning_rate": 5.573395940304698e-06, "loss": 0.1297454833984375, "step": 39745 }, { "epoch": 0.3437064962689471, "grad_norm": 0.8874542073174322, "learning_rate": 5.573291218398356e-06, "loss": 0.07892532348632812, "step": 39750 }, { "epoch": 0.34374972979049034, "grad_norm": 1.0547836229705716, "learning_rate": 5.573186484624222e-06, "loss": 0.13214664459228515, "step": 39755 }, { "epoch": 0.34379296331203363, "grad_norm": 1.849875558206195, "learning_rate": 5.57308173898278e-06, "loss": 0.10016937255859375, "step": 39760 }, { "epoch": 0.34383619683357686, "grad_norm": 24.170593137746142, "learning_rate": 5.572976981474512e-06, "loss": 0.188336181640625, "step": 39765 }, { "epoch": 0.34387943035512014, "grad_norm": 8.970601206413752, "learning_rate": 5.572872212099902e-06, "loss": 0.48704681396484373, "step": 39770 }, { "epoch": 0.3439226638766634, "grad_norm": 0.5033986603844864, "learning_rate": 5.572767430859432e-06, "loss": 0.34390106201171877, "step": 39775 }, { "epoch": 0.34396589739820665, "grad_norm": 22.48211374446377, "learning_rate": 5.572662637753586e-06, "loss": 0.17872314453125, "step": 39780 }, { "epoch": 0.34400913091974994, "grad_norm": 50.44222647566031, "learning_rate": 5.572557832782848e-06, "loss": 0.07413253784179688, "step": 39785 }, { "epoch": 0.3440523644412932, "grad_norm": 12.27166659817345, "learning_rate": 5.572453015947699e-06, "loss": 0.09898529052734376, "step": 39790 }, { "epoch": 0.34409559796283645, "grad_norm": 40.265246012686276, "learning_rate": 5.572348187248626e-06, "loss": 0.29994659423828124, "step": 39795 }, { "epoch": 0.34413883148437974, "grad_norm": 21.124673685340802, "learning_rate": 5.5722433466861095e-06, "loss": 0.1779388427734375, "step": 39800 }, { "epoch": 0.34418206500592297, "grad_norm": 2.143533778787777, "learning_rate": 5.572138494260633e-06, "loss": 0.1678924560546875, "step": 39805 }, { "epoch": 0.34422529852746625, "grad_norm": 32.13972807688078, "learning_rate": 5.572033629972684e-06, "loss": 0.15457305908203126, "step": 39810 }, { "epoch": 0.34426853204900953, "grad_norm": 71.45318331893118, "learning_rate": 5.5719287538227404e-06, "loss": 0.2767303466796875, "step": 39815 }, { "epoch": 0.34431176557055276, "grad_norm": 0.7962373903290207, "learning_rate": 5.571823865811289e-06, "loss": 0.2378082275390625, "step": 39820 }, { "epoch": 0.34435499909209605, "grad_norm": 0.7897826991193834, "learning_rate": 5.5717189659388145e-06, "loss": 0.14013214111328126, "step": 39825 }, { "epoch": 0.34439823261363933, "grad_norm": 7.251021578568026, "learning_rate": 5.571614054205797e-06, "loss": 0.37003173828125, "step": 39830 }, { "epoch": 0.34444146613518256, "grad_norm": 0.18389859839724895, "learning_rate": 5.571509130612725e-06, "loss": 0.5024734497070312, "step": 39835 }, { "epoch": 0.34448469965672585, "grad_norm": 25.356822249265186, "learning_rate": 5.571404195160079e-06, "loss": 0.29895362854003904, "step": 39840 }, { "epoch": 0.34452793317826913, "grad_norm": 2.285926174387116, "learning_rate": 5.571299247848345e-06, "loss": 0.2198272705078125, "step": 39845 }, { "epoch": 0.34457116669981236, "grad_norm": 0.6240450126780186, "learning_rate": 5.5711942886780056e-06, "loss": 0.22982711791992189, "step": 39850 }, { "epoch": 0.34461440022135564, "grad_norm": 7.9780711521501715, "learning_rate": 5.571089317649545e-06, "loss": 0.5849945068359375, "step": 39855 }, { "epoch": 0.3446576337428989, "grad_norm": 5.198513319841628, "learning_rate": 5.5709843347634476e-06, "loss": 0.3324615478515625, "step": 39860 }, { "epoch": 0.34470086726444216, "grad_norm": 3.9180287096779884, "learning_rate": 5.570879340020197e-06, "loss": 0.2768646240234375, "step": 39865 }, { "epoch": 0.34474410078598544, "grad_norm": 2.5820455002480935, "learning_rate": 5.570774333420279e-06, "loss": 0.43453283309936525, "step": 39870 }, { "epoch": 0.34478733430752867, "grad_norm": 0.9311352295669028, "learning_rate": 5.570669314964177e-06, "loss": 0.4816070556640625, "step": 39875 }, { "epoch": 0.34483056782907195, "grad_norm": 1.058292384803323, "learning_rate": 5.5705642846523756e-06, "loss": 0.134912109375, "step": 39880 }, { "epoch": 0.34487380135061524, "grad_norm": 1.9876652782744562, "learning_rate": 5.570459242485358e-06, "loss": 0.334405517578125, "step": 39885 }, { "epoch": 0.34491703487215847, "grad_norm": 18.02300039136403, "learning_rate": 5.570354188463609e-06, "loss": 0.191998291015625, "step": 39890 }, { "epoch": 0.34496026839370175, "grad_norm": 16.59506486441118, "learning_rate": 5.570249122587616e-06, "loss": 0.07336082458496093, "step": 39895 }, { "epoch": 0.345003501915245, "grad_norm": 0.040781814902261446, "learning_rate": 5.57014404485786e-06, "loss": 0.175616455078125, "step": 39900 }, { "epoch": 0.34504673543678827, "grad_norm": 1.3206452638768804, "learning_rate": 5.570038955274826e-06, "loss": 0.0810638427734375, "step": 39905 }, { "epoch": 0.34508996895833155, "grad_norm": 8.386887363971585, "learning_rate": 5.569933853838999e-06, "loss": 0.18994140625, "step": 39910 }, { "epoch": 0.3451332024798748, "grad_norm": 2.864305561961725, "learning_rate": 5.5698287405508654e-06, "loss": 0.04351806640625, "step": 39915 }, { "epoch": 0.34517643600141806, "grad_norm": 3.0483071474550365, "learning_rate": 5.569723615410908e-06, "loss": 0.27862396240234377, "step": 39920 }, { "epoch": 0.34521966952296135, "grad_norm": 3.853878880275241, "learning_rate": 5.569618478419613e-06, "loss": 0.0442626953125, "step": 39925 }, { "epoch": 0.3452629030445046, "grad_norm": 1.6183277648455148, "learning_rate": 5.569513329577465e-06, "loss": 0.267138671875, "step": 39930 }, { "epoch": 0.34530613656604786, "grad_norm": 6.605102362896709, "learning_rate": 5.5694081688849465e-06, "loss": 0.1183542251586914, "step": 39935 }, { "epoch": 0.3453493700875911, "grad_norm": 6.993183376353334, "learning_rate": 5.5693029963425465e-06, "loss": 0.096240234375, "step": 39940 }, { "epoch": 0.3453926036091344, "grad_norm": 35.21103533790344, "learning_rate": 5.569197811950747e-06, "loss": 0.20530319213867188, "step": 39945 }, { "epoch": 0.34543583713067766, "grad_norm": 1.9441121413688116, "learning_rate": 5.569092615710035e-06, "loss": 0.2131927490234375, "step": 39950 }, { "epoch": 0.3454790706522209, "grad_norm": 28.55964573095053, "learning_rate": 5.568987407620895e-06, "loss": 0.334027099609375, "step": 39955 }, { "epoch": 0.34552230417376417, "grad_norm": 34.561959368978286, "learning_rate": 5.568882187683811e-06, "loss": 0.35762557983398435, "step": 39960 }, { "epoch": 0.34556553769530746, "grad_norm": 35.07150607760492, "learning_rate": 5.568776955899271e-06, "loss": 0.18266448974609376, "step": 39965 }, { "epoch": 0.3456087712168507, "grad_norm": 16.20364032294851, "learning_rate": 5.568671712267757e-06, "loss": 0.09557037353515625, "step": 39970 }, { "epoch": 0.34565200473839397, "grad_norm": 17.3495597595283, "learning_rate": 5.568566456789757e-06, "loss": 0.19049034118652344, "step": 39975 }, { "epoch": 0.3456952382599372, "grad_norm": 2.1320214125860577, "learning_rate": 5.568461189465755e-06, "loss": 0.08824920654296875, "step": 39980 }, { "epoch": 0.3457384717814805, "grad_norm": 0.20545687470723986, "learning_rate": 5.568355910296237e-06, "loss": 0.160638427734375, "step": 39985 }, { "epoch": 0.34578170530302377, "grad_norm": 38.99589552146025, "learning_rate": 5.568250619281689e-06, "loss": 0.4490928649902344, "step": 39990 }, { "epoch": 0.345824938824567, "grad_norm": 8.701437972626621, "learning_rate": 5.5681453164225955e-06, "loss": 0.3404022216796875, "step": 39995 }, { "epoch": 0.3458681723461103, "grad_norm": 0.25307710163309416, "learning_rate": 5.568040001719443e-06, "loss": 0.41152191162109375, "step": 40000 }, { "epoch": 0.3458681723461103, "eval_loss": 0.13926450908184052, "eval_margin": 0.1340373158454895, "eval_mean_neg": 0.014631230384111404, "eval_mean_pos": 0.700334906578064, "eval_runtime": 19.772, "eval_samples_per_second": 11.683, "eval_steps_per_second": 5.867, "step": 40000 }, { "epoch": 0.34591140586765357, "grad_norm": 24.628908147675318, "learning_rate": 5.567934675172716e-06, "loss": 0.2661376953125, "step": 40005 }, { "epoch": 0.3459546393891968, "grad_norm": 4.298891119212175, "learning_rate": 5.567829336782902e-06, "loss": 0.074407958984375, "step": 40010 }, { "epoch": 0.3459978729107401, "grad_norm": 34.409739196734044, "learning_rate": 5.567723986550486e-06, "loss": 0.296929931640625, "step": 40015 }, { "epoch": 0.3460411064322833, "grad_norm": 6.031818695593964, "learning_rate": 5.567618624475954e-06, "loss": 0.1298980712890625, "step": 40020 }, { "epoch": 0.3460843399538266, "grad_norm": 9.784552183027163, "learning_rate": 5.567513250559792e-06, "loss": 0.2040283203125, "step": 40025 }, { "epoch": 0.3461275734753699, "grad_norm": 4.598336367180972, "learning_rate": 5.567407864802486e-06, "loss": 0.04920616149902344, "step": 40030 }, { "epoch": 0.3461708069969131, "grad_norm": 4.762323006817539, "learning_rate": 5.567302467204521e-06, "loss": 0.20866928100585938, "step": 40035 }, { "epoch": 0.3462140405184564, "grad_norm": 2.5005556311193238, "learning_rate": 5.567197057766384e-06, "loss": 0.07350616455078125, "step": 40040 }, { "epoch": 0.3462572740399997, "grad_norm": 6.442446355043783, "learning_rate": 5.567091636488561e-06, "loss": 0.1177001953125, "step": 40045 }, { "epoch": 0.3463005075615429, "grad_norm": 1.3435400182086106, "learning_rate": 5.5669862033715386e-06, "loss": 0.18997802734375, "step": 40050 }, { "epoch": 0.3463437410830862, "grad_norm": 11.103354722689472, "learning_rate": 5.5668807584158024e-06, "loss": 0.2904296875, "step": 40055 }, { "epoch": 0.34638697460462947, "grad_norm": 21.11869991718726, "learning_rate": 5.566775301621839e-06, "loss": 0.33564453125, "step": 40060 }, { "epoch": 0.3464302081261727, "grad_norm": 17.996589259015224, "learning_rate": 5.566669832990134e-06, "loss": 0.2964996337890625, "step": 40065 }, { "epoch": 0.346473441647716, "grad_norm": 2.198383262089791, "learning_rate": 5.566564352521175e-06, "loss": 0.2990447998046875, "step": 40070 }, { "epoch": 0.3465166751692592, "grad_norm": 17.613338346807314, "learning_rate": 5.566458860215448e-06, "loss": 0.0961456298828125, "step": 40075 }, { "epoch": 0.3465599086908025, "grad_norm": 5.810039270104754, "learning_rate": 5.56635335607344e-06, "loss": 0.164501953125, "step": 40080 }, { "epoch": 0.3466031422123458, "grad_norm": 28.37851755631543, "learning_rate": 5.566247840095637e-06, "loss": 0.25864105224609374, "step": 40085 }, { "epoch": 0.346646375733889, "grad_norm": 16.221495167228078, "learning_rate": 5.5661423122825254e-06, "loss": 0.11043853759765625, "step": 40090 }, { "epoch": 0.3466896092554323, "grad_norm": 7.8524896884397855, "learning_rate": 5.566036772634592e-06, "loss": 0.218408203125, "step": 40095 }, { "epoch": 0.3467328427769756, "grad_norm": 10.901318517069301, "learning_rate": 5.5659312211523245e-06, "loss": 0.34616546630859374, "step": 40100 }, { "epoch": 0.3467760762985188, "grad_norm": 15.066291845874108, "learning_rate": 5.565825657836207e-06, "loss": 0.206634521484375, "step": 40105 }, { "epoch": 0.3468193098200621, "grad_norm": 2.814597918821739, "learning_rate": 5.565720082686731e-06, "loss": 0.1941558837890625, "step": 40110 }, { "epoch": 0.3468625433416053, "grad_norm": 23.87791918193535, "learning_rate": 5.565614495704379e-06, "loss": 0.23372116088867187, "step": 40115 }, { "epoch": 0.3469057768631486, "grad_norm": 0.3109490679381371, "learning_rate": 5.56550889688964e-06, "loss": 0.061236572265625, "step": 40120 }, { "epoch": 0.3469490103846919, "grad_norm": 24.52725193229744, "learning_rate": 5.5654032862430014e-06, "loss": 0.2449493408203125, "step": 40125 }, { "epoch": 0.3469922439062351, "grad_norm": 17.85793287365388, "learning_rate": 5.565297663764948e-06, "loss": 0.1345001220703125, "step": 40130 }, { "epoch": 0.3470354774277784, "grad_norm": 5.597830488526257, "learning_rate": 5.56519202945597e-06, "loss": 0.131304931640625, "step": 40135 }, { "epoch": 0.3470787109493217, "grad_norm": 5.4239163807769035, "learning_rate": 5.565086383316552e-06, "loss": 0.08018646240234376, "step": 40140 }, { "epoch": 0.3471219444708649, "grad_norm": 0.5731196460187776, "learning_rate": 5.564980725347182e-06, "loss": 0.036346435546875, "step": 40145 }, { "epoch": 0.3471651779924082, "grad_norm": 1.9941849847351956, "learning_rate": 5.5648750555483485e-06, "loss": 0.1477752685546875, "step": 40150 }, { "epoch": 0.34720841151395143, "grad_norm": 0.9610709164392383, "learning_rate": 5.564769373920538e-06, "loss": 0.313494873046875, "step": 40155 }, { "epoch": 0.3472516450354947, "grad_norm": 0.8080130181116991, "learning_rate": 5.5646636804642365e-06, "loss": 0.04485664367675781, "step": 40160 }, { "epoch": 0.347294878557038, "grad_norm": 4.870895572374685, "learning_rate": 5.564557975179933e-06, "loss": 0.36123085021972656, "step": 40165 }, { "epoch": 0.34733811207858123, "grad_norm": 0.12471362396777967, "learning_rate": 5.564452258068116e-06, "loss": 0.2518760681152344, "step": 40170 }, { "epoch": 0.3473813456001245, "grad_norm": 26.452847304025113, "learning_rate": 5.564346529129271e-06, "loss": 0.3335762023925781, "step": 40175 }, { "epoch": 0.3474245791216678, "grad_norm": 4.671526807952252, "learning_rate": 5.564240788363886e-06, "loss": 0.0836822509765625, "step": 40180 }, { "epoch": 0.347467812643211, "grad_norm": 4.072616973471304, "learning_rate": 5.564135035772449e-06, "loss": 0.42549991607666016, "step": 40185 }, { "epoch": 0.3475110461647543, "grad_norm": 0.42592209529562386, "learning_rate": 5.564029271355449e-06, "loss": 0.22471466064453124, "step": 40190 }, { "epoch": 0.34755427968629754, "grad_norm": 14.205183045160137, "learning_rate": 5.5639234951133715e-06, "loss": 0.1278076171875, "step": 40195 }, { "epoch": 0.3475975132078408, "grad_norm": 8.474024268097375, "learning_rate": 5.563817707046706e-06, "loss": 0.41796112060546875, "step": 40200 }, { "epoch": 0.3476407467293841, "grad_norm": 22.79569934224162, "learning_rate": 5.5637119071559395e-06, "loss": 0.1432649612426758, "step": 40205 }, { "epoch": 0.34768398025092734, "grad_norm": 0.775107318530544, "learning_rate": 5.56360609544156e-06, "loss": 0.1392730712890625, "step": 40210 }, { "epoch": 0.3477272137724706, "grad_norm": 4.717572802209072, "learning_rate": 5.563500271904056e-06, "loss": 0.11571502685546875, "step": 40215 }, { "epoch": 0.3477704472940139, "grad_norm": 1.1307295492042388, "learning_rate": 5.563394436543915e-06, "loss": 0.5053375244140625, "step": 40220 }, { "epoch": 0.34781368081555714, "grad_norm": 1.7646313793721158, "learning_rate": 5.563288589361625e-06, "loss": 0.15216522216796874, "step": 40225 }, { "epoch": 0.3478569143371004, "grad_norm": 18.757812063139163, "learning_rate": 5.563182730357676e-06, "loss": 0.09865646362304688, "step": 40230 }, { "epoch": 0.3479001478586437, "grad_norm": 0.14867178997620328, "learning_rate": 5.5630768595325535e-06, "loss": 0.04371337890625, "step": 40235 }, { "epoch": 0.34794338138018693, "grad_norm": 46.89338230661769, "learning_rate": 5.5629709768867475e-06, "loss": 0.2754180908203125, "step": 40240 }, { "epoch": 0.3479866149017302, "grad_norm": 3.9739249294808894, "learning_rate": 5.562865082420746e-06, "loss": 0.027471923828125, "step": 40245 }, { "epoch": 0.34802984842327345, "grad_norm": 4.718777535682361, "learning_rate": 5.562759176135036e-06, "loss": 0.0999786376953125, "step": 40250 }, { "epoch": 0.34807308194481673, "grad_norm": 0.1979305732383882, "learning_rate": 5.562653258030108e-06, "loss": 0.0939788818359375, "step": 40255 }, { "epoch": 0.34811631546636, "grad_norm": 26.49462929519146, "learning_rate": 5.5625473281064494e-06, "loss": 0.32035064697265625, "step": 40260 }, { "epoch": 0.34815954898790324, "grad_norm": 9.46924079412947, "learning_rate": 5.56244138636455e-06, "loss": 0.2400787353515625, "step": 40265 }, { "epoch": 0.34820278250944653, "grad_norm": 45.5675823886827, "learning_rate": 5.5623354328048955e-06, "loss": 0.600799560546875, "step": 40270 }, { "epoch": 0.3482460160309898, "grad_norm": 3.1999537725211997, "learning_rate": 5.562229467427978e-06, "loss": 0.6275177001953125, "step": 40275 }, { "epoch": 0.34828924955253304, "grad_norm": 16.81263475653594, "learning_rate": 5.562123490234284e-06, "loss": 0.10284767150878907, "step": 40280 }, { "epoch": 0.3483324830740763, "grad_norm": 17.892036193894963, "learning_rate": 5.562017501224302e-06, "loss": 0.18568000793457032, "step": 40285 }, { "epoch": 0.34837571659561956, "grad_norm": 1.6707349705420902, "learning_rate": 5.561911500398523e-06, "loss": 0.15856781005859374, "step": 40290 }, { "epoch": 0.34841895011716284, "grad_norm": 17.42022638836178, "learning_rate": 5.561805487757435e-06, "loss": 0.109442138671875, "step": 40295 }, { "epoch": 0.3484621836387061, "grad_norm": 5.4709350113556665, "learning_rate": 5.561699463301525e-06, "loss": 0.0441986083984375, "step": 40300 }, { "epoch": 0.34850541716024935, "grad_norm": 1.9237952279669133, "learning_rate": 5.561593427031283e-06, "loss": 0.221185302734375, "step": 40305 }, { "epoch": 0.34854865068179264, "grad_norm": 17.07358129063664, "learning_rate": 5.5614873789471994e-06, "loss": 0.1618072509765625, "step": 40310 }, { "epoch": 0.3485918842033359, "grad_norm": 4.823618883869524, "learning_rate": 5.561381319049763e-06, "loss": 0.16778182983398438, "step": 40315 }, { "epoch": 0.34863511772487915, "grad_norm": 10.359808041377722, "learning_rate": 5.561275247339461e-06, "loss": 0.1711334228515625, "step": 40320 }, { "epoch": 0.34867835124642244, "grad_norm": 2.496006940542368, "learning_rate": 5.561169163816784e-06, "loss": 0.18582649230957032, "step": 40325 }, { "epoch": 0.34872158476796566, "grad_norm": 0.8211608153123618, "learning_rate": 5.561063068482223e-06, "loss": 0.03565702438354492, "step": 40330 }, { "epoch": 0.34876481828950895, "grad_norm": 35.52172144178006, "learning_rate": 5.5609569613362634e-06, "loss": 0.24008712768554688, "step": 40335 }, { "epoch": 0.34880805181105223, "grad_norm": 20.6803305200565, "learning_rate": 5.560850842379397e-06, "loss": 0.5964523315429687, "step": 40340 }, { "epoch": 0.34885128533259546, "grad_norm": 21.186551075629765, "learning_rate": 5.560744711612113e-06, "loss": 0.25029296875, "step": 40345 }, { "epoch": 0.34889451885413875, "grad_norm": 16.081416384995613, "learning_rate": 5.5606385690349005e-06, "loss": 0.10486874580383301, "step": 40350 }, { "epoch": 0.34893775237568203, "grad_norm": 2.877107055635678, "learning_rate": 5.560532414648249e-06, "loss": 0.16836128234863282, "step": 40355 }, { "epoch": 0.34898098589722526, "grad_norm": 6.619933952487609, "learning_rate": 5.560426248452649e-06, "loss": 0.274224853515625, "step": 40360 }, { "epoch": 0.34902421941876854, "grad_norm": 10.037326488096026, "learning_rate": 5.560320070448589e-06, "loss": 0.18928985595703124, "step": 40365 }, { "epoch": 0.3490674529403118, "grad_norm": 10.294904106828824, "learning_rate": 5.560213880636559e-06, "loss": 0.18353729248046874, "step": 40370 }, { "epoch": 0.34911068646185506, "grad_norm": 1.10614156109384, "learning_rate": 5.560107679017049e-06, "loss": 0.2142608642578125, "step": 40375 }, { "epoch": 0.34915391998339834, "grad_norm": 18.40900276336842, "learning_rate": 5.560001465590548e-06, "loss": 0.2270599365234375, "step": 40380 }, { "epoch": 0.34919715350494157, "grad_norm": 12.189760430181543, "learning_rate": 5.5598952403575474e-06, "loss": 0.16021728515625, "step": 40385 }, { "epoch": 0.34924038702648486, "grad_norm": 3.9601078334329842, "learning_rate": 5.559789003318535e-06, "loss": 0.3201396942138672, "step": 40390 }, { "epoch": 0.34928362054802814, "grad_norm": 52.34139237132152, "learning_rate": 5.559682754474002e-06, "loss": 0.32667236328125, "step": 40395 }, { "epoch": 0.34932685406957137, "grad_norm": 35.98440022375821, "learning_rate": 5.55957649382444e-06, "loss": 0.12439079284667968, "step": 40400 }, { "epoch": 0.34937008759111465, "grad_norm": 28.63892047613772, "learning_rate": 5.559470221370336e-06, "loss": 0.11917572021484375, "step": 40405 }, { "epoch": 0.34941332111265794, "grad_norm": 29.24750426463084, "learning_rate": 5.559363937112182e-06, "loss": 0.367987060546875, "step": 40410 }, { "epoch": 0.34945655463420117, "grad_norm": 13.144049368375272, "learning_rate": 5.5592576410504665e-06, "loss": 0.19654388427734376, "step": 40415 }, { "epoch": 0.34949978815574445, "grad_norm": 0.2650848276043051, "learning_rate": 5.559151333185682e-06, "loss": 0.211334228515625, "step": 40420 }, { "epoch": 0.3495430216772877, "grad_norm": 31.765620066423445, "learning_rate": 5.5590450135183174e-06, "loss": 0.5062088966369629, "step": 40425 }, { "epoch": 0.34958625519883096, "grad_norm": 0.734315115340287, "learning_rate": 5.558938682048864e-06, "loss": 0.245880126953125, "step": 40430 }, { "epoch": 0.34962948872037425, "grad_norm": 1.9807461458972218, "learning_rate": 5.558832338777811e-06, "loss": 0.14913864135742189, "step": 40435 }, { "epoch": 0.3496727222419175, "grad_norm": 13.839527627781742, "learning_rate": 5.5587259837056485e-06, "loss": 0.45056686401367185, "step": 40440 }, { "epoch": 0.34971595576346076, "grad_norm": 9.872116951308273, "learning_rate": 5.558619616832869e-06, "loss": 0.24330673217773438, "step": 40445 }, { "epoch": 0.34975918928500405, "grad_norm": 6.291707195747628, "learning_rate": 5.5585132381599615e-06, "loss": 0.115087890625, "step": 40450 }, { "epoch": 0.3498024228065473, "grad_norm": 1.2255327893159647, "learning_rate": 5.558406847687416e-06, "loss": 0.2375, "step": 40455 }, { "epoch": 0.34984565632809056, "grad_norm": 28.48076694949617, "learning_rate": 5.558300445415726e-06, "loss": 0.3888721466064453, "step": 40460 }, { "epoch": 0.3498888898496338, "grad_norm": 1.9360275049402291, "learning_rate": 5.55819403134538e-06, "loss": 0.25501708984375, "step": 40465 }, { "epoch": 0.3499321233711771, "grad_norm": 52.44500969258209, "learning_rate": 5.5580876054768686e-06, "loss": 0.22618865966796875, "step": 40470 }, { "epoch": 0.34997535689272036, "grad_norm": 9.993636349553466, "learning_rate": 5.557981167810683e-06, "loss": 0.14321670532226563, "step": 40475 }, { "epoch": 0.3500185904142636, "grad_norm": 23.535461329843788, "learning_rate": 5.5578747183473154e-06, "loss": 0.1911163330078125, "step": 40480 }, { "epoch": 0.35006182393580687, "grad_norm": 4.110117973230297, "learning_rate": 5.5577682570872545e-06, "loss": 0.31171531677246095, "step": 40485 }, { "epoch": 0.35010505745735016, "grad_norm": 13.75548151326752, "learning_rate": 5.5576617840309925e-06, "loss": 0.09700164794921876, "step": 40490 }, { "epoch": 0.3501482909788934, "grad_norm": 8.449656105481278, "learning_rate": 5.557555299179022e-06, "loss": 0.4331146240234375, "step": 40495 }, { "epoch": 0.35019152450043667, "grad_norm": 5.673317316671431, "learning_rate": 5.55744880253183e-06, "loss": 0.20674285888671876, "step": 40500 }, { "epoch": 0.3502347580219799, "grad_norm": 20.509630514332464, "learning_rate": 5.557342294089912e-06, "loss": 0.294873046875, "step": 40505 }, { "epoch": 0.3502779915435232, "grad_norm": 1.8135609078625547, "learning_rate": 5.557235773853756e-06, "loss": 0.227325439453125, "step": 40510 }, { "epoch": 0.35032122506506647, "grad_norm": 3.3503478013482115, "learning_rate": 5.557129241823855e-06, "loss": 0.146466064453125, "step": 40515 }, { "epoch": 0.3503644585866097, "grad_norm": 4.39155691474321, "learning_rate": 5.5570226980007e-06, "loss": 0.24251632690429686, "step": 40520 }, { "epoch": 0.350407692108153, "grad_norm": 0.6454797737907831, "learning_rate": 5.556916142384783e-06, "loss": 0.2501434326171875, "step": 40525 }, { "epoch": 0.35045092562969626, "grad_norm": 0.7343145561381423, "learning_rate": 5.556809574976593e-06, "loss": 0.11570053100585938, "step": 40530 }, { "epoch": 0.3504941591512395, "grad_norm": 0.942549832257533, "learning_rate": 5.556702995776625e-06, "loss": 0.09221420288085938, "step": 40535 }, { "epoch": 0.3505373926727828, "grad_norm": 4.484038889623902, "learning_rate": 5.556596404785367e-06, "loss": 0.04519596099853516, "step": 40540 }, { "epoch": 0.350580626194326, "grad_norm": 0.05649141128608836, "learning_rate": 5.556489802003313e-06, "loss": 0.10394172668457032, "step": 40545 }, { "epoch": 0.3506238597158693, "grad_norm": 42.99332094865235, "learning_rate": 5.556383187430954e-06, "loss": 0.3781494140625, "step": 40550 }, { "epoch": 0.3506670932374126, "grad_norm": 1.0505824186215091, "learning_rate": 5.556276561068782e-06, "loss": 0.1956695556640625, "step": 40555 }, { "epoch": 0.3507103267589558, "grad_norm": 29.79720989490254, "learning_rate": 5.556169922917287e-06, "loss": 0.11726264953613282, "step": 40560 }, { "epoch": 0.3507535602804991, "grad_norm": 3.842715318509261, "learning_rate": 5.556063272976963e-06, "loss": 0.22607574462890626, "step": 40565 }, { "epoch": 0.3507967938020424, "grad_norm": 13.107980581841257, "learning_rate": 5.555956611248302e-06, "loss": 0.08178863525390626, "step": 40570 }, { "epoch": 0.3508400273235856, "grad_norm": 0.9539442492468635, "learning_rate": 5.5558499377317946e-06, "loss": 0.08640518188476562, "step": 40575 }, { "epoch": 0.3508832608451289, "grad_norm": 4.8073507659034105, "learning_rate": 5.555743252427932e-06, "loss": 0.18896865844726562, "step": 40580 }, { "epoch": 0.35092649436667217, "grad_norm": 3.299018233397554, "learning_rate": 5.555636555337208e-06, "loss": 0.1043701171875, "step": 40585 }, { "epoch": 0.3509697278882154, "grad_norm": 4.8759594677509925, "learning_rate": 5.555529846460115e-06, "loss": 0.360382080078125, "step": 40590 }, { "epoch": 0.3510129614097587, "grad_norm": 11.651293587901092, "learning_rate": 5.555423125797142e-06, "loss": 0.27130851745605467, "step": 40595 }, { "epoch": 0.3510561949313019, "grad_norm": 32.81132050143377, "learning_rate": 5.555316393348786e-06, "loss": 0.14430770874023438, "step": 40600 }, { "epoch": 0.3510994284528452, "grad_norm": 1.7683110325919167, "learning_rate": 5.555209649115535e-06, "loss": 0.1316436767578125, "step": 40605 }, { "epoch": 0.3511426619743885, "grad_norm": 40.19495545611485, "learning_rate": 5.555102893097882e-06, "loss": 0.41085968017578123, "step": 40610 }, { "epoch": 0.3511858954959317, "grad_norm": 11.21081768874985, "learning_rate": 5.554996125296322e-06, "loss": 0.13754653930664062, "step": 40615 }, { "epoch": 0.351229129017475, "grad_norm": 7.7485859232437155, "learning_rate": 5.554889345711345e-06, "loss": 0.058393096923828124, "step": 40620 }, { "epoch": 0.3512723625390183, "grad_norm": 8.142210887974352, "learning_rate": 5.554782554343443e-06, "loss": 0.1773895263671875, "step": 40625 }, { "epoch": 0.3513155960605615, "grad_norm": 0.652961540103946, "learning_rate": 5.554675751193112e-06, "loss": 0.5166458129882813, "step": 40630 }, { "epoch": 0.3513588295821048, "grad_norm": 14.855074117777349, "learning_rate": 5.5545689362608405e-06, "loss": 0.507806396484375, "step": 40635 }, { "epoch": 0.351402063103648, "grad_norm": 3.595571645325042, "learning_rate": 5.554462109547123e-06, "loss": 0.2030303955078125, "step": 40640 }, { "epoch": 0.3514452966251913, "grad_norm": 24.877216771547907, "learning_rate": 5.5543552710524514e-06, "loss": 0.28594970703125, "step": 40645 }, { "epoch": 0.3514885301467346, "grad_norm": 6.583692158530597, "learning_rate": 5.55424842077732e-06, "loss": 0.07199554443359375, "step": 40650 }, { "epoch": 0.3515317636682778, "grad_norm": 38.069119216182166, "learning_rate": 5.55414155872222e-06, "loss": 0.14495010375976564, "step": 40655 }, { "epoch": 0.3515749971898211, "grad_norm": 28.728757740826612, "learning_rate": 5.5540346848876445e-06, "loss": 0.5819305419921875, "step": 40660 }, { "epoch": 0.3516182307113644, "grad_norm": 8.498810696110953, "learning_rate": 5.553927799274087e-06, "loss": 0.270794677734375, "step": 40665 }, { "epoch": 0.3516614642329076, "grad_norm": 1.0057232569292611, "learning_rate": 5.553820901882041e-06, "loss": 0.1610240936279297, "step": 40670 }, { "epoch": 0.3517046977544509, "grad_norm": 6.014977767879911, "learning_rate": 5.553713992711998e-06, "loss": 0.056762313842773436, "step": 40675 }, { "epoch": 0.35174793127599413, "grad_norm": 10.005775921701556, "learning_rate": 5.553607071764452e-06, "loss": 0.06252288818359375, "step": 40680 }, { "epoch": 0.3517911647975374, "grad_norm": 0.6497790031435369, "learning_rate": 5.553500139039895e-06, "loss": 0.07548980712890625, "step": 40685 }, { "epoch": 0.3518343983190807, "grad_norm": 9.266303579156457, "learning_rate": 5.553393194538822e-06, "loss": 0.22041854858398438, "step": 40690 }, { "epoch": 0.35187763184062393, "grad_norm": 9.241195044996724, "learning_rate": 5.553286238261725e-06, "loss": 0.073370361328125, "step": 40695 }, { "epoch": 0.3519208653621672, "grad_norm": 12.725830967660713, "learning_rate": 5.553179270209098e-06, "loss": 0.06697845458984375, "step": 40700 }, { "epoch": 0.3519640988837105, "grad_norm": 4.099338013265039, "learning_rate": 5.553072290381432e-06, "loss": 0.07822265625, "step": 40705 }, { "epoch": 0.3520073324052537, "grad_norm": 3.1128435725446995, "learning_rate": 5.552965298779223e-06, "loss": 0.174969482421875, "step": 40710 }, { "epoch": 0.352050565926797, "grad_norm": 17.500527622291596, "learning_rate": 5.552858295402964e-06, "loss": 0.11612453460693359, "step": 40715 }, { "epoch": 0.35209379944834024, "grad_norm": 2.0729239634610646, "learning_rate": 5.552751280253148e-06, "loss": 0.01784515380859375, "step": 40720 }, { "epoch": 0.3521370329698835, "grad_norm": 3.016052583447483, "learning_rate": 5.552644253330269e-06, "loss": 0.1095855712890625, "step": 40725 }, { "epoch": 0.3521802664914268, "grad_norm": 3.271846160254535, "learning_rate": 5.55253721463482e-06, "loss": 0.1679473876953125, "step": 40730 }, { "epoch": 0.35222350001297004, "grad_norm": 45.56948219365736, "learning_rate": 5.552430164167295e-06, "loss": 0.464373779296875, "step": 40735 }, { "epoch": 0.3522667335345133, "grad_norm": 11.20533857256237, "learning_rate": 5.552323101928187e-06, "loss": 0.22249832153320312, "step": 40740 }, { "epoch": 0.3523099670560566, "grad_norm": 2.6502732350348306, "learning_rate": 5.552216027917991e-06, "loss": 0.1081268310546875, "step": 40745 }, { "epoch": 0.35235320057759983, "grad_norm": 7.132372051009153, "learning_rate": 5.5521089421372e-06, "loss": 0.6479522705078125, "step": 40750 }, { "epoch": 0.3523964340991431, "grad_norm": 5.872142101965706, "learning_rate": 5.552001844586308e-06, "loss": 0.20717735290527345, "step": 40755 }, { "epoch": 0.35243966762068635, "grad_norm": 0.5423427072818674, "learning_rate": 5.551894735265809e-06, "loss": 0.11887626647949219, "step": 40760 }, { "epoch": 0.35248290114222963, "grad_norm": 4.047258309306702, "learning_rate": 5.551787614176197e-06, "loss": 0.16392669677734376, "step": 40765 }, { "epoch": 0.3525261346637729, "grad_norm": 48.40879803307279, "learning_rate": 5.551680481317966e-06, "loss": 0.31388168334960936, "step": 40770 }, { "epoch": 0.35256936818531615, "grad_norm": 10.353830786433045, "learning_rate": 5.551573336691611e-06, "loss": 0.439398193359375, "step": 40775 }, { "epoch": 0.35261260170685943, "grad_norm": 15.567299990407085, "learning_rate": 5.5514661802976235e-06, "loss": 0.09472389221191406, "step": 40780 }, { "epoch": 0.3526558352284027, "grad_norm": 19.53988820636809, "learning_rate": 5.551359012136501e-06, "loss": 0.3557373046875, "step": 40785 }, { "epoch": 0.35269906874994594, "grad_norm": 1.8231007477665409, "learning_rate": 5.5512518322087355e-06, "loss": 0.13693695068359374, "step": 40790 }, { "epoch": 0.3527423022714892, "grad_norm": 24.728220554834763, "learning_rate": 5.551144640514822e-06, "loss": 0.24263916015625, "step": 40795 }, { "epoch": 0.3527855357930325, "grad_norm": 44.87112631987885, "learning_rate": 5.5510374370552545e-06, "loss": 0.295806884765625, "step": 40800 }, { "epoch": 0.35282876931457574, "grad_norm": 8.66499557887344, "learning_rate": 5.550930221830528e-06, "loss": 0.052973175048828126, "step": 40805 }, { "epoch": 0.352872002836119, "grad_norm": 1.2297698284125265, "learning_rate": 5.550822994841137e-06, "loss": 0.044272232055664065, "step": 40810 }, { "epoch": 0.35291523635766225, "grad_norm": 22.039009251005844, "learning_rate": 5.550715756087576e-06, "loss": 0.47951278686523435, "step": 40815 }, { "epoch": 0.35295846987920554, "grad_norm": 4.43004896226267, "learning_rate": 5.550608505570338e-06, "loss": 0.1589599609375, "step": 40820 }, { "epoch": 0.3530017034007488, "grad_norm": 0.5695866949862185, "learning_rate": 5.55050124328992e-06, "loss": 0.07472381591796876, "step": 40825 }, { "epoch": 0.35304493692229205, "grad_norm": 4.545851014788443, "learning_rate": 5.550393969246815e-06, "loss": 0.14502601623535155, "step": 40830 }, { "epoch": 0.35308817044383534, "grad_norm": 0.8924717184769514, "learning_rate": 5.5502866834415194e-06, "loss": 0.07847518920898437, "step": 40835 }, { "epoch": 0.3531314039653786, "grad_norm": 22.261908064312482, "learning_rate": 5.550179385874526e-06, "loss": 0.174371337890625, "step": 40840 }, { "epoch": 0.35317463748692185, "grad_norm": 38.483379654688335, "learning_rate": 5.5500720765463315e-06, "loss": 0.297918701171875, "step": 40845 }, { "epoch": 0.35321787100846513, "grad_norm": 15.588375932956327, "learning_rate": 5.5499647554574295e-06, "loss": 0.44568710327148436, "step": 40850 }, { "epoch": 0.35326110453000836, "grad_norm": 0.41147607856307034, "learning_rate": 5.549857422608315e-06, "loss": 0.10856170654296875, "step": 40855 }, { "epoch": 0.35330433805155165, "grad_norm": 10.189088069378252, "learning_rate": 5.5497500779994835e-06, "loss": 0.07714462280273438, "step": 40860 }, { "epoch": 0.35334757157309493, "grad_norm": 14.754115420639227, "learning_rate": 5.5496427216314305e-06, "loss": 0.27177581787109373, "step": 40865 }, { "epoch": 0.35339080509463816, "grad_norm": 16.279727395787322, "learning_rate": 5.54953535350465e-06, "loss": 0.14545135498046874, "step": 40870 }, { "epoch": 0.35343403861618145, "grad_norm": 4.330739155581509, "learning_rate": 5.549427973619637e-06, "loss": 0.08175048828125, "step": 40875 }, { "epoch": 0.35347727213772473, "grad_norm": 8.385847164747627, "learning_rate": 5.5493205819768894e-06, "loss": 0.06907501220703124, "step": 40880 }, { "epoch": 0.35352050565926796, "grad_norm": 8.800724857546639, "learning_rate": 5.549213178576899e-06, "loss": 0.2626983642578125, "step": 40885 }, { "epoch": 0.35356373918081124, "grad_norm": 8.472653345333681, "learning_rate": 5.549105763420163e-06, "loss": 0.29154052734375, "step": 40890 }, { "epoch": 0.35360697270235447, "grad_norm": 8.633368503992672, "learning_rate": 5.548998336507177e-06, "loss": 0.53419189453125, "step": 40895 }, { "epoch": 0.35365020622389776, "grad_norm": 43.23450218154411, "learning_rate": 5.548890897838435e-06, "loss": 0.5340423583984375, "step": 40900 }, { "epoch": 0.35369343974544104, "grad_norm": 1.1749527615628381, "learning_rate": 5.5487834474144345e-06, "loss": 0.14507904052734374, "step": 40905 }, { "epoch": 0.35373667326698427, "grad_norm": 0.5368391565687791, "learning_rate": 5.5486759852356695e-06, "loss": 0.049456787109375, "step": 40910 }, { "epoch": 0.35377990678852755, "grad_norm": 5.9629090083146625, "learning_rate": 5.548568511302635e-06, "loss": 0.25874176025390627, "step": 40915 }, { "epoch": 0.35382314031007084, "grad_norm": 1.5263894048777358, "learning_rate": 5.548461025615829e-06, "loss": 0.19429473876953124, "step": 40920 }, { "epoch": 0.35386637383161407, "grad_norm": 10.95248556303549, "learning_rate": 5.548353528175745e-06, "loss": 0.22181472778320313, "step": 40925 }, { "epoch": 0.35390960735315735, "grad_norm": 13.155799165924662, "learning_rate": 5.548246018982881e-06, "loss": 0.20506362915039061, "step": 40930 }, { "epoch": 0.3539528408747006, "grad_norm": 7.668945676716776, "learning_rate": 5.548138498037731e-06, "loss": 0.09486083984375, "step": 40935 }, { "epoch": 0.35399607439624386, "grad_norm": 27.400974262551795, "learning_rate": 5.548030965340791e-06, "loss": 0.28822784423828124, "step": 40940 }, { "epoch": 0.35403930791778715, "grad_norm": 20.983575447311896, "learning_rate": 5.547923420892557e-06, "loss": 0.25064239501953123, "step": 40945 }, { "epoch": 0.3540825414393304, "grad_norm": 9.564890202687264, "learning_rate": 5.5478158646935264e-06, "loss": 0.10361328125, "step": 40950 }, { "epoch": 0.35412577496087366, "grad_norm": 13.901140215107091, "learning_rate": 5.547708296744193e-06, "loss": 0.10595550537109374, "step": 40955 }, { "epoch": 0.35416900848241695, "grad_norm": 1.292747058263668, "learning_rate": 5.5476007170450545e-06, "loss": 0.2823333740234375, "step": 40960 }, { "epoch": 0.3542122420039602, "grad_norm": 12.45396673035936, "learning_rate": 5.547493125596607e-06, "loss": 0.1982147216796875, "step": 40965 }, { "epoch": 0.35425547552550346, "grad_norm": 6.315347174596962, "learning_rate": 5.547385522399347e-06, "loss": 0.24220199584960939, "step": 40970 }, { "epoch": 0.35429870904704674, "grad_norm": 0.6441787019729657, "learning_rate": 5.547277907453768e-06, "loss": 0.3708648681640625, "step": 40975 }, { "epoch": 0.35434194256859, "grad_norm": 11.607559752495769, "learning_rate": 5.54717028076037e-06, "loss": 0.15309295654296876, "step": 40980 }, { "epoch": 0.35438517609013326, "grad_norm": 8.552789010282956, "learning_rate": 5.547062642319647e-06, "loss": 0.20751800537109374, "step": 40985 }, { "epoch": 0.3544284096116765, "grad_norm": 2.4570607033492546, "learning_rate": 5.546954992132097e-06, "loss": 0.07690811157226562, "step": 40990 }, { "epoch": 0.35447164313321977, "grad_norm": 2.057934249434966, "learning_rate": 5.5468473301982145e-06, "loss": 0.04816169738769531, "step": 40995 }, { "epoch": 0.35451487665476306, "grad_norm": 18.698834321091947, "learning_rate": 5.546739656518498e-06, "loss": 0.3623329162597656, "step": 41000 }, { "epoch": 0.3545581101763063, "grad_norm": 0.6066263950400874, "learning_rate": 5.5466319710934426e-06, "loss": 0.02254180908203125, "step": 41005 }, { "epoch": 0.35460134369784957, "grad_norm": 3.803363297408112, "learning_rate": 5.546524273923546e-06, "loss": 0.25079879760742185, "step": 41010 }, { "epoch": 0.35464457721939285, "grad_norm": 0.4802633660254009, "learning_rate": 5.5464165650093045e-06, "loss": 0.34403076171875, "step": 41015 }, { "epoch": 0.3546878107409361, "grad_norm": 11.413218659982958, "learning_rate": 5.546308844351214e-06, "loss": 0.12576751708984374, "step": 41020 }, { "epoch": 0.35473104426247937, "grad_norm": 12.906158297199097, "learning_rate": 5.546201111949773e-06, "loss": 0.358416748046875, "step": 41025 }, { "epoch": 0.3547742777840226, "grad_norm": 18.584652416698106, "learning_rate": 5.546093367805477e-06, "loss": 0.0557098388671875, "step": 41030 }, { "epoch": 0.3548175113055659, "grad_norm": 4.37174415470841, "learning_rate": 5.5459856119188236e-06, "loss": 0.08416748046875, "step": 41035 }, { "epoch": 0.35486074482710916, "grad_norm": 3.295952520274206, "learning_rate": 5.54587784429031e-06, "loss": 0.14066314697265625, "step": 41040 }, { "epoch": 0.3549039783486524, "grad_norm": 0.09880338263538584, "learning_rate": 5.545770064920433e-06, "loss": 0.07994804382324219, "step": 41045 }, { "epoch": 0.3549472118701957, "grad_norm": 9.040512717199805, "learning_rate": 5.545662273809688e-06, "loss": 0.2038421630859375, "step": 41050 }, { "epoch": 0.35499044539173896, "grad_norm": 12.904460442157783, "learning_rate": 5.545554470958575e-06, "loss": 0.196923828125, "step": 41055 }, { "epoch": 0.3550336789132822, "grad_norm": 5.9516200471644, "learning_rate": 5.545446656367588e-06, "loss": 0.0539398193359375, "step": 41060 }, { "epoch": 0.3550769124348255, "grad_norm": 36.530878012472606, "learning_rate": 5.545338830037227e-06, "loss": 0.29894561767578126, "step": 41065 }, { "epoch": 0.3551201459563687, "grad_norm": 4.401847962690854, "learning_rate": 5.545230991967988e-06, "loss": 0.0435302734375, "step": 41070 }, { "epoch": 0.355163379477912, "grad_norm": 5.709239117071277, "learning_rate": 5.545123142160369e-06, "loss": 0.02161407470703125, "step": 41075 }, { "epoch": 0.3552066129994553, "grad_norm": 4.725826227917074, "learning_rate": 5.545015280614866e-06, "loss": 0.07628421783447266, "step": 41080 }, { "epoch": 0.3552498465209985, "grad_norm": 5.626929899245852, "learning_rate": 5.544907407331978e-06, "loss": 0.24808082580566407, "step": 41085 }, { "epoch": 0.3552930800425418, "grad_norm": 32.016646727724385, "learning_rate": 5.544799522312202e-06, "loss": 0.1246246337890625, "step": 41090 }, { "epoch": 0.35533631356408507, "grad_norm": 6.422369268827486, "learning_rate": 5.544691625556036e-06, "loss": 0.06727676391601563, "step": 41095 }, { "epoch": 0.3553795470856283, "grad_norm": 23.219411310523615, "learning_rate": 5.544583717063976e-06, "loss": 0.047557830810546875, "step": 41100 }, { "epoch": 0.3554227806071716, "grad_norm": 16.15142850284398, "learning_rate": 5.544475796836521e-06, "loss": 0.07501106262207032, "step": 41105 }, { "epoch": 0.3554660141287148, "grad_norm": 0.7355492616844204, "learning_rate": 5.544367864874169e-06, "loss": 0.18697357177734375, "step": 41110 }, { "epoch": 0.3555092476502581, "grad_norm": 5.085360471108633, "learning_rate": 5.544259921177417e-06, "loss": 0.06400146484375, "step": 41115 }, { "epoch": 0.3555524811718014, "grad_norm": 2.9951265341906037, "learning_rate": 5.544151965746762e-06, "loss": 0.2661376953125, "step": 41120 }, { "epoch": 0.3555957146933446, "grad_norm": 2.2045441865229347, "learning_rate": 5.544043998582705e-06, "loss": 0.09918212890625, "step": 41125 }, { "epoch": 0.3556389482148879, "grad_norm": 6.3344805726771565, "learning_rate": 5.54393601968574e-06, "loss": 0.20361576080322266, "step": 41130 }, { "epoch": 0.3556821817364312, "grad_norm": 0.5564444288701527, "learning_rate": 5.543828029056368e-06, "loss": 0.35373382568359374, "step": 41135 }, { "epoch": 0.3557254152579744, "grad_norm": 9.070347314416264, "learning_rate": 5.543720026695085e-06, "loss": 0.463330078125, "step": 41140 }, { "epoch": 0.3557686487795177, "grad_norm": 6.062115409751216, "learning_rate": 5.543612012602391e-06, "loss": 0.06766738891601562, "step": 41145 }, { "epoch": 0.355811882301061, "grad_norm": 37.23898425147459, "learning_rate": 5.543503986778783e-06, "loss": 0.2201690673828125, "step": 41150 }, { "epoch": 0.3558551158226042, "grad_norm": 7.1452631570900165, "learning_rate": 5.543395949224758e-06, "loss": 0.057978057861328126, "step": 41155 }, { "epoch": 0.3558983493441475, "grad_norm": 16.385193862909663, "learning_rate": 5.5432878999408175e-06, "loss": 0.11508026123046874, "step": 41160 }, { "epoch": 0.3559415828656907, "grad_norm": 7.591688115330764, "learning_rate": 5.543179838927457e-06, "loss": 0.2917633056640625, "step": 41165 }, { "epoch": 0.355984816387234, "grad_norm": 7.88268837738556, "learning_rate": 5.543071766185176e-06, "loss": 0.08732757568359376, "step": 41170 }, { "epoch": 0.3560280499087773, "grad_norm": 7.651541941551274, "learning_rate": 5.542963681714472e-06, "loss": 0.4599311828613281, "step": 41175 }, { "epoch": 0.3560712834303205, "grad_norm": 1.7500765112667296, "learning_rate": 5.542855585515846e-06, "loss": 0.19881591796875, "step": 41180 }, { "epoch": 0.3561145169518638, "grad_norm": 8.464964740806149, "learning_rate": 5.542747477589792e-06, "loss": 0.269122314453125, "step": 41185 }, { "epoch": 0.3561577504734071, "grad_norm": 27.994988654627743, "learning_rate": 5.542639357936814e-06, "loss": 0.11653900146484375, "step": 41190 }, { "epoch": 0.3562009839949503, "grad_norm": 1.1247451734572467, "learning_rate": 5.542531226557406e-06, "loss": 0.23193588256835937, "step": 41195 }, { "epoch": 0.3562442175164936, "grad_norm": 11.404585219243982, "learning_rate": 5.54242308345207e-06, "loss": 0.1814300537109375, "step": 41200 }, { "epoch": 0.35628745103803683, "grad_norm": 1.0061296274872902, "learning_rate": 5.542314928621302e-06, "loss": 0.32773284912109374, "step": 41205 }, { "epoch": 0.3563306845595801, "grad_norm": 26.750492764160278, "learning_rate": 5.542206762065603e-06, "loss": 0.3598045349121094, "step": 41210 }, { "epoch": 0.3563739180811234, "grad_norm": 21.71332049278171, "learning_rate": 5.542098583785472e-06, "loss": 0.21741600036621095, "step": 41215 }, { "epoch": 0.3564171516026666, "grad_norm": 12.016456160438484, "learning_rate": 5.541990393781406e-06, "loss": 0.10773544311523438, "step": 41220 }, { "epoch": 0.3564603851242099, "grad_norm": 14.52332622815077, "learning_rate": 5.541882192053905e-06, "loss": 0.0614501953125, "step": 41225 }, { "epoch": 0.3565036186457532, "grad_norm": 5.698585986351648, "learning_rate": 5.541773978603468e-06, "loss": 0.15044097900390624, "step": 41230 }, { "epoch": 0.3565468521672964, "grad_norm": 12.123514762599084, "learning_rate": 5.541665753430594e-06, "loss": 0.32720947265625, "step": 41235 }, { "epoch": 0.3565900856888397, "grad_norm": 23.754934764639874, "learning_rate": 5.5415575165357814e-06, "loss": 0.4344451904296875, "step": 41240 }, { "epoch": 0.35663331921038294, "grad_norm": 1.2642387498049992, "learning_rate": 5.541449267919531e-06, "loss": 0.07392120361328125, "step": 41245 }, { "epoch": 0.3566765527319262, "grad_norm": 7.020955136508019, "learning_rate": 5.541341007582341e-06, "loss": 0.49367828369140626, "step": 41250 }, { "epoch": 0.3567197862534695, "grad_norm": 2.756011150399427, "learning_rate": 5.541232735524711e-06, "loss": 0.20762786865234376, "step": 41255 }, { "epoch": 0.35676301977501274, "grad_norm": 26.66120069291145, "learning_rate": 5.54112445174714e-06, "loss": 0.34780426025390626, "step": 41260 }, { "epoch": 0.356806253296556, "grad_norm": 1.1865286341100405, "learning_rate": 5.541016156250127e-06, "loss": 0.22490386962890624, "step": 41265 }, { "epoch": 0.3568494868180993, "grad_norm": 5.839313177774661, "learning_rate": 5.5409078490341724e-06, "loss": 0.2800689697265625, "step": 41270 }, { "epoch": 0.35689272033964253, "grad_norm": 29.028475795474648, "learning_rate": 5.5407995300997756e-06, "loss": 0.220147705078125, "step": 41275 }, { "epoch": 0.3569359538611858, "grad_norm": 2.7848846856793004, "learning_rate": 5.540691199447435e-06, "loss": 0.15401535034179686, "step": 41280 }, { "epoch": 0.35697918738272905, "grad_norm": 38.60229887309941, "learning_rate": 5.540582857077652e-06, "loss": 0.34819869995117186, "step": 41285 }, { "epoch": 0.35702242090427233, "grad_norm": 4.56366596244436, "learning_rate": 5.540474502990925e-06, "loss": 0.1326263427734375, "step": 41290 }, { "epoch": 0.3570656544258156, "grad_norm": 20.46084554001562, "learning_rate": 5.540366137187755e-06, "loss": 0.395928955078125, "step": 41295 }, { "epoch": 0.35710888794735884, "grad_norm": 25.98503789618083, "learning_rate": 5.540257759668639e-06, "loss": 0.4427909851074219, "step": 41300 }, { "epoch": 0.35715212146890213, "grad_norm": 0.15902399957443164, "learning_rate": 5.5401493704340805e-06, "loss": 0.08037872314453125, "step": 41305 }, { "epoch": 0.3571953549904454, "grad_norm": 15.62084245176451, "learning_rate": 5.540040969484577e-06, "loss": 0.10053329467773438, "step": 41310 }, { "epoch": 0.35723858851198864, "grad_norm": 7.713033202112247, "learning_rate": 5.5399325568206284e-06, "loss": 0.242877197265625, "step": 41315 }, { "epoch": 0.3572818220335319, "grad_norm": 26.83517193158832, "learning_rate": 5.539824132442735e-06, "loss": 0.17342529296875, "step": 41320 }, { "epoch": 0.3573250555550752, "grad_norm": 2.3010075480630343, "learning_rate": 5.539715696351398e-06, "loss": 0.16719207763671876, "step": 41325 }, { "epoch": 0.35736828907661844, "grad_norm": 0.292710010758715, "learning_rate": 5.539607248547116e-06, "loss": 0.08769721984863281, "step": 41330 }, { "epoch": 0.3574115225981617, "grad_norm": 7.793469783557028, "learning_rate": 5.539498789030391e-06, "loss": 0.6282257080078125, "step": 41335 }, { "epoch": 0.35745475611970495, "grad_norm": 4.781940672063838, "learning_rate": 5.539390317801721e-06, "loss": 0.06023101806640625, "step": 41340 }, { "epoch": 0.35749798964124824, "grad_norm": 0.38389899537295075, "learning_rate": 5.5392818348616074e-06, "loss": 0.07199764251708984, "step": 41345 }, { "epoch": 0.3575412231627915, "grad_norm": 48.00853412336559, "learning_rate": 5.539173340210549e-06, "loss": 0.2846527099609375, "step": 41350 }, { "epoch": 0.35758445668433475, "grad_norm": 36.024169004599905, "learning_rate": 5.53906483384905e-06, "loss": 0.787890625, "step": 41355 }, { "epoch": 0.35762769020587803, "grad_norm": 1.859609650331293, "learning_rate": 5.538956315777607e-06, "loss": 0.09658660888671874, "step": 41360 }, { "epoch": 0.3576709237274213, "grad_norm": 10.819791067798015, "learning_rate": 5.538847785996722e-06, "loss": 0.185406494140625, "step": 41365 }, { "epoch": 0.35771415724896455, "grad_norm": 12.138000114446838, "learning_rate": 5.5387392445068955e-06, "loss": 0.08235149383544922, "step": 41370 }, { "epoch": 0.35775739077050783, "grad_norm": 5.836846698636566, "learning_rate": 5.538630691308628e-06, "loss": 0.17996826171875, "step": 41375 }, { "epoch": 0.35780062429205106, "grad_norm": 1.400988159274997, "learning_rate": 5.538522126402419e-06, "loss": 0.17997207641601562, "step": 41380 }, { "epoch": 0.35784385781359435, "grad_norm": 32.25252062334365, "learning_rate": 5.538413549788772e-06, "loss": 0.4608604431152344, "step": 41385 }, { "epoch": 0.35788709133513763, "grad_norm": 21.75303188547268, "learning_rate": 5.5383049614681836e-06, "loss": 0.1139678955078125, "step": 41390 }, { "epoch": 0.35793032485668086, "grad_norm": 5.424608947305274, "learning_rate": 5.53819636144116e-06, "loss": 0.07130603790283203, "step": 41395 }, { "epoch": 0.35797355837822414, "grad_norm": 2.122998742576118, "learning_rate": 5.5380877497081965e-06, "loss": 0.24229011535644532, "step": 41400 }, { "epoch": 0.35801679189976743, "grad_norm": 10.40656917548105, "learning_rate": 5.537979126269798e-06, "loss": 0.2233306884765625, "step": 41405 }, { "epoch": 0.35806002542131066, "grad_norm": 16.238793875733617, "learning_rate": 5.5378704911264635e-06, "loss": 0.1863006591796875, "step": 41410 }, { "epoch": 0.35810325894285394, "grad_norm": 12.735274709046264, "learning_rate": 5.537761844278696e-06, "loss": 0.1919921875, "step": 41415 }, { "epoch": 0.35814649246439717, "grad_norm": 1.4336609454323497, "learning_rate": 5.537653185726993e-06, "loss": 0.162481689453125, "step": 41420 }, { "epoch": 0.35818972598594045, "grad_norm": 30.44841966967212, "learning_rate": 5.537544515471858e-06, "loss": 0.274884033203125, "step": 41425 }, { "epoch": 0.35823295950748374, "grad_norm": 6.562201959573349, "learning_rate": 5.537435833513793e-06, "loss": 0.11297988891601562, "step": 41430 }, { "epoch": 0.35827619302902697, "grad_norm": 0.7024001751074553, "learning_rate": 5.537327139853298e-06, "loss": 0.06507644653320313, "step": 41435 }, { "epoch": 0.35831942655057025, "grad_norm": 0.11832044939316395, "learning_rate": 5.537218434490873e-06, "loss": 0.17537689208984375, "step": 41440 }, { "epoch": 0.35836266007211354, "grad_norm": 45.99287919634268, "learning_rate": 5.537109717427022e-06, "loss": 0.1292572021484375, "step": 41445 }, { "epoch": 0.35840589359365677, "grad_norm": 4.8324105198873575, "learning_rate": 5.537000988662245e-06, "loss": 0.21908111572265626, "step": 41450 }, { "epoch": 0.35844912711520005, "grad_norm": 0.08895632959177702, "learning_rate": 5.536892248197044e-06, "loss": 0.3252998352050781, "step": 41455 }, { "epoch": 0.3584923606367433, "grad_norm": 14.285094113155308, "learning_rate": 5.53678349603192e-06, "loss": 0.5949188232421875, "step": 41460 }, { "epoch": 0.35853559415828656, "grad_norm": 50.09089890038171, "learning_rate": 5.536674732167374e-06, "loss": 0.42202606201171877, "step": 41465 }, { "epoch": 0.35857882767982985, "grad_norm": 42.02196380610391, "learning_rate": 5.5365659566039085e-06, "loss": 0.34867401123046876, "step": 41470 }, { "epoch": 0.3586220612013731, "grad_norm": 0.12077046629087079, "learning_rate": 5.536457169342025e-06, "loss": 0.09181365966796876, "step": 41475 }, { "epoch": 0.35866529472291636, "grad_norm": 0.13198019763705784, "learning_rate": 5.536348370382226e-06, "loss": 0.20578041076660156, "step": 41480 }, { "epoch": 0.35870852824445965, "grad_norm": 0.9644622709054006, "learning_rate": 5.536239559725011e-06, "loss": 0.16733551025390625, "step": 41485 }, { "epoch": 0.3587517617660029, "grad_norm": 57.22381503171543, "learning_rate": 5.536130737370885e-06, "loss": 0.17650527954101564, "step": 41490 }, { "epoch": 0.35879499528754616, "grad_norm": 2.9113602733134534, "learning_rate": 5.536021903320346e-06, "loss": 0.13355178833007814, "step": 41495 }, { "epoch": 0.3588382288090894, "grad_norm": 5.362202618963046, "learning_rate": 5.5359130575739e-06, "loss": 0.09982833862304688, "step": 41500 }, { "epoch": 0.3588814623306327, "grad_norm": 13.538558392698661, "learning_rate": 5.535804200132045e-06, "loss": 0.486083984375, "step": 41505 }, { "epoch": 0.35892469585217596, "grad_norm": 0.7552317334696886, "learning_rate": 5.535695330995287e-06, "loss": 0.12654571533203124, "step": 41510 }, { "epoch": 0.3589679293737192, "grad_norm": 0.4391575662372293, "learning_rate": 5.535586450164125e-06, "loss": 0.07757568359375, "step": 41515 }, { "epoch": 0.35901116289526247, "grad_norm": 3.418241198842024, "learning_rate": 5.535477557639063e-06, "loss": 0.2802490234375, "step": 41520 }, { "epoch": 0.35905439641680575, "grad_norm": 10.439618993217497, "learning_rate": 5.535368653420603e-06, "loss": 0.18344535827636718, "step": 41525 }, { "epoch": 0.359097629938349, "grad_norm": 5.640577984509456, "learning_rate": 5.535259737509246e-06, "loss": 0.14129638671875, "step": 41530 }, { "epoch": 0.35914086345989227, "grad_norm": 20.382919150850338, "learning_rate": 5.535150809905495e-06, "loss": 0.11278228759765625, "step": 41535 }, { "epoch": 0.35918409698143555, "grad_norm": 23.531383499532325, "learning_rate": 5.535041870609853e-06, "loss": 0.3121337890625, "step": 41540 }, { "epoch": 0.3592273305029788, "grad_norm": 6.500605013518276, "learning_rate": 5.5349329196228214e-06, "loss": 0.08450469970703126, "step": 41545 }, { "epoch": 0.35927056402452207, "grad_norm": 15.130300596555136, "learning_rate": 5.534823956944903e-06, "loss": 0.17981719970703125, "step": 41550 }, { "epoch": 0.3593137975460653, "grad_norm": 11.135566385638048, "learning_rate": 5.534714982576602e-06, "loss": 0.043556594848632814, "step": 41555 }, { "epoch": 0.3593570310676086, "grad_norm": 14.615955646798342, "learning_rate": 5.534605996518417e-06, "loss": 0.4085205078125, "step": 41560 }, { "epoch": 0.35940026458915186, "grad_norm": 16.85865292453554, "learning_rate": 5.534496998770855e-06, "loss": 0.34105682373046875, "step": 41565 }, { "epoch": 0.3594434981106951, "grad_norm": 3.600579643274234, "learning_rate": 5.534387989334416e-06, "loss": 0.0710113525390625, "step": 41570 }, { "epoch": 0.3594867316322384, "grad_norm": 0.07528754009207782, "learning_rate": 5.534278968209604e-06, "loss": 0.225762939453125, "step": 41575 }, { "epoch": 0.35952996515378166, "grad_norm": 14.306604887387032, "learning_rate": 5.5341699353969215e-06, "loss": 0.09381351470947266, "step": 41580 }, { "epoch": 0.3595731986753249, "grad_norm": 0.9014477041859457, "learning_rate": 5.534060890896871e-06, "loss": 0.20340576171875, "step": 41585 }, { "epoch": 0.3596164321968682, "grad_norm": 10.021689644244228, "learning_rate": 5.533951834709955e-06, "loss": 0.03030853271484375, "step": 41590 }, { "epoch": 0.3596596657184114, "grad_norm": 14.928983019484976, "learning_rate": 5.533842766836678e-06, "loss": 0.31781005859375, "step": 41595 }, { "epoch": 0.3597028992399547, "grad_norm": 0.42950543384204076, "learning_rate": 5.533733687277541e-06, "loss": 0.08034515380859375, "step": 41600 }, { "epoch": 0.35974613276149797, "grad_norm": 12.051853059016159, "learning_rate": 5.533624596033048e-06, "loss": 0.09052734375, "step": 41605 }, { "epoch": 0.3597893662830412, "grad_norm": 5.8799562551008115, "learning_rate": 5.533515493103704e-06, "loss": 0.058827972412109374, "step": 41610 }, { "epoch": 0.3598325998045845, "grad_norm": 7.567653339486701, "learning_rate": 5.533406378490009e-06, "loss": 0.11956863403320313, "step": 41615 }, { "epoch": 0.35987583332612777, "grad_norm": 1.7407625640056676, "learning_rate": 5.533297252192468e-06, "loss": 0.0759317398071289, "step": 41620 }, { "epoch": 0.359919066847671, "grad_norm": 0.07949652255130911, "learning_rate": 5.5331881142115834e-06, "loss": 0.18333015441894532, "step": 41625 }, { "epoch": 0.3599623003692143, "grad_norm": 9.789561580012132, "learning_rate": 5.53307896454786e-06, "loss": 0.07870330810546874, "step": 41630 }, { "epoch": 0.3600055338907575, "grad_norm": 3.807242596237677, "learning_rate": 5.532969803201799e-06, "loss": 0.118463134765625, "step": 41635 }, { "epoch": 0.3600487674123008, "grad_norm": 27.528161109900363, "learning_rate": 5.5328606301739066e-06, "loss": 0.19146728515625, "step": 41640 }, { "epoch": 0.3600920009338441, "grad_norm": 1.791015323552771, "learning_rate": 5.532751445464683e-06, "loss": 0.06951522827148438, "step": 41645 }, { "epoch": 0.3601352344553873, "grad_norm": 11.556947166831012, "learning_rate": 5.532642249074635e-06, "loss": 0.1183197021484375, "step": 41650 }, { "epoch": 0.3601784679769306, "grad_norm": 28.771640106385718, "learning_rate": 5.532533041004265e-06, "loss": 0.15925674438476561, "step": 41655 }, { "epoch": 0.3602217014984739, "grad_norm": 15.167948610863954, "learning_rate": 5.532423821254075e-06, "loss": 0.09896011352539062, "step": 41660 }, { "epoch": 0.3602649350200171, "grad_norm": 18.333730655132666, "learning_rate": 5.532314589824571e-06, "loss": 0.9062713623046875, "step": 41665 }, { "epoch": 0.3603081685415604, "grad_norm": 45.5287195756047, "learning_rate": 5.532205346716255e-06, "loss": 0.1003936767578125, "step": 41670 }, { "epoch": 0.3603514020631036, "grad_norm": 6.063329960455354, "learning_rate": 5.532096091929633e-06, "loss": 0.2446807861328125, "step": 41675 }, { "epoch": 0.3603946355846469, "grad_norm": 8.514773322397742, "learning_rate": 5.531986825465206e-06, "loss": 0.119580078125, "step": 41680 }, { "epoch": 0.3604378691061902, "grad_norm": 28.71689199323436, "learning_rate": 5.531877547323481e-06, "loss": 0.169647216796875, "step": 41685 }, { "epoch": 0.3604811026277334, "grad_norm": 0.9910275734776564, "learning_rate": 5.53176825750496e-06, "loss": 0.16895675659179688, "step": 41690 }, { "epoch": 0.3605243361492767, "grad_norm": 8.268034418682358, "learning_rate": 5.531658956010147e-06, "loss": 0.13833885192871093, "step": 41695 }, { "epoch": 0.36056756967082, "grad_norm": 0.9627972983849469, "learning_rate": 5.531549642839547e-06, "loss": 0.057100677490234376, "step": 41700 }, { "epoch": 0.3606108031923632, "grad_norm": 7.259165654064277, "learning_rate": 5.531440317993663e-06, "loss": 0.11079864501953125, "step": 41705 }, { "epoch": 0.3606540367139065, "grad_norm": 2.496710778122082, "learning_rate": 5.5313309814730005e-06, "loss": 0.30025405883789064, "step": 41710 }, { "epoch": 0.3606972702354498, "grad_norm": 3.288305484864664, "learning_rate": 5.531221633278064e-06, "loss": 0.0643798828125, "step": 41715 }, { "epoch": 0.360740503756993, "grad_norm": 0.3254019342623508, "learning_rate": 5.531112273409356e-06, "loss": 0.1712158203125, "step": 41720 }, { "epoch": 0.3607837372785363, "grad_norm": 4.677899762294543, "learning_rate": 5.531002901867382e-06, "loss": 0.11975269317626953, "step": 41725 }, { "epoch": 0.3608269708000795, "grad_norm": 1.9682225342685207, "learning_rate": 5.530893518652647e-06, "loss": 0.09810562133789062, "step": 41730 }, { "epoch": 0.3608702043216228, "grad_norm": 2.6505236980534046, "learning_rate": 5.530784123765654e-06, "loss": 0.148199462890625, "step": 41735 }, { "epoch": 0.3609134378431661, "grad_norm": 21.566561712690586, "learning_rate": 5.530674717206908e-06, "loss": 0.37196044921875, "step": 41740 }, { "epoch": 0.3609566713647093, "grad_norm": 42.00717671392255, "learning_rate": 5.530565298976915e-06, "loss": 0.27712554931640626, "step": 41745 }, { "epoch": 0.3609999048862526, "grad_norm": 25.879931426379965, "learning_rate": 5.530455869076178e-06, "loss": 0.355340576171875, "step": 41750 }, { "epoch": 0.3610431384077959, "grad_norm": 4.779280643625326, "learning_rate": 5.530346427505201e-06, "loss": 0.11787567138671876, "step": 41755 }, { "epoch": 0.3610863719293391, "grad_norm": 41.082865543226085, "learning_rate": 5.530236974264491e-06, "loss": 0.29431686401367185, "step": 41760 }, { "epoch": 0.3611296054508824, "grad_norm": 0.6153477735696843, "learning_rate": 5.5301275093545506e-06, "loss": 0.04887237548828125, "step": 41765 }, { "epoch": 0.36117283897242564, "grad_norm": 12.103141290451688, "learning_rate": 5.530018032775887e-06, "loss": 0.12297744750976562, "step": 41770 }, { "epoch": 0.3612160724939689, "grad_norm": 20.587777149272984, "learning_rate": 5.529908544529003e-06, "loss": 0.082586669921875, "step": 41775 }, { "epoch": 0.3612593060155122, "grad_norm": 1.439529923524378, "learning_rate": 5.529799044614404e-06, "loss": 0.2879364013671875, "step": 41780 }, { "epoch": 0.36130253953705543, "grad_norm": 3.2450254535052463, "learning_rate": 5.529689533032596e-06, "loss": 0.235552978515625, "step": 41785 }, { "epoch": 0.3613457730585987, "grad_norm": 36.0324573616481, "learning_rate": 5.529580009784083e-06, "loss": 0.37855377197265627, "step": 41790 }, { "epoch": 0.361389006580142, "grad_norm": 9.735230283642169, "learning_rate": 5.529470474869371e-06, "loss": 0.40445938110351565, "step": 41795 }, { "epoch": 0.36143224010168523, "grad_norm": 11.97848726062262, "learning_rate": 5.529360928288965e-06, "loss": 0.15746231079101564, "step": 41800 }, { "epoch": 0.3614754736232285, "grad_norm": 14.548042913830706, "learning_rate": 5.529251370043368e-06, "loss": 0.11293487548828125, "step": 41805 }, { "epoch": 0.36151870714477174, "grad_norm": 27.969601799144368, "learning_rate": 5.529141800133089e-06, "loss": 0.09772758483886719, "step": 41810 }, { "epoch": 0.36156194066631503, "grad_norm": 51.4988813516701, "learning_rate": 5.5290322185586315e-06, "loss": 0.5354354858398438, "step": 41815 }, { "epoch": 0.3616051741878583, "grad_norm": 25.048372435530474, "learning_rate": 5.5289226253205e-06, "loss": 0.36417388916015625, "step": 41820 }, { "epoch": 0.36164840770940154, "grad_norm": 1.239404912463008, "learning_rate": 5.528813020419201e-06, "loss": 0.3573486328125, "step": 41825 }, { "epoch": 0.3616916412309448, "grad_norm": 12.749080106948744, "learning_rate": 5.5287034038552405e-06, "loss": 0.20229949951171874, "step": 41830 }, { "epoch": 0.3617348747524881, "grad_norm": 7.771489972899884, "learning_rate": 5.5285937756291226e-06, "loss": 0.08028411865234375, "step": 41835 }, { "epoch": 0.36177810827403134, "grad_norm": 2.712025743540737, "learning_rate": 5.528484135741354e-06, "loss": 0.2381072998046875, "step": 41840 }, { "epoch": 0.3618213417955746, "grad_norm": 0.1349392359955097, "learning_rate": 5.52837448419244e-06, "loss": 0.17858505249023438, "step": 41845 }, { "epoch": 0.36186457531711785, "grad_norm": 71.72540314040786, "learning_rate": 5.528264820982886e-06, "loss": 0.867578125, "step": 41850 }, { "epoch": 0.36190780883866114, "grad_norm": 14.70339257006119, "learning_rate": 5.528155146113198e-06, "loss": 0.1785747528076172, "step": 41855 }, { "epoch": 0.3619510423602044, "grad_norm": 2.731053681147775, "learning_rate": 5.528045459583882e-06, "loss": 0.365985107421875, "step": 41860 }, { "epoch": 0.36199427588174765, "grad_norm": 5.201255116390585, "learning_rate": 5.527935761395444e-06, "loss": 0.07698974609375, "step": 41865 }, { "epoch": 0.36203750940329094, "grad_norm": 14.578994470971326, "learning_rate": 5.527826051548389e-06, "loss": 0.04752655029296875, "step": 41870 }, { "epoch": 0.3620807429248342, "grad_norm": 30.52897067144294, "learning_rate": 5.5277163300432234e-06, "loss": 0.14033737182617187, "step": 41875 }, { "epoch": 0.36212397644637745, "grad_norm": 3.1427676764033863, "learning_rate": 5.527606596880455e-06, "loss": 0.095050048828125, "step": 41880 }, { "epoch": 0.36216720996792073, "grad_norm": 7.551720132140465, "learning_rate": 5.5274968520605875e-06, "loss": 0.08194122314453126, "step": 41885 }, { "epoch": 0.362210443489464, "grad_norm": 38.286343610269974, "learning_rate": 5.527387095584127e-06, "loss": 0.2968658447265625, "step": 41890 }, { "epoch": 0.36225367701100725, "grad_norm": 1.8009055993038612, "learning_rate": 5.527277327451582e-06, "loss": 0.15225830078125, "step": 41895 }, { "epoch": 0.36229691053255053, "grad_norm": 17.344938793300496, "learning_rate": 5.527167547663456e-06, "loss": 0.20734405517578125, "step": 41900 }, { "epoch": 0.36234014405409376, "grad_norm": 2.5883766077118264, "learning_rate": 5.527057756220257e-06, "loss": 0.1270599365234375, "step": 41905 }, { "epoch": 0.36238337757563704, "grad_norm": 2.076798714911825, "learning_rate": 5.526947953122491e-06, "loss": 0.0453857421875, "step": 41910 }, { "epoch": 0.36242661109718033, "grad_norm": 13.387763005152665, "learning_rate": 5.526838138370664e-06, "loss": 0.21695098876953126, "step": 41915 }, { "epoch": 0.36246984461872356, "grad_norm": 0.6895226602598244, "learning_rate": 5.526728311965284e-06, "loss": 0.065374755859375, "step": 41920 }, { "epoch": 0.36251307814026684, "grad_norm": 19.680232998592828, "learning_rate": 5.526618473906856e-06, "loss": 0.29658050537109376, "step": 41925 }, { "epoch": 0.3625563116618101, "grad_norm": 6.402861047419217, "learning_rate": 5.526508624195885e-06, "loss": 0.08457565307617188, "step": 41930 }, { "epoch": 0.36259954518335336, "grad_norm": 33.79792067029085, "learning_rate": 5.526398762832881e-06, "loss": 0.4615509033203125, "step": 41935 }, { "epoch": 0.36264277870489664, "grad_norm": 3.245548695291569, "learning_rate": 5.526288889818349e-06, "loss": 0.3608154296875, "step": 41940 }, { "epoch": 0.36268601222643987, "grad_norm": 30.155216715279394, "learning_rate": 5.526179005152796e-06, "loss": 0.488519287109375, "step": 41945 }, { "epoch": 0.36272924574798315, "grad_norm": 1.0615509220408952, "learning_rate": 5.5260691088367285e-06, "loss": 0.13202590942382814, "step": 41950 }, { "epoch": 0.36277247926952644, "grad_norm": 10.324100362444526, "learning_rate": 5.525959200870654e-06, "loss": 0.5220947265625, "step": 41955 }, { "epoch": 0.36281571279106967, "grad_norm": 0.71508323044603, "learning_rate": 5.525849281255077e-06, "loss": 0.1891998291015625, "step": 41960 }, { "epoch": 0.36285894631261295, "grad_norm": 10.617047609605635, "learning_rate": 5.525739349990508e-06, "loss": 0.2508125305175781, "step": 41965 }, { "epoch": 0.36290217983415624, "grad_norm": 32.041273637250875, "learning_rate": 5.525629407077452e-06, "loss": 0.3125297546386719, "step": 41970 }, { "epoch": 0.36294541335569946, "grad_norm": 20.832714952153935, "learning_rate": 5.525519452516416e-06, "loss": 0.12406158447265625, "step": 41975 }, { "epoch": 0.36298864687724275, "grad_norm": 1.0910507396929194, "learning_rate": 5.525409486307908e-06, "loss": 0.3261253356933594, "step": 41980 }, { "epoch": 0.363031880398786, "grad_norm": 30.33854991552917, "learning_rate": 5.525299508452435e-06, "loss": 0.540203857421875, "step": 41985 }, { "epoch": 0.36307511392032926, "grad_norm": 39.343011193673, "learning_rate": 5.525189518950502e-06, "loss": 0.2104278564453125, "step": 41990 }, { "epoch": 0.36311834744187255, "grad_norm": 13.605568814766748, "learning_rate": 5.525079517802619e-06, "loss": 0.10609893798828125, "step": 41995 }, { "epoch": 0.3631615809634158, "grad_norm": 2.197421674554241, "learning_rate": 5.524969505009292e-06, "loss": 0.0492156982421875, "step": 42000 }, { "epoch": 0.36320481448495906, "grad_norm": 17.272908442214018, "learning_rate": 5.52485948057103e-06, "loss": 0.32801055908203125, "step": 42005 }, { "epoch": 0.36324804800650234, "grad_norm": 0.8371082959706072, "learning_rate": 5.524749444488338e-06, "loss": 0.09290924072265624, "step": 42010 }, { "epoch": 0.3632912815280456, "grad_norm": 6.937032209927106, "learning_rate": 5.5246393967617255e-06, "loss": 0.094427490234375, "step": 42015 }, { "epoch": 0.36333451504958886, "grad_norm": 3.100902241080226, "learning_rate": 5.524529337391699e-06, "loss": 0.097149658203125, "step": 42020 }, { "epoch": 0.3633777485711321, "grad_norm": 12.68883390878693, "learning_rate": 5.524419266378765e-06, "loss": 0.11279296875, "step": 42025 }, { "epoch": 0.36342098209267537, "grad_norm": 12.449782244629363, "learning_rate": 5.524309183723434e-06, "loss": 0.27582550048828125, "step": 42030 }, { "epoch": 0.36346421561421866, "grad_norm": 6.822580333831165, "learning_rate": 5.524199089426211e-06, "loss": 0.16044921875, "step": 42035 }, { "epoch": 0.3635074491357619, "grad_norm": 6.065052655768193, "learning_rate": 5.524088983487605e-06, "loss": 0.2588836669921875, "step": 42040 }, { "epoch": 0.36355068265730517, "grad_norm": 31.95914779692188, "learning_rate": 5.523978865908125e-06, "loss": 0.21272125244140624, "step": 42045 }, { "epoch": 0.36359391617884845, "grad_norm": 20.592880289995794, "learning_rate": 5.523868736688277e-06, "loss": 0.20057373046875, "step": 42050 }, { "epoch": 0.3636371497003917, "grad_norm": 12.736832458979013, "learning_rate": 5.523758595828568e-06, "loss": 0.10020523071289063, "step": 42055 }, { "epoch": 0.36368038322193497, "grad_norm": 26.13841101529014, "learning_rate": 5.523648443329508e-06, "loss": 0.1976654052734375, "step": 42060 }, { "epoch": 0.36372361674347825, "grad_norm": 30.169204115938676, "learning_rate": 5.523538279191604e-06, "loss": 0.40569610595703126, "step": 42065 }, { "epoch": 0.3637668502650215, "grad_norm": 1.439821622813904, "learning_rate": 5.523428103415366e-06, "loss": 0.02943000793457031, "step": 42070 }, { "epoch": 0.36381008378656476, "grad_norm": 2.535026730467665, "learning_rate": 5.5233179160013e-06, "loss": 0.1689117431640625, "step": 42075 }, { "epoch": 0.363853317308108, "grad_norm": 6.858271855089975, "learning_rate": 5.523207716949915e-06, "loss": 0.280810546875, "step": 42080 }, { "epoch": 0.3638965508296513, "grad_norm": 1.5128926144723118, "learning_rate": 5.523097506261718e-06, "loss": 0.336187744140625, "step": 42085 }, { "epoch": 0.36393978435119456, "grad_norm": 1.5832671047699043, "learning_rate": 5.522987283937218e-06, "loss": 0.43590240478515624, "step": 42090 }, { "epoch": 0.3639830178727378, "grad_norm": 3.0891151174286278, "learning_rate": 5.522877049976924e-06, "loss": 0.0574432373046875, "step": 42095 }, { "epoch": 0.3640262513942811, "grad_norm": 18.46685140193319, "learning_rate": 5.5227668043813446e-06, "loss": 0.2953214645385742, "step": 42100 }, { "epoch": 0.36406948491582436, "grad_norm": 4.107206362630515, "learning_rate": 5.522656547150987e-06, "loss": 0.1473480224609375, "step": 42105 }, { "epoch": 0.3641127184373676, "grad_norm": 40.39589125968738, "learning_rate": 5.52254627828636e-06, "loss": 0.4369842529296875, "step": 42110 }, { "epoch": 0.3641559519589109, "grad_norm": 11.537960928347902, "learning_rate": 5.522435997787973e-06, "loss": 0.12614021301269532, "step": 42115 }, { "epoch": 0.3641991854804541, "grad_norm": 42.93176896181281, "learning_rate": 5.5223257056563345e-06, "loss": 0.29909515380859375, "step": 42120 }, { "epoch": 0.3642424190019974, "grad_norm": 1.161978654398881, "learning_rate": 5.522215401891952e-06, "loss": 0.10465087890625, "step": 42125 }, { "epoch": 0.36428565252354067, "grad_norm": 12.189799782500735, "learning_rate": 5.522105086495335e-06, "loss": 0.10077056884765626, "step": 42130 }, { "epoch": 0.3643288860450839, "grad_norm": 26.507317018471724, "learning_rate": 5.521994759466992e-06, "loss": 0.33709716796875, "step": 42135 }, { "epoch": 0.3643721195666272, "grad_norm": 3.3787424916440307, "learning_rate": 5.521884420807432e-06, "loss": 0.1716094970703125, "step": 42140 }, { "epoch": 0.36441535308817047, "grad_norm": 7.888438351590131, "learning_rate": 5.521774070517164e-06, "loss": 0.11993408203125, "step": 42145 }, { "epoch": 0.3644585866097137, "grad_norm": 17.926123458433334, "learning_rate": 5.5216637085966975e-06, "loss": 0.548046875, "step": 42150 }, { "epoch": 0.364501820131257, "grad_norm": 3.6617276623178996, "learning_rate": 5.521553335046541e-06, "loss": 0.06541748046875, "step": 42155 }, { "epoch": 0.3645450536528002, "grad_norm": 7.940455096262474, "learning_rate": 5.5214429498672025e-06, "loss": 0.3062713623046875, "step": 42160 }, { "epoch": 0.3645882871743435, "grad_norm": 0.5118200495801896, "learning_rate": 5.521332553059192e-06, "loss": 0.026151275634765624, "step": 42165 }, { "epoch": 0.3646315206958868, "grad_norm": 11.821077079742869, "learning_rate": 5.521222144623019e-06, "loss": 0.160504150390625, "step": 42170 }, { "epoch": 0.36467475421743, "grad_norm": 4.6338071707400985, "learning_rate": 5.521111724559191e-06, "loss": 0.023028564453125, "step": 42175 }, { "epoch": 0.3647179877389733, "grad_norm": 1.8045869528911493, "learning_rate": 5.521001292868219e-06, "loss": 0.14857978820800782, "step": 42180 }, { "epoch": 0.3647612212605166, "grad_norm": 25.714152705166402, "learning_rate": 5.520890849550613e-06, "loss": 0.10284423828125, "step": 42185 }, { "epoch": 0.3648044547820598, "grad_norm": 17.331582994935424, "learning_rate": 5.52078039460688e-06, "loss": 0.16806507110595703, "step": 42190 }, { "epoch": 0.3648476883036031, "grad_norm": 23.301907827945023, "learning_rate": 5.520669928037531e-06, "loss": 0.21432247161865234, "step": 42195 }, { "epoch": 0.3648909218251463, "grad_norm": 11.892451972909093, "learning_rate": 5.520559449843075e-06, "loss": 0.2319580078125, "step": 42200 }, { "epoch": 0.3649341553466896, "grad_norm": 14.127521452782398, "learning_rate": 5.520448960024021e-06, "loss": 0.16195297241210938, "step": 42205 }, { "epoch": 0.3649773888682329, "grad_norm": 3.7159516954471212, "learning_rate": 5.52033845858088e-06, "loss": 0.070703125, "step": 42210 }, { "epoch": 0.3650206223897761, "grad_norm": 3.1585096148228042, "learning_rate": 5.52022794551416e-06, "loss": 0.2575859069824219, "step": 42215 }, { "epoch": 0.3650638559113194, "grad_norm": 3.173725222899546, "learning_rate": 5.520117420824371e-06, "loss": 0.04730224609375, "step": 42220 }, { "epoch": 0.3651070894328627, "grad_norm": 1.4013596523584242, "learning_rate": 5.520006884512024e-06, "loss": 0.250408935546875, "step": 42225 }, { "epoch": 0.3651503229544059, "grad_norm": 11.971738812059689, "learning_rate": 5.519896336577628e-06, "loss": 0.19522705078125, "step": 42230 }, { "epoch": 0.3651935564759492, "grad_norm": 4.891613810613451, "learning_rate": 5.519785777021693e-06, "loss": 0.23323326110839843, "step": 42235 }, { "epoch": 0.36523678999749243, "grad_norm": 13.26318774623621, "learning_rate": 5.519675205844728e-06, "loss": 0.3106487274169922, "step": 42240 }, { "epoch": 0.3652800235190357, "grad_norm": 16.00919196773932, "learning_rate": 5.519564623047243e-06, "loss": 0.285552978515625, "step": 42245 }, { "epoch": 0.365323257040579, "grad_norm": 24.61818984246488, "learning_rate": 5.51945402862975e-06, "loss": 0.27358551025390626, "step": 42250 }, { "epoch": 0.3653664905621222, "grad_norm": 6.428748518002303, "learning_rate": 5.519343422592756e-06, "loss": 0.0748260498046875, "step": 42255 }, { "epoch": 0.3654097240836655, "grad_norm": 1.3621418617444918, "learning_rate": 5.519232804936775e-06, "loss": 0.09124755859375, "step": 42260 }, { "epoch": 0.3654529576052088, "grad_norm": 16.474930062322894, "learning_rate": 5.519122175662313e-06, "loss": 0.09266128540039062, "step": 42265 }, { "epoch": 0.365496191126752, "grad_norm": 1.0524847699057445, "learning_rate": 5.519011534769883e-06, "loss": 0.42860107421875, "step": 42270 }, { "epoch": 0.3655394246482953, "grad_norm": 0.10060426563438833, "learning_rate": 5.518900882259994e-06, "loss": 0.1226308822631836, "step": 42275 }, { "epoch": 0.3655826581698386, "grad_norm": 3.076429145206404, "learning_rate": 5.518790218133156e-06, "loss": 0.0686309814453125, "step": 42280 }, { "epoch": 0.3656258916913818, "grad_norm": 4.485216489394292, "learning_rate": 5.5186795423898815e-06, "loss": 0.2796657562255859, "step": 42285 }, { "epoch": 0.3656691252129251, "grad_norm": 3.9247220466357566, "learning_rate": 5.518568855030679e-06, "loss": 0.1945556640625, "step": 42290 }, { "epoch": 0.36571235873446833, "grad_norm": 18.155052794168537, "learning_rate": 5.51845815605606e-06, "loss": 0.11480960845947266, "step": 42295 }, { "epoch": 0.3657555922560116, "grad_norm": 2.794758894886772, "learning_rate": 5.518347445466533e-06, "loss": 0.371063232421875, "step": 42300 }, { "epoch": 0.3657988257775549, "grad_norm": 8.606355407182189, "learning_rate": 5.518236723262611e-06, "loss": 0.1205657958984375, "step": 42305 }, { "epoch": 0.36584205929909813, "grad_norm": 10.678982687076143, "learning_rate": 5.518125989444804e-06, "loss": 0.06268310546875, "step": 42310 }, { "epoch": 0.3658852928206414, "grad_norm": 16.163354022561208, "learning_rate": 5.518015244013622e-06, "loss": 0.17406845092773438, "step": 42315 }, { "epoch": 0.3659285263421847, "grad_norm": 2.3543275041518963, "learning_rate": 5.517904486969576e-06, "loss": 0.10579833984375, "step": 42320 }, { "epoch": 0.36597175986372793, "grad_norm": 0.28142095006226514, "learning_rate": 5.517793718313179e-06, "loss": 0.24773712158203126, "step": 42325 }, { "epoch": 0.3660149933852712, "grad_norm": 3.0665355030441983, "learning_rate": 5.517682938044937e-06, "loss": 0.380242919921875, "step": 42330 }, { "epoch": 0.36605822690681444, "grad_norm": 35.409409689806935, "learning_rate": 5.517572146165365e-06, "loss": 0.2594482421875, "step": 42335 }, { "epoch": 0.36610146042835773, "grad_norm": 15.304236993981144, "learning_rate": 5.517461342674973e-06, "loss": 0.20280914306640624, "step": 42340 }, { "epoch": 0.366144693949901, "grad_norm": 7.052307973689256, "learning_rate": 5.517350527574271e-06, "loss": 0.209820556640625, "step": 42345 }, { "epoch": 0.36618792747144424, "grad_norm": 7.202138686305275, "learning_rate": 5.517239700863772e-06, "loss": 0.03815460205078125, "step": 42350 }, { "epoch": 0.3662311609929875, "grad_norm": 8.561901367419358, "learning_rate": 5.517128862543985e-06, "loss": 0.11427726745605468, "step": 42355 }, { "epoch": 0.3662743945145308, "grad_norm": 2.4491509324746943, "learning_rate": 5.517018012615422e-06, "loss": 0.2841316223144531, "step": 42360 }, { "epoch": 0.36631762803607404, "grad_norm": 3.3005727494378982, "learning_rate": 5.516907151078595e-06, "loss": 0.2113311767578125, "step": 42365 }, { "epoch": 0.3663608615576173, "grad_norm": 27.577618832439374, "learning_rate": 5.516796277934014e-06, "loss": 0.2198577880859375, "step": 42370 }, { "epoch": 0.36640409507916055, "grad_norm": 0.5114844174550257, "learning_rate": 5.516685393182191e-06, "loss": 0.14366455078125, "step": 42375 }, { "epoch": 0.36644732860070384, "grad_norm": 18.671631742052995, "learning_rate": 5.5165744968236375e-06, "loss": 0.2626064300537109, "step": 42380 }, { "epoch": 0.3664905621222471, "grad_norm": 19.041722250289553, "learning_rate": 5.516463588858865e-06, "loss": 0.2126077175140381, "step": 42385 }, { "epoch": 0.36653379564379035, "grad_norm": 31.36351324290025, "learning_rate": 5.516352669288385e-06, "loss": 0.244482421875, "step": 42390 }, { "epoch": 0.36657702916533363, "grad_norm": 16.29214583490606, "learning_rate": 5.516241738112708e-06, "loss": 0.3953987121582031, "step": 42395 }, { "epoch": 0.3666202626868769, "grad_norm": 20.459518112370773, "learning_rate": 5.5161307953323465e-06, "loss": 0.1188232421875, "step": 42400 }, { "epoch": 0.36666349620842015, "grad_norm": 15.456331097842742, "learning_rate": 5.516019840947813e-06, "loss": 0.3129791259765625, "step": 42405 }, { "epoch": 0.36670672972996343, "grad_norm": 15.691565994144984, "learning_rate": 5.515908874959617e-06, "loss": 0.380950927734375, "step": 42410 }, { "epoch": 0.36674996325150666, "grad_norm": 3.6382023206296332, "learning_rate": 5.515797897368273e-06, "loss": 0.5365966796875, "step": 42415 }, { "epoch": 0.36679319677304995, "grad_norm": 1.9540798222510716, "learning_rate": 5.5156869081742905e-06, "loss": 0.06018638610839844, "step": 42420 }, { "epoch": 0.36683643029459323, "grad_norm": 25.33543295714028, "learning_rate": 5.5155759073781826e-06, "loss": 0.20211143493652345, "step": 42425 }, { "epoch": 0.36687966381613646, "grad_norm": 9.32618614008981, "learning_rate": 5.515464894980461e-06, "loss": 0.15268630981445314, "step": 42430 }, { "epoch": 0.36692289733767974, "grad_norm": 7.140017077252273, "learning_rate": 5.515353870981638e-06, "loss": 0.3093994140625, "step": 42435 }, { "epoch": 0.366966130859223, "grad_norm": 9.456980376516748, "learning_rate": 5.515242835382224e-06, "loss": 0.067724609375, "step": 42440 }, { "epoch": 0.36700936438076626, "grad_norm": 35.09821747185279, "learning_rate": 5.515131788182733e-06, "loss": 0.103924560546875, "step": 42445 }, { "epoch": 0.36705259790230954, "grad_norm": 6.488618771507872, "learning_rate": 5.515020729383676e-06, "loss": 0.24036865234375, "step": 42450 }, { "epoch": 0.3670958314238528, "grad_norm": 5.343467668924222, "learning_rate": 5.514909658985566e-06, "loss": 0.0769500732421875, "step": 42455 }, { "epoch": 0.36713906494539605, "grad_norm": 4.237647508624633, "learning_rate": 5.514798576988915e-06, "loss": 0.0657318115234375, "step": 42460 }, { "epoch": 0.36718229846693934, "grad_norm": 7.939094582587382, "learning_rate": 5.514687483394235e-06, "loss": 0.22223968505859376, "step": 42465 }, { "epoch": 0.36722553198848257, "grad_norm": 1.0323418381910423, "learning_rate": 5.514576378202039e-06, "loss": 0.042718505859375, "step": 42470 }, { "epoch": 0.36726876551002585, "grad_norm": 2.77532271888152, "learning_rate": 5.514465261412838e-06, "loss": 0.32865180969238283, "step": 42475 }, { "epoch": 0.36731199903156914, "grad_norm": 18.41944054718262, "learning_rate": 5.514354133027146e-06, "loss": 0.0994873046875, "step": 42480 }, { "epoch": 0.36735523255311237, "grad_norm": 17.026563720854238, "learning_rate": 5.514242993045475e-06, "loss": 0.21943283081054688, "step": 42485 }, { "epoch": 0.36739846607465565, "grad_norm": 23.722300871446798, "learning_rate": 5.514131841468337e-06, "loss": 0.31326904296875, "step": 42490 }, { "epoch": 0.36744169959619893, "grad_norm": 3.5115743239963275, "learning_rate": 5.514020678296245e-06, "loss": 0.146160888671875, "step": 42495 }, { "epoch": 0.36748493311774216, "grad_norm": 7.7055781119324624, "learning_rate": 5.5139095035297125e-06, "loss": 0.12667999267578126, "step": 42500 }, { "epoch": 0.36752816663928545, "grad_norm": 46.29255185684108, "learning_rate": 5.513798317169251e-06, "loss": 0.539862060546875, "step": 42505 }, { "epoch": 0.3675714001608287, "grad_norm": 31.064442538662345, "learning_rate": 5.513687119215374e-06, "loss": 0.1491180419921875, "step": 42510 }, { "epoch": 0.36761463368237196, "grad_norm": 0.6777679817361393, "learning_rate": 5.513575909668594e-06, "loss": 0.19002532958984375, "step": 42515 }, { "epoch": 0.36765786720391525, "grad_norm": 21.088877229145485, "learning_rate": 5.513464688529424e-06, "loss": 0.525103759765625, "step": 42520 }, { "epoch": 0.3677011007254585, "grad_norm": 0.5673260172283205, "learning_rate": 5.513353455798377e-06, "loss": 0.16951370239257812, "step": 42525 }, { "epoch": 0.36774433424700176, "grad_norm": 1.6438527124371585, "learning_rate": 5.5132422114759655e-06, "loss": 0.2506126403808594, "step": 42530 }, { "epoch": 0.36778756776854504, "grad_norm": 16.830783261234334, "learning_rate": 5.513130955562704e-06, "loss": 0.3315155029296875, "step": 42535 }, { "epoch": 0.36783080129008827, "grad_norm": 1.5091968248706555, "learning_rate": 5.513019688059103e-06, "loss": 0.023089599609375, "step": 42540 }, { "epoch": 0.36787403481163156, "grad_norm": 7.530773371997278, "learning_rate": 5.51290840896568e-06, "loss": 0.130096435546875, "step": 42545 }, { "epoch": 0.3679172683331748, "grad_norm": 30.57628886133942, "learning_rate": 5.512797118282944e-06, "loss": 0.28295745849609377, "step": 42550 }, { "epoch": 0.36796050185471807, "grad_norm": 5.180549109537284, "learning_rate": 5.512685816011408e-06, "loss": 0.24593887329101563, "step": 42555 }, { "epoch": 0.36800373537626135, "grad_norm": 1.8987299458888696, "learning_rate": 5.512574502151589e-06, "loss": 0.1506622314453125, "step": 42560 }, { "epoch": 0.3680469688978046, "grad_norm": 4.286502101036086, "learning_rate": 5.512463176703998e-06, "loss": 0.252703857421875, "step": 42565 }, { "epoch": 0.36809020241934787, "grad_norm": 3.9974975817363227, "learning_rate": 5.512351839669149e-06, "loss": 0.0978057861328125, "step": 42570 }, { "epoch": 0.36813343594089115, "grad_norm": 20.557188252281005, "learning_rate": 5.512240491047555e-06, "loss": 0.3617584228515625, "step": 42575 }, { "epoch": 0.3681766694624344, "grad_norm": 0.39004984052059316, "learning_rate": 5.5121291308397295e-06, "loss": 0.10633392333984375, "step": 42580 }, { "epoch": 0.36821990298397766, "grad_norm": 10.847683385901185, "learning_rate": 5.512017759046187e-06, "loss": 0.058916473388671876, "step": 42585 }, { "epoch": 0.3682631365055209, "grad_norm": 62.931724935141006, "learning_rate": 5.51190637566744e-06, "loss": 0.20839614868164064, "step": 42590 }, { "epoch": 0.3683063700270642, "grad_norm": 3.6482904028274334, "learning_rate": 5.511794980704003e-06, "loss": 0.4329536437988281, "step": 42595 }, { "epoch": 0.36834960354860746, "grad_norm": 19.923428569854277, "learning_rate": 5.51168357415639e-06, "loss": 0.0943267822265625, "step": 42600 }, { "epoch": 0.3683928370701507, "grad_norm": 8.936057721995676, "learning_rate": 5.511572156025115e-06, "loss": 0.38150634765625, "step": 42605 }, { "epoch": 0.368436070591694, "grad_norm": 7.752398380060742, "learning_rate": 5.511460726310689e-06, "loss": 0.221044921875, "step": 42610 }, { "epoch": 0.36847930411323726, "grad_norm": 0.16186770025491695, "learning_rate": 5.511349285013629e-06, "loss": 0.0433685302734375, "step": 42615 }, { "epoch": 0.3685225376347805, "grad_norm": 1.719535830099673, "learning_rate": 5.5112378321344486e-06, "loss": 0.381781005859375, "step": 42620 }, { "epoch": 0.3685657711563238, "grad_norm": 1.0022744104483807, "learning_rate": 5.511126367673661e-06, "loss": 0.1627471923828125, "step": 42625 }, { "epoch": 0.36860900467786706, "grad_norm": 7.4345445979155285, "learning_rate": 5.51101489163178e-06, "loss": 0.1537017822265625, "step": 42630 }, { "epoch": 0.3686522381994103, "grad_norm": 4.733710243527496, "learning_rate": 5.51090340400932e-06, "loss": 0.07135162353515626, "step": 42635 }, { "epoch": 0.36869547172095357, "grad_norm": 17.92961859729797, "learning_rate": 5.510791904806796e-06, "loss": 0.5755752563476563, "step": 42640 }, { "epoch": 0.3687387052424968, "grad_norm": 5.850895623933088, "learning_rate": 5.510680394024722e-06, "loss": 0.4106756210327148, "step": 42645 }, { "epoch": 0.3687819387640401, "grad_norm": 17.40049780924936, "learning_rate": 5.510568871663611e-06, "loss": 0.35730514526367185, "step": 42650 }, { "epoch": 0.36882517228558337, "grad_norm": 3.00857996486137, "learning_rate": 5.510457337723977e-06, "loss": 0.080181884765625, "step": 42655 }, { "epoch": 0.3688684058071266, "grad_norm": 12.356593454131726, "learning_rate": 5.510345792206338e-06, "loss": 0.05477294921875, "step": 42660 }, { "epoch": 0.3689116393286699, "grad_norm": 0.3622719651443987, "learning_rate": 5.510234235111204e-06, "loss": 0.08792266845703126, "step": 42665 }, { "epoch": 0.36895487285021317, "grad_norm": 8.13953097408313, "learning_rate": 5.510122666439094e-06, "loss": 0.06943511962890625, "step": 42670 }, { "epoch": 0.3689981063717564, "grad_norm": 1.5860283788683331, "learning_rate": 5.510011086190518e-06, "loss": 0.0879486083984375, "step": 42675 }, { "epoch": 0.3690413398932997, "grad_norm": 18.163526857571856, "learning_rate": 5.509899494365993e-06, "loss": 0.09272537231445313, "step": 42680 }, { "epoch": 0.3690845734148429, "grad_norm": 27.0105590212827, "learning_rate": 5.509787890966034e-06, "loss": 0.2232666015625, "step": 42685 }, { "epoch": 0.3691278069363862, "grad_norm": 1.0047654610494106, "learning_rate": 5.509676275991155e-06, "loss": 0.08752593994140626, "step": 42690 }, { "epoch": 0.3691710404579295, "grad_norm": 12.688766187896967, "learning_rate": 5.50956464944187e-06, "loss": 0.06307373046875, "step": 42695 }, { "epoch": 0.3692142739794727, "grad_norm": 24.340231879759678, "learning_rate": 5.509453011318695e-06, "loss": 0.133123779296875, "step": 42700 }, { "epoch": 0.369257507501016, "grad_norm": 12.491574798445138, "learning_rate": 5.509341361622145e-06, "loss": 0.24963226318359374, "step": 42705 }, { "epoch": 0.3693007410225593, "grad_norm": 73.31112085346393, "learning_rate": 5.509229700352733e-06, "loss": 0.36886138916015626, "step": 42710 }, { "epoch": 0.3693439745441025, "grad_norm": 2.3315436148193234, "learning_rate": 5.509118027510977e-06, "loss": 0.0468780517578125, "step": 42715 }, { "epoch": 0.3693872080656458, "grad_norm": 1.847776253144992, "learning_rate": 5.509006343097389e-06, "loss": 0.183038330078125, "step": 42720 }, { "epoch": 0.369430441587189, "grad_norm": 61.672550123924275, "learning_rate": 5.508894647112486e-06, "loss": 0.299200439453125, "step": 42725 }, { "epoch": 0.3694736751087323, "grad_norm": 30.46074600805107, "learning_rate": 5.5087829395567824e-06, "loss": 0.273486328125, "step": 42730 }, { "epoch": 0.3695169086302756, "grad_norm": 2.0343361181071415, "learning_rate": 5.508671220430794e-06, "loss": 0.1690277099609375, "step": 42735 }, { "epoch": 0.3695601421518188, "grad_norm": 5.694897639484896, "learning_rate": 5.508559489735035e-06, "loss": 0.2733649253845215, "step": 42740 }, { "epoch": 0.3696033756733621, "grad_norm": 34.1035516942765, "learning_rate": 5.508447747470021e-06, "loss": 0.163897705078125, "step": 42745 }, { "epoch": 0.3696466091949054, "grad_norm": 29.420107740428552, "learning_rate": 5.508335993636268e-06, "loss": 0.07233982086181641, "step": 42750 }, { "epoch": 0.3696898427164486, "grad_norm": 2.987612555187108, "learning_rate": 5.508224228234292e-06, "loss": 0.1335742950439453, "step": 42755 }, { "epoch": 0.3697330762379919, "grad_norm": 0.7748746619486995, "learning_rate": 5.5081124512646065e-06, "loss": 0.1852294921875, "step": 42760 }, { "epoch": 0.3697763097595351, "grad_norm": 5.456157689928639, "learning_rate": 5.508000662727728e-06, "loss": 0.082275390625, "step": 42765 }, { "epoch": 0.3698195432810784, "grad_norm": 5.539518422797297, "learning_rate": 5.5078888626241725e-06, "loss": 0.09237632751464844, "step": 42770 }, { "epoch": 0.3698627768026217, "grad_norm": 3.0968086427055037, "learning_rate": 5.507777050954455e-06, "loss": 0.2410888671875, "step": 42775 }, { "epoch": 0.3699060103241649, "grad_norm": 16.212731484022594, "learning_rate": 5.507665227719091e-06, "loss": 0.07888946533203126, "step": 42780 }, { "epoch": 0.3699492438457082, "grad_norm": 10.34509732563279, "learning_rate": 5.507553392918598e-06, "loss": 0.275048828125, "step": 42785 }, { "epoch": 0.3699924773672515, "grad_norm": 3.291816090355942, "learning_rate": 5.507441546553488e-06, "loss": 0.10812721252441407, "step": 42790 }, { "epoch": 0.3700357108887947, "grad_norm": 1.3442139113458822, "learning_rate": 5.507329688624281e-06, "loss": 0.09119415283203125, "step": 42795 }, { "epoch": 0.370078944410338, "grad_norm": 20.736409863761548, "learning_rate": 5.50721781913149e-06, "loss": 0.05887908935546875, "step": 42800 }, { "epoch": 0.3701221779318813, "grad_norm": 10.47263924549922, "learning_rate": 5.507105938075633e-06, "loss": 0.355645751953125, "step": 42805 }, { "epoch": 0.3701654114534245, "grad_norm": 6.983293196437513, "learning_rate": 5.506994045457224e-06, "loss": 0.20819091796875, "step": 42810 }, { "epoch": 0.3702086449749678, "grad_norm": 0.7212224448239557, "learning_rate": 5.50688214127678e-06, "loss": 0.06326141357421874, "step": 42815 }, { "epoch": 0.37025187849651103, "grad_norm": 67.55263085257064, "learning_rate": 5.506770225534817e-06, "loss": 0.5121414184570312, "step": 42820 }, { "epoch": 0.3702951120180543, "grad_norm": 1.325446241825966, "learning_rate": 5.506658298231851e-06, "loss": 0.30469970703125, "step": 42825 }, { "epoch": 0.3703383455395976, "grad_norm": 6.7451644152558226, "learning_rate": 5.506546359368399e-06, "loss": 0.5371192932128906, "step": 42830 }, { "epoch": 0.37038157906114083, "grad_norm": 0.3241788494593442, "learning_rate": 5.506434408944977e-06, "loss": 0.09964447021484375, "step": 42835 }, { "epoch": 0.3704248125826841, "grad_norm": 19.745118383071897, "learning_rate": 5.5063224469621e-06, "loss": 0.09878997802734375, "step": 42840 }, { "epoch": 0.3704680461042274, "grad_norm": 35.68794898834445, "learning_rate": 5.506210473420287e-06, "loss": 0.1599639892578125, "step": 42845 }, { "epoch": 0.37051127962577063, "grad_norm": 8.873828250881644, "learning_rate": 5.5060984883200515e-06, "loss": 0.1035888671875, "step": 42850 }, { "epoch": 0.3705545131473139, "grad_norm": 34.12205452645336, "learning_rate": 5.505986491661911e-06, "loss": 0.340167236328125, "step": 42855 }, { "epoch": 0.37059774666885714, "grad_norm": 4.292901924457606, "learning_rate": 5.505874483446383e-06, "loss": 0.14662399291992187, "step": 42860 }, { "epoch": 0.3706409801904004, "grad_norm": 34.455277885375864, "learning_rate": 5.505762463673983e-06, "loss": 0.392327880859375, "step": 42865 }, { "epoch": 0.3706842137119437, "grad_norm": 2.676959600354356, "learning_rate": 5.505650432345228e-06, "loss": 0.03364715576171875, "step": 42870 }, { "epoch": 0.37072744723348694, "grad_norm": 34.8179529255898, "learning_rate": 5.505538389460634e-06, "loss": 0.27325439453125, "step": 42875 }, { "epoch": 0.3707706807550302, "grad_norm": 1.692251884789156, "learning_rate": 5.505426335020719e-06, "loss": 0.25717887878417967, "step": 42880 }, { "epoch": 0.3708139142765735, "grad_norm": 2.6328430679362627, "learning_rate": 5.5053142690259984e-06, "loss": 0.1792360305786133, "step": 42885 }, { "epoch": 0.37085714779811674, "grad_norm": 5.107652192196084, "learning_rate": 5.505202191476991e-06, "loss": 0.16331024169921876, "step": 42890 }, { "epoch": 0.37090038131966, "grad_norm": 5.082596282141805, "learning_rate": 5.5050901023742114e-06, "loss": 0.11247272491455078, "step": 42895 }, { "epoch": 0.37094361484120325, "grad_norm": 45.78114125377722, "learning_rate": 5.504978001718179e-06, "loss": 0.30823593139648436, "step": 42900 }, { "epoch": 0.37098684836274654, "grad_norm": 5.389861075285743, "learning_rate": 5.504865889509407e-06, "loss": 0.1114898681640625, "step": 42905 }, { "epoch": 0.3710300818842898, "grad_norm": 5.7518146476800345, "learning_rate": 5.504753765748417e-06, "loss": 0.1703765869140625, "step": 42910 }, { "epoch": 0.37107331540583305, "grad_norm": 28.013086123532453, "learning_rate": 5.5046416304357224e-06, "loss": 0.115802001953125, "step": 42915 }, { "epoch": 0.37111654892737633, "grad_norm": 1.0150819187498996, "learning_rate": 5.504529483571844e-06, "loss": 0.062007904052734375, "step": 42920 }, { "epoch": 0.3711597824489196, "grad_norm": 37.224559823649244, "learning_rate": 5.504417325157294e-06, "loss": 0.2444122314453125, "step": 42925 }, { "epoch": 0.37120301597046285, "grad_norm": 16.31390646369234, "learning_rate": 5.504305155192595e-06, "loss": 0.0408538818359375, "step": 42930 }, { "epoch": 0.37124624949200613, "grad_norm": 43.215099164514214, "learning_rate": 5.504192973678261e-06, "loss": 0.304327392578125, "step": 42935 }, { "epoch": 0.37128948301354936, "grad_norm": 0.4917083237865728, "learning_rate": 5.504080780614811e-06, "loss": 0.03588180541992188, "step": 42940 }, { "epoch": 0.37133271653509264, "grad_norm": 0.5522241752345105, "learning_rate": 5.50396857600276e-06, "loss": 0.22119674682617188, "step": 42945 }, { "epoch": 0.37137595005663593, "grad_norm": 4.4219165451191715, "learning_rate": 5.503856359842628e-06, "loss": 0.055096435546875, "step": 42950 }, { "epoch": 0.37141918357817916, "grad_norm": 6.539045029830479, "learning_rate": 5.503744132134932e-06, "loss": 0.13789520263671876, "step": 42955 }, { "epoch": 0.37146241709972244, "grad_norm": 10.394238716273948, "learning_rate": 5.50363189288019e-06, "loss": 0.11530303955078125, "step": 42960 }, { "epoch": 0.3715056506212657, "grad_norm": 7.464916553345798, "learning_rate": 5.503519642078918e-06, "loss": 0.43016891479492186, "step": 42965 }, { "epoch": 0.37154888414280896, "grad_norm": 1.3675510605938024, "learning_rate": 5.503407379731635e-06, "loss": 0.17077865600585937, "step": 42970 }, { "epoch": 0.37159211766435224, "grad_norm": 8.023304679458148, "learning_rate": 5.503295105838858e-06, "loss": 0.06818580627441406, "step": 42975 }, { "epoch": 0.37163535118589547, "grad_norm": 6.759676384479623, "learning_rate": 5.503182820401106e-06, "loss": 0.094927978515625, "step": 42980 }, { "epoch": 0.37167858470743875, "grad_norm": 31.547523384226935, "learning_rate": 5.503070523418895e-06, "loss": 0.295703125, "step": 42985 }, { "epoch": 0.37172181822898204, "grad_norm": 4.751787185556274, "learning_rate": 5.502958214892744e-06, "loss": 0.1470489501953125, "step": 42990 }, { "epoch": 0.37176505175052527, "grad_norm": 6.593053413868408, "learning_rate": 5.502845894823171e-06, "loss": 0.27083663940429686, "step": 42995 }, { "epoch": 0.37180828527206855, "grad_norm": 22.391292091984077, "learning_rate": 5.502733563210694e-06, "loss": 0.10113067626953125, "step": 43000 }, { "epoch": 0.37185151879361183, "grad_norm": 32.29562332675997, "learning_rate": 5.502621220055831e-06, "loss": 0.5207443237304688, "step": 43005 }, { "epoch": 0.37189475231515506, "grad_norm": 2.3684169214295148, "learning_rate": 5.5025088653591e-06, "loss": 0.09587554931640625, "step": 43010 }, { "epoch": 0.37193798583669835, "grad_norm": 0.45428650889155114, "learning_rate": 5.50239649912102e-06, "loss": 0.08645477294921874, "step": 43015 }, { "epoch": 0.37198121935824163, "grad_norm": 0.40103643399284217, "learning_rate": 5.502284121342107e-06, "loss": 0.22245407104492188, "step": 43020 }, { "epoch": 0.37202445287978486, "grad_norm": 16.053954959215567, "learning_rate": 5.5021717320228826e-06, "loss": 0.09709930419921875, "step": 43025 }, { "epoch": 0.37206768640132815, "grad_norm": 0.3896132005078445, "learning_rate": 5.502059331163862e-06, "loss": 0.123419189453125, "step": 43030 }, { "epoch": 0.3721109199228714, "grad_norm": 17.570873286577623, "learning_rate": 5.501946918765564e-06, "loss": 0.408612060546875, "step": 43035 }, { "epoch": 0.37215415344441466, "grad_norm": 39.50310038984484, "learning_rate": 5.50183449482851e-06, "loss": 0.19945297241210938, "step": 43040 }, { "epoch": 0.37219738696595794, "grad_norm": 5.093263807122418, "learning_rate": 5.501722059353215e-06, "loss": 0.043485260009765624, "step": 43045 }, { "epoch": 0.3722406204875012, "grad_norm": 2.5779787062474764, "learning_rate": 5.501609612340199e-06, "loss": 0.05144500732421875, "step": 43050 }, { "epoch": 0.37228385400904446, "grad_norm": 33.222404293950405, "learning_rate": 5.501497153789982e-06, "loss": 0.09705581665039062, "step": 43055 }, { "epoch": 0.37232708753058774, "grad_norm": 43.709922365650705, "learning_rate": 5.501384683703079e-06, "loss": 0.5199966430664062, "step": 43060 }, { "epoch": 0.37237032105213097, "grad_norm": 1.1765999560951976, "learning_rate": 5.5012722020800115e-06, "loss": 0.17896041870117188, "step": 43065 }, { "epoch": 0.37241355457367425, "grad_norm": 22.547079557137128, "learning_rate": 5.501159708921297e-06, "loss": 0.5588088989257812, "step": 43070 }, { "epoch": 0.3724567880952175, "grad_norm": 0.33241675444669494, "learning_rate": 5.501047204227456e-06, "loss": 0.1129180908203125, "step": 43075 }, { "epoch": 0.37250002161676077, "grad_norm": 4.573991564391443, "learning_rate": 5.500934687999007e-06, "loss": 0.13103504180908204, "step": 43080 }, { "epoch": 0.37254325513830405, "grad_norm": 21.324929712896292, "learning_rate": 5.500822160236468e-06, "loss": 0.1996063232421875, "step": 43085 }, { "epoch": 0.3725864886598473, "grad_norm": 9.20510483596853, "learning_rate": 5.500709620940357e-06, "loss": 0.05034942626953125, "step": 43090 }, { "epoch": 0.37262972218139057, "grad_norm": 36.7500139659845, "learning_rate": 5.500597070111194e-06, "loss": 0.13835601806640624, "step": 43095 }, { "epoch": 0.37267295570293385, "grad_norm": 4.393947485904721, "learning_rate": 5.5004845077494996e-06, "loss": 0.2413116455078125, "step": 43100 }, { "epoch": 0.3727161892244771, "grad_norm": 0.7822313829474611, "learning_rate": 5.500371933855791e-06, "loss": 0.6645668029785157, "step": 43105 }, { "epoch": 0.37275942274602036, "grad_norm": 3.2420444870694527, "learning_rate": 5.5002593484305875e-06, "loss": 0.04719066619873047, "step": 43110 }, { "epoch": 0.3728026562675636, "grad_norm": 31.572977872591135, "learning_rate": 5.500146751474409e-06, "loss": 0.21887874603271484, "step": 43115 }, { "epoch": 0.3728458897891069, "grad_norm": 5.92025788730128, "learning_rate": 5.500034142987775e-06, "loss": 0.44608612060546876, "step": 43120 }, { "epoch": 0.37288912331065016, "grad_norm": 17.87276447449358, "learning_rate": 5.499921522971204e-06, "loss": 0.23276290893554688, "step": 43125 }, { "epoch": 0.3729323568321934, "grad_norm": 2.9690586447027316, "learning_rate": 5.499808891425217e-06, "loss": 0.1956714630126953, "step": 43130 }, { "epoch": 0.3729755903537367, "grad_norm": 8.7392763042239, "learning_rate": 5.499696248350331e-06, "loss": 0.20537185668945312, "step": 43135 }, { "epoch": 0.37301882387527996, "grad_norm": 1.3305116558683594, "learning_rate": 5.499583593747067e-06, "loss": 0.1279449462890625, "step": 43140 }, { "epoch": 0.3730620573968232, "grad_norm": 1.1313558887962885, "learning_rate": 5.499470927615944e-06, "loss": 0.033863067626953125, "step": 43145 }, { "epoch": 0.3731052909183665, "grad_norm": 14.853199585304917, "learning_rate": 5.4993582499574825e-06, "loss": 0.112286376953125, "step": 43150 }, { "epoch": 0.3731485244399097, "grad_norm": 16.148193747646744, "learning_rate": 5.4992455607722026e-06, "loss": 0.2343780517578125, "step": 43155 }, { "epoch": 0.373191757961453, "grad_norm": 0.8224779308218023, "learning_rate": 5.499132860060621e-06, "loss": 0.1520263671875, "step": 43160 }, { "epoch": 0.37323499148299627, "grad_norm": 2.8986848831758696, "learning_rate": 5.4990201478232615e-06, "loss": 0.0822357177734375, "step": 43165 }, { "epoch": 0.3732782250045395, "grad_norm": 0.32660831076936525, "learning_rate": 5.498907424060641e-06, "loss": 0.114984130859375, "step": 43170 }, { "epoch": 0.3733214585260828, "grad_norm": 0.6925227102074247, "learning_rate": 5.498794688773279e-06, "loss": 0.40823936462402344, "step": 43175 }, { "epoch": 0.37336469204762607, "grad_norm": 0.09196508497872084, "learning_rate": 5.498681941961698e-06, "loss": 0.0774566650390625, "step": 43180 }, { "epoch": 0.3734079255691693, "grad_norm": 0.6819057698441852, "learning_rate": 5.498569183626416e-06, "loss": 0.0267425537109375, "step": 43185 }, { "epoch": 0.3734511590907126, "grad_norm": 3.2542751451979415, "learning_rate": 5.498456413767954e-06, "loss": 0.21422348022460938, "step": 43190 }, { "epoch": 0.37349439261225587, "grad_norm": 5.583128501646165, "learning_rate": 5.4983436323868326e-06, "loss": 0.2822357177734375, "step": 43195 }, { "epoch": 0.3735376261337991, "grad_norm": 3.4504877878623694, "learning_rate": 5.49823083948357e-06, "loss": 0.09040679931640624, "step": 43200 }, { "epoch": 0.3735808596553424, "grad_norm": 81.44183528583832, "learning_rate": 5.498118035058688e-06, "loss": 0.09502487182617188, "step": 43205 }, { "epoch": 0.3736240931768856, "grad_norm": 2.2620009960302023, "learning_rate": 5.498005219112707e-06, "loss": 0.22794342041015625, "step": 43210 }, { "epoch": 0.3736673266984289, "grad_norm": 0.29000879604916185, "learning_rate": 5.497892391646145e-06, "loss": 0.13491973876953126, "step": 43215 }, { "epoch": 0.3737105602199722, "grad_norm": 8.653290614392159, "learning_rate": 5.497779552659526e-06, "loss": 0.2867435455322266, "step": 43220 }, { "epoch": 0.3737537937415154, "grad_norm": 23.584441450405386, "learning_rate": 5.497666702153367e-06, "loss": 0.11980819702148438, "step": 43225 }, { "epoch": 0.3737970272630587, "grad_norm": 17.158293115612338, "learning_rate": 5.497553840128191e-06, "loss": 0.11853485107421875, "step": 43230 }, { "epoch": 0.373840260784602, "grad_norm": 0.9660083135075385, "learning_rate": 5.497440966584516e-06, "loss": 0.17178497314453126, "step": 43235 }, { "epoch": 0.3738834943061452, "grad_norm": 0.18389193912023627, "learning_rate": 5.4973280815228645e-06, "loss": 0.3138394355773926, "step": 43240 }, { "epoch": 0.3739267278276885, "grad_norm": 2.5603422908294453, "learning_rate": 5.497215184943757e-06, "loss": 0.160174560546875, "step": 43245 }, { "epoch": 0.3739699613492317, "grad_norm": 3.875275259030302, "learning_rate": 5.497102276847714e-06, "loss": 0.23961563110351564, "step": 43250 }, { "epoch": 0.374013194870775, "grad_norm": 2.726360600539499, "learning_rate": 5.496989357235254e-06, "loss": 0.4451549530029297, "step": 43255 }, { "epoch": 0.3740564283923183, "grad_norm": 7.0243086893716224, "learning_rate": 5.496876426106901e-06, "loss": 0.2820037841796875, "step": 43260 }, { "epoch": 0.3740996619138615, "grad_norm": 2.573272344364317, "learning_rate": 5.496763483463175e-06, "loss": 0.3237892150878906, "step": 43265 }, { "epoch": 0.3741428954354048, "grad_norm": 9.280195740606864, "learning_rate": 5.496650529304596e-06, "loss": 0.28920745849609375, "step": 43270 }, { "epoch": 0.3741861289569481, "grad_norm": 12.650174527921665, "learning_rate": 5.496537563631686e-06, "loss": 0.37185134887695315, "step": 43275 }, { "epoch": 0.3742293624784913, "grad_norm": 20.913290200670613, "learning_rate": 5.496424586444965e-06, "loss": 0.1376953125, "step": 43280 }, { "epoch": 0.3742725960000346, "grad_norm": 9.1143241193292, "learning_rate": 5.4963115977449534e-06, "loss": 0.19807357788085939, "step": 43285 }, { "epoch": 0.3743158295215778, "grad_norm": 23.531002311413488, "learning_rate": 5.496198597532174e-06, "loss": 0.26064071655273435, "step": 43290 }, { "epoch": 0.3743590630431211, "grad_norm": 36.609213785140646, "learning_rate": 5.496085585807147e-06, "loss": 0.4064434051513672, "step": 43295 }, { "epoch": 0.3744022965646644, "grad_norm": 0.9922674673130926, "learning_rate": 5.495972562570394e-06, "loss": 0.0726409912109375, "step": 43300 }, { "epoch": 0.3744455300862076, "grad_norm": 35.799571139554196, "learning_rate": 5.495859527822436e-06, "loss": 0.1611175537109375, "step": 43305 }, { "epoch": 0.3744887636077509, "grad_norm": 39.69089641816607, "learning_rate": 5.495746481563795e-06, "loss": 0.62213134765625, "step": 43310 }, { "epoch": 0.3745319971292942, "grad_norm": 2.197534420086273, "learning_rate": 5.495633423794991e-06, "loss": 0.16108665466308594, "step": 43315 }, { "epoch": 0.3745752306508374, "grad_norm": 10.467494648809291, "learning_rate": 5.495520354516547e-06, "loss": 0.054993820190429685, "step": 43320 }, { "epoch": 0.3746184641723807, "grad_norm": 4.755363151206429, "learning_rate": 5.495407273728983e-06, "loss": 0.0250213623046875, "step": 43325 }, { "epoch": 0.37466169769392393, "grad_norm": 0.6451360474677644, "learning_rate": 5.495294181432822e-06, "loss": 0.10507659912109375, "step": 43330 }, { "epoch": 0.3747049312154672, "grad_norm": 24.825779849810115, "learning_rate": 5.495181077628583e-06, "loss": 0.2909149169921875, "step": 43335 }, { "epoch": 0.3747481647370105, "grad_norm": 0.3673211173209336, "learning_rate": 5.49506796231679e-06, "loss": 0.10428524017333984, "step": 43340 }, { "epoch": 0.37479139825855373, "grad_norm": 11.400393690734102, "learning_rate": 5.494954835497965e-06, "loss": 0.0942962646484375, "step": 43345 }, { "epoch": 0.374834631780097, "grad_norm": 0.19629118359053144, "learning_rate": 5.494841697172628e-06, "loss": 0.037725830078125, "step": 43350 }, { "epoch": 0.3748778653016403, "grad_norm": 13.240751608795902, "learning_rate": 5.494728547341302e-06, "loss": 0.07894020080566407, "step": 43355 }, { "epoch": 0.37492109882318353, "grad_norm": 2.849562826082907, "learning_rate": 5.4946153860045075e-06, "loss": 0.12680397033691407, "step": 43360 }, { "epoch": 0.3749643323447268, "grad_norm": 22.685999295848873, "learning_rate": 5.494502213162768e-06, "loss": 0.10062484741210938, "step": 43365 }, { "epoch": 0.3750075658662701, "grad_norm": 1.7475536633927773, "learning_rate": 5.494389028816605e-06, "loss": 0.11414794921875, "step": 43370 }, { "epoch": 0.3750507993878133, "grad_norm": 4.530918681965543, "learning_rate": 5.49427583296654e-06, "loss": 0.37105560302734375, "step": 43375 }, { "epoch": 0.3750940329093566, "grad_norm": 5.2024489188038645, "learning_rate": 5.494162625613094e-06, "loss": 0.1177703857421875, "step": 43380 }, { "epoch": 0.37513726643089984, "grad_norm": 1.141406531320194, "learning_rate": 5.494049406756792e-06, "loss": 0.10079345703125, "step": 43385 }, { "epoch": 0.3751804999524431, "grad_norm": 19.737772706549237, "learning_rate": 5.493936176398154e-06, "loss": 0.09179840087890626, "step": 43390 }, { "epoch": 0.3752237334739864, "grad_norm": 25.831433982287958, "learning_rate": 5.493822934537703e-06, "loss": 0.16909637451171874, "step": 43395 }, { "epoch": 0.37526696699552964, "grad_norm": 7.89529631669344, "learning_rate": 5.49370968117596e-06, "loss": 0.247100830078125, "step": 43400 }, { "epoch": 0.3753102005170729, "grad_norm": 0.3044044369432907, "learning_rate": 5.493596416313449e-06, "loss": 0.04246864318847656, "step": 43405 }, { "epoch": 0.3753534340386162, "grad_norm": 28.49055823091018, "learning_rate": 5.49348313995069e-06, "loss": 0.2331573486328125, "step": 43410 }, { "epoch": 0.37539666756015944, "grad_norm": 23.123841725309934, "learning_rate": 5.493369852088209e-06, "loss": 0.10566177368164062, "step": 43415 }, { "epoch": 0.3754399010817027, "grad_norm": 13.787104046035392, "learning_rate": 5.493256552726527e-06, "loss": 0.14039764404296876, "step": 43420 }, { "epoch": 0.37548313460324595, "grad_norm": 2.3249447066377953, "learning_rate": 5.493143241866165e-06, "loss": 0.3017425537109375, "step": 43425 }, { "epoch": 0.37552636812478923, "grad_norm": 19.680317934685437, "learning_rate": 5.4930299195076476e-06, "loss": 0.2975738525390625, "step": 43430 }, { "epoch": 0.3755696016463325, "grad_norm": 3.8451853152834494, "learning_rate": 5.492916585651495e-06, "loss": 0.20527992248535157, "step": 43435 }, { "epoch": 0.37561283516787575, "grad_norm": 11.404594124694592, "learning_rate": 5.492803240298233e-06, "loss": 0.4036369323730469, "step": 43440 }, { "epoch": 0.37565606868941903, "grad_norm": 28.036679823556206, "learning_rate": 5.492689883448382e-06, "loss": 0.4948394775390625, "step": 43445 }, { "epoch": 0.3756993022109623, "grad_norm": 0.2458106043567353, "learning_rate": 5.492576515102466e-06, "loss": 0.14245147705078126, "step": 43450 }, { "epoch": 0.37574253573250554, "grad_norm": 7.3766054238787, "learning_rate": 5.492463135261006e-06, "loss": 0.07065277099609375, "step": 43455 }, { "epoch": 0.37578576925404883, "grad_norm": 0.4937093371713382, "learning_rate": 5.492349743924527e-06, "loss": 0.11446914672851563, "step": 43460 }, { "epoch": 0.37582900277559206, "grad_norm": 10.226209619014776, "learning_rate": 5.4922363410935515e-06, "loss": 0.19262542724609374, "step": 43465 }, { "epoch": 0.37587223629713534, "grad_norm": 9.5021444304025, "learning_rate": 5.492122926768602e-06, "loss": 0.104132080078125, "step": 43470 }, { "epoch": 0.3759154698186786, "grad_norm": 5.9796194218122185, "learning_rate": 5.492009500950201e-06, "loss": 0.34298248291015626, "step": 43475 }, { "epoch": 0.37595870334022186, "grad_norm": 1.230473283466772, "learning_rate": 5.4918960636388734e-06, "loss": 0.145001220703125, "step": 43480 }, { "epoch": 0.37600193686176514, "grad_norm": 16.181934148400376, "learning_rate": 5.491782614835141e-06, "loss": 0.0426727294921875, "step": 43485 }, { "epoch": 0.3760451703833084, "grad_norm": 10.448723546616089, "learning_rate": 5.491669154539528e-06, "loss": 0.28263397216796876, "step": 43490 }, { "epoch": 0.37608840390485165, "grad_norm": 3.4300167905134016, "learning_rate": 5.491555682752557e-06, "loss": 0.06251220703125, "step": 43495 }, { "epoch": 0.37613163742639494, "grad_norm": 0.5324125943614045, "learning_rate": 5.491442199474749e-06, "loss": 0.1360931396484375, "step": 43500 }, { "epoch": 0.37617487094793817, "grad_norm": 4.6868804967824556, "learning_rate": 5.491328704706632e-06, "loss": 0.13839187622070312, "step": 43505 }, { "epoch": 0.37621810446948145, "grad_norm": 6.92527402827326, "learning_rate": 5.491215198448727e-06, "loss": 0.1266021728515625, "step": 43510 }, { "epoch": 0.37626133799102474, "grad_norm": 37.869003675723796, "learning_rate": 5.4911016807015576e-06, "loss": 0.16094474792480468, "step": 43515 }, { "epoch": 0.37630457151256796, "grad_norm": 5.324875760116542, "learning_rate": 5.4909881514656465e-06, "loss": 0.1920684814453125, "step": 43520 }, { "epoch": 0.37634780503411125, "grad_norm": 11.674481011498285, "learning_rate": 5.490874610741518e-06, "loss": 0.19197235107421876, "step": 43525 }, { "epoch": 0.37639103855565453, "grad_norm": 9.698172969141535, "learning_rate": 5.490761058529697e-06, "loss": 0.1289093017578125, "step": 43530 }, { "epoch": 0.37643427207719776, "grad_norm": 16.082417111434626, "learning_rate": 5.4906474948307065e-06, "loss": 0.101068115234375, "step": 43535 }, { "epoch": 0.37647750559874105, "grad_norm": 24.88924992244423, "learning_rate": 5.490533919645069e-06, "loss": 0.39297332763671877, "step": 43540 }, { "epoch": 0.37652073912028433, "grad_norm": 22.949552558125678, "learning_rate": 5.490420332973309e-06, "loss": 0.257965087890625, "step": 43545 }, { "epoch": 0.37656397264182756, "grad_norm": 9.951098523981877, "learning_rate": 5.490306734815951e-06, "loss": 0.1491943359375, "step": 43550 }, { "epoch": 0.37660720616337084, "grad_norm": 11.567877754083982, "learning_rate": 5.490193125173517e-06, "loss": 0.1061309814453125, "step": 43555 }, { "epoch": 0.3766504396849141, "grad_norm": 11.333868918432543, "learning_rate": 5.490079504046535e-06, "loss": 0.21719970703125, "step": 43560 }, { "epoch": 0.37669367320645736, "grad_norm": 19.50649966922712, "learning_rate": 5.489965871435524e-06, "loss": 0.3197540283203125, "step": 43565 }, { "epoch": 0.37673690672800064, "grad_norm": 3.8707029901252645, "learning_rate": 5.489852227341011e-06, "loss": 0.511578369140625, "step": 43570 }, { "epoch": 0.37678014024954387, "grad_norm": 9.35768062311951, "learning_rate": 5.48973857176352e-06, "loss": 0.0772237777709961, "step": 43575 }, { "epoch": 0.37682337377108716, "grad_norm": 0.5133215904384529, "learning_rate": 5.489624904703575e-06, "loss": 0.046262359619140624, "step": 43580 }, { "epoch": 0.37686660729263044, "grad_norm": 40.88372904790289, "learning_rate": 5.489511226161699e-06, "loss": 0.3969635009765625, "step": 43585 }, { "epoch": 0.37690984081417367, "grad_norm": 4.9090295910992765, "learning_rate": 5.489397536138418e-06, "loss": 0.24599151611328124, "step": 43590 }, { "epoch": 0.37695307433571695, "grad_norm": 6.432790384966777, "learning_rate": 5.489283834634255e-06, "loss": 0.1904510498046875, "step": 43595 }, { "epoch": 0.3769963078572602, "grad_norm": 3.0235759293072886, "learning_rate": 5.489170121649735e-06, "loss": 0.191961669921875, "step": 43600 }, { "epoch": 0.37703954137880347, "grad_norm": 31.132779973010432, "learning_rate": 5.4890563971853814e-06, "loss": 0.38581047058105467, "step": 43605 }, { "epoch": 0.37708277490034675, "grad_norm": 4.159811174069653, "learning_rate": 5.48894266124172e-06, "loss": 0.0670440673828125, "step": 43610 }, { "epoch": 0.37712600842189, "grad_norm": 0.7433788720166278, "learning_rate": 5.4888289138192766e-06, "loss": 0.053479766845703124, "step": 43615 }, { "epoch": 0.37716924194343326, "grad_norm": 3.237508868046934, "learning_rate": 5.488715154918573e-06, "loss": 0.242291259765625, "step": 43620 }, { "epoch": 0.37721247546497655, "grad_norm": 1.9007834706955253, "learning_rate": 5.488601384540135e-06, "loss": 0.18122711181640624, "step": 43625 }, { "epoch": 0.3772557089865198, "grad_norm": 58.42582529646827, "learning_rate": 5.488487602684487e-06, "loss": 0.1720245361328125, "step": 43630 }, { "epoch": 0.37729894250806306, "grad_norm": 4.094564092999294, "learning_rate": 5.488373809352155e-06, "loss": 0.13092193603515626, "step": 43635 }, { "epoch": 0.3773421760296063, "grad_norm": 38.13840200884481, "learning_rate": 5.488260004543663e-06, "loss": 0.1240234375, "step": 43640 }, { "epoch": 0.3773854095511496, "grad_norm": 4.7784933803767435, "learning_rate": 5.4881461882595346e-06, "loss": 0.22529773712158202, "step": 43645 }, { "epoch": 0.37742864307269286, "grad_norm": 0.7698114754955435, "learning_rate": 5.488032360500296e-06, "loss": 0.1034698486328125, "step": 43650 }, { "epoch": 0.3774718765942361, "grad_norm": 28.843259351753474, "learning_rate": 5.4879185212664725e-06, "loss": 0.1940032958984375, "step": 43655 }, { "epoch": 0.3775151101157794, "grad_norm": 6.344936066932554, "learning_rate": 5.487804670558588e-06, "loss": 0.142962646484375, "step": 43660 }, { "epoch": 0.37755834363732266, "grad_norm": 4.394657016167086, "learning_rate": 5.487690808377168e-06, "loss": 0.053326416015625, "step": 43665 }, { "epoch": 0.3776015771588659, "grad_norm": 10.94721736956757, "learning_rate": 5.4875769347227395e-06, "loss": 0.1727264404296875, "step": 43670 }, { "epoch": 0.37764481068040917, "grad_norm": 15.514844837603052, "learning_rate": 5.487463049595824e-06, "loss": 0.1365509033203125, "step": 43675 }, { "epoch": 0.3776880442019524, "grad_norm": 4.7652281193532255, "learning_rate": 5.48734915299695e-06, "loss": 0.05732231140136719, "step": 43680 }, { "epoch": 0.3777312777234957, "grad_norm": 3.132784714017411, "learning_rate": 5.4872352449266415e-06, "loss": 0.137384033203125, "step": 43685 }, { "epoch": 0.37777451124503897, "grad_norm": 10.456310730861563, "learning_rate": 5.4871213253854226e-06, "loss": 0.044469451904296874, "step": 43690 }, { "epoch": 0.3778177447665822, "grad_norm": 1.4722759359498157, "learning_rate": 5.487007394373821e-06, "loss": 0.27172279357910156, "step": 43695 }, { "epoch": 0.3778609782881255, "grad_norm": 39.71532412769557, "learning_rate": 5.486893451892361e-06, "loss": 0.28316612243652345, "step": 43700 }, { "epoch": 0.37790421180966877, "grad_norm": 18.224209262054412, "learning_rate": 5.486779497941568e-06, "loss": 0.4657596588134766, "step": 43705 }, { "epoch": 0.377947445331212, "grad_norm": 14.006251133426153, "learning_rate": 5.486665532521967e-06, "loss": 0.23515625, "step": 43710 }, { "epoch": 0.3779906788527553, "grad_norm": 24.972514843296125, "learning_rate": 5.486551555634085e-06, "loss": 0.38018646240234377, "step": 43715 }, { "epoch": 0.3780339123742985, "grad_norm": 21.462004436080765, "learning_rate": 5.486437567278448e-06, "loss": 0.10430984497070313, "step": 43720 }, { "epoch": 0.3780771458958418, "grad_norm": 27.160827341305687, "learning_rate": 5.486323567455578e-06, "loss": 0.15954437255859374, "step": 43725 }, { "epoch": 0.3781203794173851, "grad_norm": 5.982581540945716, "learning_rate": 5.486209556166006e-06, "loss": 0.27202720642089845, "step": 43730 }, { "epoch": 0.3781636129389283, "grad_norm": 28.010029442455547, "learning_rate": 5.486095533410254e-06, "loss": 0.466094970703125, "step": 43735 }, { "epoch": 0.3782068464604716, "grad_norm": 1.148413420423748, "learning_rate": 5.485981499188848e-06, "loss": 0.0797821044921875, "step": 43740 }, { "epoch": 0.3782500799820149, "grad_norm": 3.004449432605923, "learning_rate": 5.485867453502317e-06, "loss": 0.39065399169921877, "step": 43745 }, { "epoch": 0.3782933135035581, "grad_norm": 22.07957879532725, "learning_rate": 5.485753396351184e-06, "loss": 0.1408447265625, "step": 43750 }, { "epoch": 0.3783365470251014, "grad_norm": 11.015434254147014, "learning_rate": 5.4856393277359764e-06, "loss": 0.20901641845703126, "step": 43755 }, { "epoch": 0.3783797805466447, "grad_norm": 67.03410189811949, "learning_rate": 5.48552524765722e-06, "loss": 0.61026611328125, "step": 43760 }, { "epoch": 0.3784230140681879, "grad_norm": 3.167921427218215, "learning_rate": 5.48541115611544e-06, "loss": 0.15137958526611328, "step": 43765 }, { "epoch": 0.3784662475897312, "grad_norm": 13.217226470093301, "learning_rate": 5.485297053111163e-06, "loss": 0.40618896484375, "step": 43770 }, { "epoch": 0.3785094811112744, "grad_norm": 19.901811495204623, "learning_rate": 5.485182938644917e-06, "loss": 0.249493408203125, "step": 43775 }, { "epoch": 0.3785527146328177, "grad_norm": 1.1391971067251432, "learning_rate": 5.485068812717226e-06, "loss": 0.09355087280273437, "step": 43780 }, { "epoch": 0.378595948154361, "grad_norm": 54.76770154096042, "learning_rate": 5.4849546753286175e-06, "loss": 0.278875732421875, "step": 43785 }, { "epoch": 0.3786391816759042, "grad_norm": 3.3623027587559062, "learning_rate": 5.484840526479617e-06, "loss": 0.147076416015625, "step": 43790 }, { "epoch": 0.3786824151974475, "grad_norm": 1.8943725683665797, "learning_rate": 5.484726366170752e-06, "loss": 0.02373924255371094, "step": 43795 }, { "epoch": 0.3787256487189908, "grad_norm": 1.2937491204654943, "learning_rate": 5.484612194402549e-06, "loss": 0.1815887451171875, "step": 43800 }, { "epoch": 0.378768882240534, "grad_norm": 18.958036429312767, "learning_rate": 5.4844980111755334e-06, "loss": 0.4677886962890625, "step": 43805 }, { "epoch": 0.3788121157620773, "grad_norm": 1.1256379830749879, "learning_rate": 5.484383816490233e-06, "loss": 0.07642822265625, "step": 43810 }, { "epoch": 0.3788553492836205, "grad_norm": 5.012219282089206, "learning_rate": 5.484269610347174e-06, "loss": 0.05133686065673828, "step": 43815 }, { "epoch": 0.3788985828051638, "grad_norm": 59.41802732991578, "learning_rate": 5.484155392746883e-06, "loss": 0.4271484375, "step": 43820 }, { "epoch": 0.3789418163267071, "grad_norm": 1.9192067359589278, "learning_rate": 5.484041163689887e-06, "loss": 0.1732654571533203, "step": 43825 }, { "epoch": 0.3789850498482503, "grad_norm": 6.346673702725154, "learning_rate": 5.483926923176713e-06, "loss": 0.2676567077636719, "step": 43830 }, { "epoch": 0.3790282833697936, "grad_norm": 34.03164499684542, "learning_rate": 5.483812671207885e-06, "loss": 0.21812057495117188, "step": 43835 }, { "epoch": 0.3790715168913369, "grad_norm": 8.97892143707442, "learning_rate": 5.4836984077839344e-06, "loss": 0.265374755859375, "step": 43840 }, { "epoch": 0.3791147504128801, "grad_norm": 5.509777631244143, "learning_rate": 5.483584132905387e-06, "loss": 0.13665924072265626, "step": 43845 }, { "epoch": 0.3791579839344234, "grad_norm": 1.2699863461445844, "learning_rate": 5.483469846572768e-06, "loss": 0.164459228515625, "step": 43850 }, { "epoch": 0.37920121745596663, "grad_norm": 0.585001418865177, "learning_rate": 5.483355548786606e-06, "loss": 0.0973175048828125, "step": 43855 }, { "epoch": 0.3792444509775099, "grad_norm": 3.5042917333226904, "learning_rate": 5.483241239547428e-06, "loss": 0.1968231201171875, "step": 43860 }, { "epoch": 0.3792876844990532, "grad_norm": 0.7297789140320659, "learning_rate": 5.483126918855759e-06, "loss": 0.05061149597167969, "step": 43865 }, { "epoch": 0.37933091802059643, "grad_norm": 2.387089391427703, "learning_rate": 5.48301258671213e-06, "loss": 0.19183349609375, "step": 43870 }, { "epoch": 0.3793741515421397, "grad_norm": 20.69205660894346, "learning_rate": 5.482898243117065e-06, "loss": 0.15593948364257812, "step": 43875 }, { "epoch": 0.379417385063683, "grad_norm": 7.314109080741355, "learning_rate": 5.482783888071093e-06, "loss": 0.24255599975585937, "step": 43880 }, { "epoch": 0.37946061858522623, "grad_norm": 0.2840062379688594, "learning_rate": 5.4826695215747406e-06, "loss": 0.021207427978515624, "step": 43885 }, { "epoch": 0.3795038521067695, "grad_norm": 11.582472812067723, "learning_rate": 5.482555143628538e-06, "loss": 0.2071929931640625, "step": 43890 }, { "epoch": 0.37954708562831274, "grad_norm": 8.287914286692793, "learning_rate": 5.482440754233008e-06, "loss": 0.168011474609375, "step": 43895 }, { "epoch": 0.379590319149856, "grad_norm": 1.8990330972269678, "learning_rate": 5.482326353388681e-06, "loss": 0.11445236206054688, "step": 43900 }, { "epoch": 0.3796335526713993, "grad_norm": 0.5224952615174705, "learning_rate": 5.4822119410960855e-06, "loss": 0.07910385131835937, "step": 43905 }, { "epoch": 0.37967678619294254, "grad_norm": 8.264771231884536, "learning_rate": 5.482097517355746e-06, "loss": 0.297900390625, "step": 43910 }, { "epoch": 0.3797200197144858, "grad_norm": 31.48308924482875, "learning_rate": 5.481983082168194e-06, "loss": 0.71669921875, "step": 43915 }, { "epoch": 0.3797632532360291, "grad_norm": 29.95620040594587, "learning_rate": 5.481868635533954e-06, "loss": 0.43655548095703123, "step": 43920 }, { "epoch": 0.37980648675757234, "grad_norm": 4.767063704039091, "learning_rate": 5.481754177453556e-06, "loss": 0.06489181518554688, "step": 43925 }, { "epoch": 0.3798497202791156, "grad_norm": 8.539306438337093, "learning_rate": 5.481639707927527e-06, "loss": 0.175830078125, "step": 43930 }, { "epoch": 0.3798929538006589, "grad_norm": 0.8552798160053563, "learning_rate": 5.481525226956394e-06, "loss": 0.11141357421875, "step": 43935 }, { "epoch": 0.37993618732220213, "grad_norm": 98.03846334777639, "learning_rate": 5.481410734540687e-06, "loss": 0.5094146728515625, "step": 43940 }, { "epoch": 0.3799794208437454, "grad_norm": 0.500179308425044, "learning_rate": 5.481296230680932e-06, "loss": 0.10242233276367188, "step": 43945 }, { "epoch": 0.38002265436528865, "grad_norm": 5.819365675135459, "learning_rate": 5.481181715377658e-06, "loss": 0.134527587890625, "step": 43950 }, { "epoch": 0.38006588788683193, "grad_norm": 37.387883184348915, "learning_rate": 5.481067188631394e-06, "loss": 0.66162109375, "step": 43955 }, { "epoch": 0.3801091214083752, "grad_norm": 46.850292607013884, "learning_rate": 5.4809526504426676e-06, "loss": 0.1712890625, "step": 43960 }, { "epoch": 0.38015235492991845, "grad_norm": 1.1265949933104793, "learning_rate": 5.480838100812005e-06, "loss": 0.041187286376953125, "step": 43965 }, { "epoch": 0.38019558845146173, "grad_norm": 5.947932882273587, "learning_rate": 5.480723539739938e-06, "loss": 0.1969757080078125, "step": 43970 }, { "epoch": 0.380238821973005, "grad_norm": 10.581536953676673, "learning_rate": 5.480608967226993e-06, "loss": 0.27593994140625, "step": 43975 }, { "epoch": 0.38028205549454824, "grad_norm": 6.634011591678558, "learning_rate": 5.480494383273698e-06, "loss": 0.14288482666015626, "step": 43980 }, { "epoch": 0.38032528901609153, "grad_norm": 4.407114523117595, "learning_rate": 5.480379787880584e-06, "loss": 0.2791330337524414, "step": 43985 }, { "epoch": 0.38036852253763476, "grad_norm": 26.93629518273842, "learning_rate": 5.480265181048175e-06, "loss": 0.11940460205078125, "step": 43990 }, { "epoch": 0.38041175605917804, "grad_norm": 7.343832793926656, "learning_rate": 5.480150562777003e-06, "loss": 0.11142425537109375, "step": 43995 }, { "epoch": 0.3804549895807213, "grad_norm": 0.7731850692129854, "learning_rate": 5.4800359330675965e-06, "loss": 0.28409881591796876, "step": 44000 }, { "epoch": 0.38049822310226455, "grad_norm": 25.986824273818424, "learning_rate": 5.479921291920482e-06, "loss": 0.1731658935546875, "step": 44005 }, { "epoch": 0.38054145662380784, "grad_norm": 0.8818111867013648, "learning_rate": 5.479806639336191e-06, "loss": 0.340338134765625, "step": 44010 }, { "epoch": 0.3805846901453511, "grad_norm": 0.3085841094858339, "learning_rate": 5.479691975315251e-06, "loss": 0.03069915771484375, "step": 44015 }, { "epoch": 0.38062792366689435, "grad_norm": 9.279736551894189, "learning_rate": 5.47957729985819e-06, "loss": 0.0839385986328125, "step": 44020 }, { "epoch": 0.38067115718843764, "grad_norm": 5.360576486320866, "learning_rate": 5.479462612965537e-06, "loss": 0.03330230712890625, "step": 44025 }, { "epoch": 0.38071439070998087, "grad_norm": 3.7381032872968696, "learning_rate": 5.479347914637822e-06, "loss": 0.0896484375, "step": 44030 }, { "epoch": 0.38075762423152415, "grad_norm": 9.107852483065193, "learning_rate": 5.479233204875574e-06, "loss": 0.06980743408203124, "step": 44035 }, { "epoch": 0.38080085775306743, "grad_norm": 1.954231771773525, "learning_rate": 5.479118483679321e-06, "loss": 0.319537353515625, "step": 44040 }, { "epoch": 0.38084409127461066, "grad_norm": 1.160777122535235, "learning_rate": 5.479003751049593e-06, "loss": 0.1006103515625, "step": 44045 }, { "epoch": 0.38088732479615395, "grad_norm": 2.2620902680878436, "learning_rate": 5.478889006986918e-06, "loss": 0.113006591796875, "step": 44050 }, { "epoch": 0.38093055831769723, "grad_norm": 26.037087378385596, "learning_rate": 5.478774251491827e-06, "loss": 0.0907073974609375, "step": 44055 }, { "epoch": 0.38097379183924046, "grad_norm": 16.473357526377427, "learning_rate": 5.478659484564848e-06, "loss": 0.15119705200195313, "step": 44060 }, { "epoch": 0.38101702536078375, "grad_norm": 6.173399707081099, "learning_rate": 5.4785447062065095e-06, "loss": 0.1895751953125, "step": 44065 }, { "epoch": 0.381060258882327, "grad_norm": 0.2756804718860803, "learning_rate": 5.478429916417344e-06, "loss": 0.1167694091796875, "step": 44070 }, { "epoch": 0.38110349240387026, "grad_norm": 3.575846841027654, "learning_rate": 5.478315115197877e-06, "loss": 0.054705810546875, "step": 44075 }, { "epoch": 0.38114672592541354, "grad_norm": 1.4704349320482415, "learning_rate": 5.47820030254864e-06, "loss": 0.20214767456054689, "step": 44080 }, { "epoch": 0.38118995944695677, "grad_norm": 1.1499955030839029, "learning_rate": 5.478085478470162e-06, "loss": 0.038238525390625, "step": 44085 }, { "epoch": 0.38123319296850006, "grad_norm": 17.425833467413398, "learning_rate": 5.477970642962972e-06, "loss": 0.1544281005859375, "step": 44090 }, { "epoch": 0.38127642649004334, "grad_norm": 54.02824792673214, "learning_rate": 5.477855796027601e-06, "loss": 0.25745849609375, "step": 44095 }, { "epoch": 0.38131966001158657, "grad_norm": 12.11689946507417, "learning_rate": 5.477740937664579e-06, "loss": 0.08437690734863282, "step": 44100 }, { "epoch": 0.38136289353312985, "grad_norm": 4.3184205080960085, "learning_rate": 5.477626067874434e-06, "loss": 0.46233177185058594, "step": 44105 }, { "epoch": 0.38140612705467314, "grad_norm": 11.024822372675189, "learning_rate": 5.477511186657696e-06, "loss": 0.06939620971679687, "step": 44110 }, { "epoch": 0.38144936057621637, "grad_norm": 2.986373296487096, "learning_rate": 5.4773962940148965e-06, "loss": 0.2036163330078125, "step": 44115 }, { "epoch": 0.38149259409775965, "grad_norm": 9.118200664750375, "learning_rate": 5.477281389946563e-06, "loss": 0.15218887329101563, "step": 44120 }, { "epoch": 0.3815358276193029, "grad_norm": 4.376190990049772, "learning_rate": 5.477166474453227e-06, "loss": 0.0672454833984375, "step": 44125 }, { "epoch": 0.38157906114084617, "grad_norm": 5.468494909386899, "learning_rate": 5.477051547535418e-06, "loss": 0.1197418212890625, "step": 44130 }, { "epoch": 0.38162229466238945, "grad_norm": 16.314931146315836, "learning_rate": 5.476936609193667e-06, "loss": 0.10909423828125, "step": 44135 }, { "epoch": 0.3816655281839327, "grad_norm": 4.001832307653622, "learning_rate": 5.476821659428502e-06, "loss": 0.35132598876953125, "step": 44140 }, { "epoch": 0.38170876170547596, "grad_norm": 25.927145192927544, "learning_rate": 5.476706698240455e-06, "loss": 0.25867462158203125, "step": 44145 }, { "epoch": 0.38175199522701925, "grad_norm": 2.720713773925191, "learning_rate": 5.476591725630055e-06, "loss": 0.108197021484375, "step": 44150 }, { "epoch": 0.3817952287485625, "grad_norm": 18.1957094695026, "learning_rate": 5.4764767415978335e-06, "loss": 0.15084762573242189, "step": 44155 }, { "epoch": 0.38183846227010576, "grad_norm": 7.271502290623601, "learning_rate": 5.47636174614432e-06, "loss": 0.22192535400390626, "step": 44160 }, { "epoch": 0.381881695791649, "grad_norm": 7.037132641107962, "learning_rate": 5.476246739270045e-06, "loss": 0.1304840087890625, "step": 44165 }, { "epoch": 0.3819249293131923, "grad_norm": 12.722742558618213, "learning_rate": 5.476131720975538e-06, "loss": 0.07802200317382812, "step": 44170 }, { "epoch": 0.38196816283473556, "grad_norm": 0.5516728986685568, "learning_rate": 5.476016691261331e-06, "loss": 0.25994071960449217, "step": 44175 }, { "epoch": 0.3820113963562788, "grad_norm": 3.6601190819246137, "learning_rate": 5.475901650127954e-06, "loss": 0.05514373779296875, "step": 44180 }, { "epoch": 0.38205462987782207, "grad_norm": 11.613982715229943, "learning_rate": 5.475786597575937e-06, "loss": 0.0963623046875, "step": 44185 }, { "epoch": 0.38209786339936536, "grad_norm": 0.8561409090630762, "learning_rate": 5.475671533605811e-06, "loss": 0.072906494140625, "step": 44190 }, { "epoch": 0.3821410969209086, "grad_norm": 5.57854184901463, "learning_rate": 5.4755564582181075e-06, "loss": 0.07089157104492187, "step": 44195 }, { "epoch": 0.38218433044245187, "grad_norm": 17.790443972980157, "learning_rate": 5.475441371413355e-06, "loss": 0.304327392578125, "step": 44200 }, { "epoch": 0.3822275639639951, "grad_norm": 19.186592950909134, "learning_rate": 5.475326273192086e-06, "loss": 0.13793792724609374, "step": 44205 }, { "epoch": 0.3822707974855384, "grad_norm": 2.2521433756724347, "learning_rate": 5.475211163554832e-06, "loss": 0.07401123046875, "step": 44210 }, { "epoch": 0.38231403100708167, "grad_norm": 25.803532820237482, "learning_rate": 5.475096042502121e-06, "loss": 0.109771728515625, "step": 44215 }, { "epoch": 0.3823572645286249, "grad_norm": 7.512405109993741, "learning_rate": 5.474980910034487e-06, "loss": 0.2866424560546875, "step": 44220 }, { "epoch": 0.3824004980501682, "grad_norm": 3.8693448608954757, "learning_rate": 5.4748657661524596e-06, "loss": 0.0541717529296875, "step": 44225 }, { "epoch": 0.38244373157171146, "grad_norm": 13.160543961852206, "learning_rate": 5.4747506108565695e-06, "loss": 0.084881591796875, "step": 44230 }, { "epoch": 0.3824869650932547, "grad_norm": 42.26488982942941, "learning_rate": 5.474635444147349e-06, "loss": 0.2733968734741211, "step": 44235 }, { "epoch": 0.382530198614798, "grad_norm": 10.634988491059925, "learning_rate": 5.474520266025328e-06, "loss": 0.274078369140625, "step": 44240 }, { "epoch": 0.3825734321363412, "grad_norm": 1.349726622165307, "learning_rate": 5.474405076491039e-06, "loss": 0.020528030395507813, "step": 44245 }, { "epoch": 0.3826166656578845, "grad_norm": 0.5787354960500498, "learning_rate": 5.474289875545011e-06, "loss": 0.06857147216796874, "step": 44250 }, { "epoch": 0.3826598991794278, "grad_norm": 13.703594318291051, "learning_rate": 5.474174663187778e-06, "loss": 0.1799285888671875, "step": 44255 }, { "epoch": 0.382703132700971, "grad_norm": 7.4241082301784305, "learning_rate": 5.47405943941987e-06, "loss": 0.09972991943359374, "step": 44260 }, { "epoch": 0.3827463662225143, "grad_norm": 6.873513731422722, "learning_rate": 5.473944204241817e-06, "loss": 0.1931610107421875, "step": 44265 }, { "epoch": 0.3827895997440576, "grad_norm": 10.989063085566753, "learning_rate": 5.4738289576541524e-06, "loss": 0.0939666748046875, "step": 44270 }, { "epoch": 0.3828328332656008, "grad_norm": 0.9465661067277065, "learning_rate": 5.473713699657409e-06, "loss": 0.12060470581054687, "step": 44275 }, { "epoch": 0.3828760667871441, "grad_norm": 17.061957548024576, "learning_rate": 5.473598430252114e-06, "loss": 0.4340797424316406, "step": 44280 }, { "epoch": 0.38291930030868737, "grad_norm": 0.7280197129696424, "learning_rate": 5.473483149438804e-06, "loss": 0.08512535095214843, "step": 44285 }, { "epoch": 0.3829625338302306, "grad_norm": 0.9959277684090453, "learning_rate": 5.4733678572180065e-06, "loss": 0.042686843872070314, "step": 44290 }, { "epoch": 0.3830057673517739, "grad_norm": 1.5026544473040946, "learning_rate": 5.4732525535902565e-06, "loss": 0.14931640625, "step": 44295 }, { "epoch": 0.3830490008733171, "grad_norm": 8.461940145761535, "learning_rate": 5.473137238556083e-06, "loss": 0.0861053466796875, "step": 44300 }, { "epoch": 0.3830922343948604, "grad_norm": 4.675733506492102, "learning_rate": 5.47302191211602e-06, "loss": 0.14906005859375, "step": 44305 }, { "epoch": 0.3831354679164037, "grad_norm": 7.638803027330174, "learning_rate": 5.4729065742705974e-06, "loss": 0.0441192626953125, "step": 44310 }, { "epoch": 0.3831787014379469, "grad_norm": 5.748119317634074, "learning_rate": 5.472791225020349e-06, "loss": 0.171875, "step": 44315 }, { "epoch": 0.3832219349594902, "grad_norm": 0.5555170441796236, "learning_rate": 5.472675864365806e-06, "loss": 0.15772857666015624, "step": 44320 }, { "epoch": 0.3832651684810335, "grad_norm": 2.0553277070158025, "learning_rate": 5.4725604923075e-06, "loss": 0.1184906005859375, "step": 44325 }, { "epoch": 0.3833084020025767, "grad_norm": 5.560968925420995, "learning_rate": 5.472445108845963e-06, "loss": 0.1091033935546875, "step": 44330 }, { "epoch": 0.38335163552412, "grad_norm": 6.57814908889275, "learning_rate": 5.472329713981729e-06, "loss": 0.14611597061157228, "step": 44335 }, { "epoch": 0.3833948690456632, "grad_norm": 3.989663302418769, "learning_rate": 5.472214307715328e-06, "loss": 0.106341552734375, "step": 44340 }, { "epoch": 0.3834381025672065, "grad_norm": 8.575875936466142, "learning_rate": 5.472098890047293e-06, "loss": 0.19271697998046874, "step": 44345 }, { "epoch": 0.3834813360887498, "grad_norm": 1.0070488011213299, "learning_rate": 5.471983460978157e-06, "loss": 0.1700592041015625, "step": 44350 }, { "epoch": 0.383524569610293, "grad_norm": 14.059152950057838, "learning_rate": 5.4718680205084515e-06, "loss": 0.46512451171875, "step": 44355 }, { "epoch": 0.3835678031318363, "grad_norm": 23.800923339441827, "learning_rate": 5.471752568638709e-06, "loss": 0.24921188354492188, "step": 44360 }, { "epoch": 0.3836110366533796, "grad_norm": 1.0847489583699357, "learning_rate": 5.471637105369462e-06, "loss": 0.19837188720703125, "step": 44365 }, { "epoch": 0.3836542701749228, "grad_norm": 6.922374304368603, "learning_rate": 5.4715216307012424e-06, "loss": 0.10609893798828125, "step": 44370 }, { "epoch": 0.3836975036964661, "grad_norm": 3.2343254447473684, "learning_rate": 5.4714061446345845e-06, "loss": 0.03549880981445312, "step": 44375 }, { "epoch": 0.38374073721800933, "grad_norm": 2.0365688046601202, "learning_rate": 5.471290647170019e-06, "loss": 0.114654541015625, "step": 44380 }, { "epoch": 0.3837839707395526, "grad_norm": 0.45433732003375266, "learning_rate": 5.47117513830808e-06, "loss": 0.14760360717773438, "step": 44385 }, { "epoch": 0.3838272042610959, "grad_norm": 0.2274188833650072, "learning_rate": 5.471059618049299e-06, "loss": 0.17363433837890624, "step": 44390 }, { "epoch": 0.38387043778263913, "grad_norm": 2.6672870399735187, "learning_rate": 5.4709440863942104e-06, "loss": 0.06573486328125, "step": 44395 }, { "epoch": 0.3839136713041824, "grad_norm": 2.819561661229212, "learning_rate": 5.470828543343345e-06, "loss": 0.06735773086547851, "step": 44400 }, { "epoch": 0.3839569048257257, "grad_norm": 5.444129123916589, "learning_rate": 5.470712988897237e-06, "loss": 0.21210861206054688, "step": 44405 }, { "epoch": 0.3840001383472689, "grad_norm": 22.543625580677492, "learning_rate": 5.4705974230564194e-06, "loss": 0.3579376220703125, "step": 44410 }, { "epoch": 0.3840433718688122, "grad_norm": 2.384265738758016, "learning_rate": 5.470481845821424e-06, "loss": 0.17635231018066405, "step": 44415 }, { "epoch": 0.38408660539035544, "grad_norm": 0.52129627153228, "learning_rate": 5.470366257192786e-06, "loss": 0.4623992919921875, "step": 44420 }, { "epoch": 0.3841298389118987, "grad_norm": 4.2446825338150775, "learning_rate": 5.4702506571710354e-06, "loss": 0.1501800537109375, "step": 44425 }, { "epoch": 0.384173072433442, "grad_norm": 4.227574097597585, "learning_rate": 5.470135045756708e-06, "loss": 0.27454833984375, "step": 44430 }, { "epoch": 0.38421630595498524, "grad_norm": 3.911975893708599, "learning_rate": 5.470019422950336e-06, "loss": 0.282708740234375, "step": 44435 }, { "epoch": 0.3842595394765285, "grad_norm": 1.5150233057227898, "learning_rate": 5.469903788752452e-06, "loss": 0.18057708740234374, "step": 44440 }, { "epoch": 0.3843027729980718, "grad_norm": 23.8564369812352, "learning_rate": 5.469788143163591e-06, "loss": 0.10480194091796875, "step": 44445 }, { "epoch": 0.38434600651961504, "grad_norm": 12.046061456403864, "learning_rate": 5.4696724861842846e-06, "loss": 0.1198822021484375, "step": 44450 }, { "epoch": 0.3843892400411583, "grad_norm": 28.355596159013224, "learning_rate": 5.469556817815066e-06, "loss": 0.1220693588256836, "step": 44455 }, { "epoch": 0.38443247356270155, "grad_norm": 17.022364355591474, "learning_rate": 5.469441138056472e-06, "loss": 0.16636123657226562, "step": 44460 }, { "epoch": 0.38447570708424483, "grad_norm": 15.425610990872219, "learning_rate": 5.469325446909033e-06, "loss": 0.16817626953125, "step": 44465 }, { "epoch": 0.3845189406057881, "grad_norm": 2.5416055379273392, "learning_rate": 5.469209744373281e-06, "loss": 0.10750503540039062, "step": 44470 }, { "epoch": 0.38456217412733135, "grad_norm": 1.8807772404015872, "learning_rate": 5.469094030449755e-06, "loss": 0.38666534423828125, "step": 44475 }, { "epoch": 0.38460540764887463, "grad_norm": 6.665271636143978, "learning_rate": 5.468978305138984e-06, "loss": 0.130029296875, "step": 44480 }, { "epoch": 0.3846486411704179, "grad_norm": 4.260366823086772, "learning_rate": 5.468862568441502e-06, "loss": 0.2393218994140625, "step": 44485 }, { "epoch": 0.38469187469196114, "grad_norm": 20.26658404221925, "learning_rate": 5.468746820357846e-06, "loss": 0.08425531387329102, "step": 44490 }, { "epoch": 0.38473510821350443, "grad_norm": 9.237952196842189, "learning_rate": 5.468631060888547e-06, "loss": 0.08160552978515626, "step": 44495 }, { "epoch": 0.3847783417350477, "grad_norm": 0.5536583703220777, "learning_rate": 5.468515290034139e-06, "loss": 0.38276824951171873, "step": 44500 }, { "epoch": 0.38482157525659094, "grad_norm": 17.636633280875753, "learning_rate": 5.468399507795158e-06, "loss": 0.08690338134765625, "step": 44505 }, { "epoch": 0.3848648087781342, "grad_norm": 6.462111366585513, "learning_rate": 5.4682837141721355e-06, "loss": 0.0789031982421875, "step": 44510 }, { "epoch": 0.38490804229967746, "grad_norm": 5.457536019931883, "learning_rate": 5.468167909165608e-06, "loss": 0.05978240966796875, "step": 44515 }, { "epoch": 0.38495127582122074, "grad_norm": 0.6041683810856875, "learning_rate": 5.468052092776106e-06, "loss": 0.28902397155761717, "step": 44520 }, { "epoch": 0.384994509342764, "grad_norm": 3.4209061234079425, "learning_rate": 5.467936265004167e-06, "loss": 0.07458076477050782, "step": 44525 }, { "epoch": 0.38503774286430725, "grad_norm": 13.073730144082521, "learning_rate": 5.467820425850325e-06, "loss": 0.13597869873046875, "step": 44530 }, { "epoch": 0.38508097638585054, "grad_norm": 39.586732849045056, "learning_rate": 5.467704575315112e-06, "loss": 0.4898872375488281, "step": 44535 }, { "epoch": 0.3851242099073938, "grad_norm": 0.293598709078842, "learning_rate": 5.4675887133990635e-06, "loss": 0.12969913482666015, "step": 44540 }, { "epoch": 0.38516744342893705, "grad_norm": 54.67755720018433, "learning_rate": 5.467472840102713e-06, "loss": 0.3840015411376953, "step": 44545 }, { "epoch": 0.38521067695048034, "grad_norm": 4.704943736489314, "learning_rate": 5.467356955426598e-06, "loss": 0.07543487548828125, "step": 44550 }, { "epoch": 0.38525391047202356, "grad_norm": 37.04409782873164, "learning_rate": 5.46724105937125e-06, "loss": 0.21635208129882813, "step": 44555 }, { "epoch": 0.38529714399356685, "grad_norm": 0.1728873173015424, "learning_rate": 5.467125151937203e-06, "loss": 0.023582839965820314, "step": 44560 }, { "epoch": 0.38534037751511013, "grad_norm": 5.114208169020282, "learning_rate": 5.467009233124994e-06, "loss": 0.20474853515625, "step": 44565 }, { "epoch": 0.38538361103665336, "grad_norm": 34.09802334527712, "learning_rate": 5.466893302935157e-06, "loss": 0.14786300659179688, "step": 44570 }, { "epoch": 0.38542684455819665, "grad_norm": 23.38196676844157, "learning_rate": 5.466777361368224e-06, "loss": 0.13677825927734374, "step": 44575 }, { "epoch": 0.38547007807973993, "grad_norm": 0.6635928047096554, "learning_rate": 5.466661408424733e-06, "loss": 0.038861083984375, "step": 44580 }, { "epoch": 0.38551331160128316, "grad_norm": 35.89743997381034, "learning_rate": 5.466545444105219e-06, "loss": 0.3801158905029297, "step": 44585 }, { "epoch": 0.38555654512282644, "grad_norm": 5.739060984457336, "learning_rate": 5.4664294684102134e-06, "loss": 0.05715484619140625, "step": 44590 }, { "epoch": 0.3855997786443697, "grad_norm": 43.72349591453352, "learning_rate": 5.466313481340253e-06, "loss": 0.19977569580078125, "step": 44595 }, { "epoch": 0.38564301216591296, "grad_norm": 6.684358034574735, "learning_rate": 5.466197482895874e-06, "loss": 0.5167572021484375, "step": 44600 }, { "epoch": 0.38568624568745624, "grad_norm": 23.763127309726144, "learning_rate": 5.466081473077609e-06, "loss": 0.097015380859375, "step": 44605 }, { "epoch": 0.38572947920899947, "grad_norm": 0.13665248413119005, "learning_rate": 5.465965451885995e-06, "loss": 0.06096954345703125, "step": 44610 }, { "epoch": 0.38577271273054276, "grad_norm": 0.4557887621467741, "learning_rate": 5.465849419321566e-06, "loss": 0.06947402954101563, "step": 44615 }, { "epoch": 0.38581594625208604, "grad_norm": 12.464743540724923, "learning_rate": 5.465733375384857e-06, "loss": 0.2533355712890625, "step": 44620 }, { "epoch": 0.38585917977362927, "grad_norm": 20.36555903735328, "learning_rate": 5.465617320076403e-06, "loss": 0.27167816162109376, "step": 44625 }, { "epoch": 0.38590241329517255, "grad_norm": 2.698543715205454, "learning_rate": 5.465501253396741e-06, "loss": 0.2141357421875, "step": 44630 }, { "epoch": 0.3859456468167158, "grad_norm": 30.075102974367148, "learning_rate": 5.465385175346405e-06, "loss": 0.09979629516601562, "step": 44635 }, { "epoch": 0.38598888033825907, "grad_norm": 8.440404381311245, "learning_rate": 5.465269085925929e-06, "loss": 0.358892822265625, "step": 44640 }, { "epoch": 0.38603211385980235, "grad_norm": 0.7791528338641068, "learning_rate": 5.465152985135851e-06, "loss": 0.2189422607421875, "step": 44645 }, { "epoch": 0.3860753473813456, "grad_norm": 11.825850390007496, "learning_rate": 5.465036872976705e-06, "loss": 0.087548828125, "step": 44650 }, { "epoch": 0.38611858090288886, "grad_norm": 46.525808679232505, "learning_rate": 5.464920749449027e-06, "loss": 0.76041259765625, "step": 44655 }, { "epoch": 0.38616181442443215, "grad_norm": 12.615425174539332, "learning_rate": 5.464804614553352e-06, "loss": 0.163458251953125, "step": 44660 }, { "epoch": 0.3862050479459754, "grad_norm": 3.833416568262103, "learning_rate": 5.464688468290217e-06, "loss": 0.16169281005859376, "step": 44665 }, { "epoch": 0.38624828146751866, "grad_norm": 47.30347323178761, "learning_rate": 5.4645723106601545e-06, "loss": 0.192291259765625, "step": 44670 }, { "epoch": 0.38629151498906195, "grad_norm": 7.757119611117802, "learning_rate": 5.464456141663704e-06, "loss": 0.1179351806640625, "step": 44675 }, { "epoch": 0.3863347485106052, "grad_norm": 1.5150208732813681, "learning_rate": 5.464339961301399e-06, "loss": 0.5342117309570312, "step": 44680 }, { "epoch": 0.38637798203214846, "grad_norm": 27.03209767446761, "learning_rate": 5.464223769573775e-06, "loss": 0.28345947265625, "step": 44685 }, { "epoch": 0.3864212155536917, "grad_norm": 5.9435993917882755, "learning_rate": 5.464107566481371e-06, "loss": 0.18877944946289063, "step": 44690 }, { "epoch": 0.386464449075235, "grad_norm": 7.212132725856374, "learning_rate": 5.463991352024718e-06, "loss": 0.15995407104492188, "step": 44695 }, { "epoch": 0.38650768259677826, "grad_norm": 1.320392041182512, "learning_rate": 5.463875126204355e-06, "loss": 0.06360092163085937, "step": 44700 }, { "epoch": 0.3865509161183215, "grad_norm": 24.8472169791927, "learning_rate": 5.46375888902082e-06, "loss": 0.1805328369140625, "step": 44705 }, { "epoch": 0.38659414963986477, "grad_norm": 1.9910863564461545, "learning_rate": 5.4636426404746436e-06, "loss": 0.1873809814453125, "step": 44710 }, { "epoch": 0.38663738316140805, "grad_norm": 21.144541656042595, "learning_rate": 5.463526380566368e-06, "loss": 0.259320068359375, "step": 44715 }, { "epoch": 0.3866806166829513, "grad_norm": 56.026106074415175, "learning_rate": 5.4634101092965244e-06, "loss": 0.8745033264160156, "step": 44720 }, { "epoch": 0.38672385020449457, "grad_norm": 24.79567245247091, "learning_rate": 5.463293826665651e-06, "loss": 0.3665252685546875, "step": 44725 }, { "epoch": 0.3867670837260378, "grad_norm": 3.2400268853332186, "learning_rate": 5.463177532674285e-06, "loss": 0.4633796691894531, "step": 44730 }, { "epoch": 0.3868103172475811, "grad_norm": 5.079300740636236, "learning_rate": 5.463061227322961e-06, "loss": 0.14161605834960939, "step": 44735 }, { "epoch": 0.38685355076912437, "grad_norm": 13.932196009755188, "learning_rate": 5.462944910612217e-06, "loss": 0.11485214233398437, "step": 44740 }, { "epoch": 0.3868967842906676, "grad_norm": 8.906351807356993, "learning_rate": 5.462828582542589e-06, "loss": 0.20088958740234375, "step": 44745 }, { "epoch": 0.3869400178122109, "grad_norm": 0.7353000649903444, "learning_rate": 5.462712243114613e-06, "loss": 0.0963104248046875, "step": 44750 }, { "epoch": 0.38698325133375416, "grad_norm": 7.392973415279169, "learning_rate": 5.462595892328825e-06, "loss": 0.1502685546875, "step": 44755 }, { "epoch": 0.3870264848552974, "grad_norm": 9.618861960032294, "learning_rate": 5.462479530185763e-06, "loss": 0.07141075134277344, "step": 44760 }, { "epoch": 0.3870697183768407, "grad_norm": 5.59610481398384, "learning_rate": 5.462363156685962e-06, "loss": 0.316387939453125, "step": 44765 }, { "epoch": 0.3871129518983839, "grad_norm": 6.019207798514456, "learning_rate": 5.462246771829961e-06, "loss": 0.14256668090820312, "step": 44770 }, { "epoch": 0.3871561854199272, "grad_norm": 30.382318880624208, "learning_rate": 5.4621303756182955e-06, "loss": 0.17788238525390626, "step": 44775 }, { "epoch": 0.3871994189414705, "grad_norm": 0.46452432911079705, "learning_rate": 5.4620139680515015e-06, "loss": 0.33155479431152346, "step": 44780 }, { "epoch": 0.3872426524630137, "grad_norm": 37.932566012583116, "learning_rate": 5.4618975491301175e-06, "loss": 0.559344482421875, "step": 44785 }, { "epoch": 0.387285885984557, "grad_norm": 33.3667455269607, "learning_rate": 5.461781118854678e-06, "loss": 0.637847900390625, "step": 44790 }, { "epoch": 0.3873291195061003, "grad_norm": 30.939040091337755, "learning_rate": 5.461664677225723e-06, "loss": 0.4957547187805176, "step": 44795 }, { "epoch": 0.3873723530276435, "grad_norm": 3.651181818396817, "learning_rate": 5.461548224243787e-06, "loss": 0.2588958740234375, "step": 44800 }, { "epoch": 0.3874155865491868, "grad_norm": 5.907315139325936, "learning_rate": 5.461431759909409e-06, "loss": 0.0634185791015625, "step": 44805 }, { "epoch": 0.38745882007073, "grad_norm": 2.0455763841137995, "learning_rate": 5.461315284223124e-06, "loss": 0.070965576171875, "step": 44810 }, { "epoch": 0.3875020535922733, "grad_norm": 11.1700073831079, "learning_rate": 5.4611987971854715e-06, "loss": 0.282855224609375, "step": 44815 }, { "epoch": 0.3875452871138166, "grad_norm": 8.209876826953867, "learning_rate": 5.461082298796987e-06, "loss": 0.14671630859375, "step": 44820 }, { "epoch": 0.3875885206353598, "grad_norm": 0.48211055388216373, "learning_rate": 5.460965789058208e-06, "loss": 0.204302978515625, "step": 44825 }, { "epoch": 0.3876317541569031, "grad_norm": 17.656613168148922, "learning_rate": 5.4608492679696736e-06, "loss": 0.13130340576171876, "step": 44830 }, { "epoch": 0.3876749876784464, "grad_norm": 2.3273513465976716, "learning_rate": 5.460732735531919e-06, "loss": 0.43165283203125, "step": 44835 }, { "epoch": 0.3877182211999896, "grad_norm": 6.33412788881548, "learning_rate": 5.460616191745483e-06, "loss": 0.10897979736328126, "step": 44840 }, { "epoch": 0.3877614547215329, "grad_norm": 10.889616209185222, "learning_rate": 5.460499636610902e-06, "loss": 0.4287567138671875, "step": 44845 }, { "epoch": 0.3878046882430762, "grad_norm": 30.349180300236945, "learning_rate": 5.4603830701287135e-06, "loss": 0.19996795654296876, "step": 44850 }, { "epoch": 0.3878479217646194, "grad_norm": 1.0439884769833634, "learning_rate": 5.460266492299457e-06, "loss": 0.1944122314453125, "step": 44855 }, { "epoch": 0.3878911552861627, "grad_norm": 13.406496498203781, "learning_rate": 5.460149903123668e-06, "loss": 0.07603034973144532, "step": 44860 }, { "epoch": 0.3879343888077059, "grad_norm": 6.667316099059308, "learning_rate": 5.460033302601885e-06, "loss": 0.08579559326171875, "step": 44865 }, { "epoch": 0.3879776223292492, "grad_norm": 30.261812087211922, "learning_rate": 5.459916690734646e-06, "loss": 0.42413330078125, "step": 44870 }, { "epoch": 0.3880208558507925, "grad_norm": 13.757871739639903, "learning_rate": 5.459800067522489e-06, "loss": 0.3433349609375, "step": 44875 }, { "epoch": 0.3880640893723357, "grad_norm": 3.595987971397167, "learning_rate": 5.45968343296595e-06, "loss": 0.095550537109375, "step": 44880 }, { "epoch": 0.388107322893879, "grad_norm": 9.137148388457657, "learning_rate": 5.459566787065571e-06, "loss": 0.08859710693359375, "step": 44885 }, { "epoch": 0.3881505564154223, "grad_norm": 1.6968704222449238, "learning_rate": 5.459450129821885e-06, "loss": 0.04304084777832031, "step": 44890 }, { "epoch": 0.3881937899369655, "grad_norm": 0.7267360868843582, "learning_rate": 5.4593334612354325e-06, "loss": 0.32906532287597656, "step": 44895 }, { "epoch": 0.3882370234585088, "grad_norm": 35.843905506076204, "learning_rate": 5.459216781306752e-06, "loss": 0.151800537109375, "step": 44900 }, { "epoch": 0.38828025698005203, "grad_norm": 0.12438752204385266, "learning_rate": 5.459100090036382e-06, "loss": 0.033856201171875, "step": 44905 }, { "epoch": 0.3883234905015953, "grad_norm": 30.191521621263025, "learning_rate": 5.458983387424858e-06, "loss": 0.60499267578125, "step": 44910 }, { "epoch": 0.3883667240231386, "grad_norm": 6.1338479222306495, "learning_rate": 5.45886667347272e-06, "loss": 0.12581405639648438, "step": 44915 }, { "epoch": 0.3884099575446818, "grad_norm": 17.466571093762507, "learning_rate": 5.458749948180506e-06, "loss": 0.06782608032226563, "step": 44920 }, { "epoch": 0.3884531910662251, "grad_norm": 1.8687817534139863, "learning_rate": 5.458633211548756e-06, "loss": 0.211968994140625, "step": 44925 }, { "epoch": 0.3884964245877684, "grad_norm": 2.9689152220042536, "learning_rate": 5.458516463578006e-06, "loss": 0.03999786376953125, "step": 44930 }, { "epoch": 0.3885396581093116, "grad_norm": 3.21843276580538, "learning_rate": 5.458399704268795e-06, "loss": 0.37235107421875, "step": 44935 }, { "epoch": 0.3885828916308549, "grad_norm": 20.05179989685526, "learning_rate": 5.458282933621662e-06, "loss": 0.2741973876953125, "step": 44940 }, { "epoch": 0.38862612515239814, "grad_norm": 4.5670883958028226, "learning_rate": 5.458166151637146e-06, "loss": 0.199603271484375, "step": 44945 }, { "epoch": 0.3886693586739414, "grad_norm": 5.3355584293825045, "learning_rate": 5.458049358315785e-06, "loss": 0.3392444610595703, "step": 44950 }, { "epoch": 0.3887125921954847, "grad_norm": 40.609473995470374, "learning_rate": 5.457932553658116e-06, "loss": 0.7416259765625, "step": 44955 }, { "epoch": 0.38875582571702794, "grad_norm": 2.46985786846335, "learning_rate": 5.457815737664681e-06, "loss": 0.1618408203125, "step": 44960 }, { "epoch": 0.3887990592385712, "grad_norm": 14.024313986802259, "learning_rate": 5.457698910336015e-06, "loss": 0.5108062744140625, "step": 44965 }, { "epoch": 0.3888422927601145, "grad_norm": 0.907123152440889, "learning_rate": 5.45758207167266e-06, "loss": 0.16597900390625, "step": 44970 }, { "epoch": 0.38888552628165773, "grad_norm": 0.8930992526046393, "learning_rate": 5.457465221675154e-06, "loss": 0.39395599365234374, "step": 44975 }, { "epoch": 0.388928759803201, "grad_norm": 0.9983742341974426, "learning_rate": 5.4573483603440355e-06, "loss": 0.05028076171875, "step": 44980 }, { "epoch": 0.38897199332474425, "grad_norm": 9.270018145759899, "learning_rate": 5.4572314876798435e-06, "loss": 0.13016242980957032, "step": 44985 }, { "epoch": 0.38901522684628753, "grad_norm": 2.3110232219441884, "learning_rate": 5.457114603683116e-06, "loss": 0.1764801025390625, "step": 44990 }, { "epoch": 0.3890584603678308, "grad_norm": 0.5631241104864417, "learning_rate": 5.456997708354394e-06, "loss": 0.20912628173828124, "step": 44995 }, { "epoch": 0.38910169388937405, "grad_norm": 14.784233689172762, "learning_rate": 5.456880801694216e-06, "loss": 0.3895538330078125, "step": 45000 }, { "epoch": 0.38914492741091733, "grad_norm": 22.29620204650619, "learning_rate": 5.456763883703119e-06, "loss": 0.118109130859375, "step": 45005 }, { "epoch": 0.3891881609324606, "grad_norm": 1.2317249951938924, "learning_rate": 5.456646954381645e-06, "loss": 0.14289169311523436, "step": 45010 }, { "epoch": 0.38923139445400384, "grad_norm": 2.559678365794615, "learning_rate": 5.4565300137303335e-06, "loss": 0.13384857177734374, "step": 45015 }, { "epoch": 0.3892746279755471, "grad_norm": 8.370226414708272, "learning_rate": 5.456413061749721e-06, "loss": 0.07818107604980469, "step": 45020 }, { "epoch": 0.3893178614970904, "grad_norm": 2.025587566784332, "learning_rate": 5.456296098440349e-06, "loss": 0.121063232421875, "step": 45025 }, { "epoch": 0.38936109501863364, "grad_norm": 24.09580824784668, "learning_rate": 5.456179123802757e-06, "loss": 0.13427734375, "step": 45030 }, { "epoch": 0.3894043285401769, "grad_norm": 7.6275975936614335, "learning_rate": 5.456062137837484e-06, "loss": 0.0992828369140625, "step": 45035 }, { "epoch": 0.38944756206172015, "grad_norm": 2.042470842111136, "learning_rate": 5.4559451405450695e-06, "loss": 0.12803955078125, "step": 45040 }, { "epoch": 0.38949079558326344, "grad_norm": 3.309634811947122, "learning_rate": 5.455828131926052e-06, "loss": 0.062860107421875, "step": 45045 }, { "epoch": 0.3895340291048067, "grad_norm": 12.22101262558532, "learning_rate": 5.455711111980972e-06, "loss": 0.3025810241699219, "step": 45050 }, { "epoch": 0.38957726262634995, "grad_norm": 30.497849869445005, "learning_rate": 5.455594080710371e-06, "loss": 0.13373222351074218, "step": 45055 }, { "epoch": 0.38962049614789324, "grad_norm": 6.060247204585349, "learning_rate": 5.455477038114786e-06, "loss": 0.20071449279785156, "step": 45060 }, { "epoch": 0.3896637296694365, "grad_norm": 7.477650213276588, "learning_rate": 5.4553599841947574e-06, "loss": 0.2986602783203125, "step": 45065 }, { "epoch": 0.38970696319097975, "grad_norm": 2.4175216899575744, "learning_rate": 5.455242918950827e-06, "loss": 0.176849365234375, "step": 45070 }, { "epoch": 0.38975019671252303, "grad_norm": 19.52615546736309, "learning_rate": 5.455125842383532e-06, "loss": 0.12518081665039063, "step": 45075 }, { "epoch": 0.38979343023406626, "grad_norm": 6.745291242994963, "learning_rate": 5.455008754493414e-06, "loss": 0.38996105194091796, "step": 45080 }, { "epoch": 0.38983666375560955, "grad_norm": 21.254503712881327, "learning_rate": 5.454891655281012e-06, "loss": 0.09786376953125, "step": 45085 }, { "epoch": 0.38987989727715283, "grad_norm": 2.107247109502836, "learning_rate": 5.454774544746867e-06, "loss": 0.111474609375, "step": 45090 }, { "epoch": 0.38992313079869606, "grad_norm": 7.178697591116562, "learning_rate": 5.4546574228915195e-06, "loss": 0.22262725830078126, "step": 45095 }, { "epoch": 0.38996636432023934, "grad_norm": 8.832598983970572, "learning_rate": 5.454540289715507e-06, "loss": 0.07530441284179687, "step": 45100 }, { "epoch": 0.39000959784178263, "grad_norm": 0.6031122402686471, "learning_rate": 5.454423145219373e-06, "loss": 0.11935558319091796, "step": 45105 }, { "epoch": 0.39005283136332586, "grad_norm": 3.602881402397209, "learning_rate": 5.454305989403656e-06, "loss": 0.0787384033203125, "step": 45110 }, { "epoch": 0.39009606488486914, "grad_norm": 3.696874611601929, "learning_rate": 5.454188822268897e-06, "loss": 0.19307861328125, "step": 45115 }, { "epoch": 0.39013929840641237, "grad_norm": 21.131038013717756, "learning_rate": 5.454071643815635e-06, "loss": 0.24930419921875, "step": 45120 }, { "epoch": 0.39018253192795566, "grad_norm": 0.8674709734306492, "learning_rate": 5.4539544540444124e-06, "loss": 0.092236328125, "step": 45125 }, { "epoch": 0.39022576544949894, "grad_norm": 33.646912098611296, "learning_rate": 5.4538372529557685e-06, "loss": 0.09300994873046875, "step": 45130 }, { "epoch": 0.39026899897104217, "grad_norm": 32.52819415034131, "learning_rate": 5.4537200405502434e-06, "loss": 0.527642822265625, "step": 45135 }, { "epoch": 0.39031223249258545, "grad_norm": 0.5170155847473085, "learning_rate": 5.4536028168283786e-06, "loss": 0.0842529296875, "step": 45140 }, { "epoch": 0.39035546601412874, "grad_norm": 7.416808788813987, "learning_rate": 5.453485581790715e-06, "loss": 0.13897705078125, "step": 45145 }, { "epoch": 0.39039869953567197, "grad_norm": 4.281655503837577, "learning_rate": 5.453368335437792e-06, "loss": 0.16443634033203125, "step": 45150 }, { "epoch": 0.39044193305721525, "grad_norm": 12.35474063749433, "learning_rate": 5.453251077770151e-06, "loss": 0.159649658203125, "step": 45155 }, { "epoch": 0.3904851665787585, "grad_norm": 34.53293720303093, "learning_rate": 5.453133808788334e-06, "loss": 0.3578521728515625, "step": 45160 }, { "epoch": 0.39052840010030176, "grad_norm": 2.4925469776726152, "learning_rate": 5.453016528492879e-06, "loss": 0.0596588134765625, "step": 45165 }, { "epoch": 0.39057163362184505, "grad_norm": 19.269348542888846, "learning_rate": 5.45289923688433e-06, "loss": 0.19599761962890624, "step": 45170 }, { "epoch": 0.3906148671433883, "grad_norm": 11.259315128999868, "learning_rate": 5.452781933963225e-06, "loss": 0.2768989562988281, "step": 45175 }, { "epoch": 0.39065810066493156, "grad_norm": 4.32002687902404, "learning_rate": 5.452664619730108e-06, "loss": 0.03251838684082031, "step": 45180 }, { "epoch": 0.39070133418647485, "grad_norm": 2.1129274639633295, "learning_rate": 5.452547294185517e-06, "loss": 0.191949462890625, "step": 45185 }, { "epoch": 0.3907445677080181, "grad_norm": 0.8043413100743012, "learning_rate": 5.452429957329996e-06, "loss": 0.306640625, "step": 45190 }, { "epoch": 0.39078780122956136, "grad_norm": 19.328241557870307, "learning_rate": 5.452312609164084e-06, "loss": 0.12693252563476562, "step": 45195 }, { "epoch": 0.3908310347511046, "grad_norm": 5.997107163559422, "learning_rate": 5.452195249688323e-06, "loss": 0.195806884765625, "step": 45200 }, { "epoch": 0.3908742682726479, "grad_norm": 1.433420066603822, "learning_rate": 5.452077878903253e-06, "loss": 0.2567138671875, "step": 45205 }, { "epoch": 0.39091750179419116, "grad_norm": 6.4545676829989365, "learning_rate": 5.451960496809418e-06, "loss": 0.22807731628417968, "step": 45210 }, { "epoch": 0.3909607353157344, "grad_norm": 11.084192907514112, "learning_rate": 5.451843103407357e-06, "loss": 0.2445068359375, "step": 45215 }, { "epoch": 0.39100396883727767, "grad_norm": 11.512601083401487, "learning_rate": 5.4517256986976135e-06, "loss": 0.10962677001953125, "step": 45220 }, { "epoch": 0.39104720235882096, "grad_norm": 36.81508905018583, "learning_rate": 5.451608282680726e-06, "loss": 0.2650482177734375, "step": 45225 }, { "epoch": 0.3910904358803642, "grad_norm": 2.640074028405149, "learning_rate": 5.451490855357239e-06, "loss": 0.05066757202148438, "step": 45230 }, { "epoch": 0.39113366940190747, "grad_norm": 10.29647771133859, "learning_rate": 5.451373416727693e-06, "loss": 0.4037261962890625, "step": 45235 }, { "epoch": 0.39117690292345075, "grad_norm": 3.5477226945313225, "learning_rate": 5.451255966792628e-06, "loss": 0.41510610580444335, "step": 45240 }, { "epoch": 0.391220136444994, "grad_norm": 33.797142781679966, "learning_rate": 5.451138505552589e-06, "loss": 0.423199462890625, "step": 45245 }, { "epoch": 0.39126336996653727, "grad_norm": 33.20934267821307, "learning_rate": 5.451021033008114e-06, "loss": 0.22820758819580078, "step": 45250 }, { "epoch": 0.3913066034880805, "grad_norm": 16.55815041774884, "learning_rate": 5.450903549159748e-06, "loss": 0.15705795288085939, "step": 45255 }, { "epoch": 0.3913498370096238, "grad_norm": 19.729455385743456, "learning_rate": 5.45078605400803e-06, "loss": 0.25679645538330076, "step": 45260 }, { "epoch": 0.39139307053116706, "grad_norm": 1.6562550611935263, "learning_rate": 5.4506685475535035e-06, "loss": 0.046154022216796875, "step": 45265 }, { "epoch": 0.3914363040527103, "grad_norm": 1.1287984200769299, "learning_rate": 5.450551029796711e-06, "loss": 0.14165802001953126, "step": 45270 }, { "epoch": 0.3914795375742536, "grad_norm": 18.85507764142911, "learning_rate": 5.450433500738194e-06, "loss": 0.1729717254638672, "step": 45275 }, { "epoch": 0.39152277109579686, "grad_norm": 50.36346560862175, "learning_rate": 5.450315960378493e-06, "loss": 0.26090106964111326, "step": 45280 }, { "epoch": 0.3915660046173401, "grad_norm": 21.515448006251432, "learning_rate": 5.450198408718152e-06, "loss": 0.11311416625976563, "step": 45285 }, { "epoch": 0.3916092381388834, "grad_norm": 5.997139453279023, "learning_rate": 5.450080845757712e-06, "loss": 0.07284088134765625, "step": 45290 }, { "epoch": 0.3916524716604266, "grad_norm": 12.766357285916742, "learning_rate": 5.4499632714977155e-06, "loss": 0.129638671875, "step": 45295 }, { "epoch": 0.3916957051819699, "grad_norm": 16.7088771871116, "learning_rate": 5.449845685938706e-06, "loss": 0.06242942810058594, "step": 45300 }, { "epoch": 0.3917389387035132, "grad_norm": 7.026237538266415, "learning_rate": 5.449728089081224e-06, "loss": 0.238018798828125, "step": 45305 }, { "epoch": 0.3917821722250564, "grad_norm": 0.043466387273442916, "learning_rate": 5.449610480925812e-06, "loss": 0.19540824890136718, "step": 45310 }, { "epoch": 0.3918254057465997, "grad_norm": 10.581637588819333, "learning_rate": 5.449492861473013e-06, "loss": 0.0690704345703125, "step": 45315 }, { "epoch": 0.39186863926814297, "grad_norm": 1.1042435302711884, "learning_rate": 5.44937523072337e-06, "loss": 0.19726638793945311, "step": 45320 }, { "epoch": 0.3919118727896862, "grad_norm": 5.888168875019044, "learning_rate": 5.449257588677425e-06, "loss": 0.1290313720703125, "step": 45325 }, { "epoch": 0.3919551063112295, "grad_norm": 19.50683361438374, "learning_rate": 5.4491399353357196e-06, "loss": 0.115997314453125, "step": 45330 }, { "epoch": 0.3919983398327727, "grad_norm": 3.859112844096939, "learning_rate": 5.449022270698798e-06, "loss": 0.07987442016601562, "step": 45335 }, { "epoch": 0.392041573354316, "grad_norm": 14.301575124786709, "learning_rate": 5.448904594767203e-06, "loss": 0.072662353515625, "step": 45340 }, { "epoch": 0.3920848068758593, "grad_norm": 34.11456177188459, "learning_rate": 5.4487869075414746e-06, "loss": 0.40974273681640627, "step": 45345 }, { "epoch": 0.3921280403974025, "grad_norm": 23.37879178078739, "learning_rate": 5.448669209022158e-06, "loss": 0.12970504760742188, "step": 45350 }, { "epoch": 0.3921712739189458, "grad_norm": 16.20918224319134, "learning_rate": 5.448551499209795e-06, "loss": 0.22583656311035155, "step": 45355 }, { "epoch": 0.3922145074404891, "grad_norm": 7.87188097550546, "learning_rate": 5.4484337781049285e-06, "loss": 0.164263916015625, "step": 45360 }, { "epoch": 0.3922577409620323, "grad_norm": 10.353868166661286, "learning_rate": 5.448316045708103e-06, "loss": 0.1783843994140625, "step": 45365 }, { "epoch": 0.3923009744835756, "grad_norm": 2.085014504758457, "learning_rate": 5.44819830201986e-06, "loss": 0.08562774658203125, "step": 45370 }, { "epoch": 0.3923442080051188, "grad_norm": 7.936590986416514, "learning_rate": 5.4480805470407425e-06, "loss": 0.11806259155273438, "step": 45375 }, { "epoch": 0.3923874415266621, "grad_norm": 9.326153475202831, "learning_rate": 5.447962780771293e-06, "loss": 0.0968170166015625, "step": 45380 }, { "epoch": 0.3924306750482054, "grad_norm": 5.398717680199185, "learning_rate": 5.447845003212057e-06, "loss": 0.20926437377929688, "step": 45385 }, { "epoch": 0.3924739085697486, "grad_norm": 1.13379296875807, "learning_rate": 5.447727214363575e-06, "loss": 0.2182586669921875, "step": 45390 }, { "epoch": 0.3925171420912919, "grad_norm": 23.38469505242335, "learning_rate": 5.447609414226391e-06, "loss": 0.22611541748046876, "step": 45395 }, { "epoch": 0.3925603756128352, "grad_norm": 9.072735615945238, "learning_rate": 5.44749160280105e-06, "loss": 0.16359481811523438, "step": 45400 }, { "epoch": 0.3926036091343784, "grad_norm": 8.945777237980161, "learning_rate": 5.447373780088093e-06, "loss": 0.08605690002441406, "step": 45405 }, { "epoch": 0.3926468426559217, "grad_norm": 2.704034002185759, "learning_rate": 5.447255946088065e-06, "loss": 0.39104156494140624, "step": 45410 }, { "epoch": 0.392690076177465, "grad_norm": 2.475081954946596, "learning_rate": 5.447138100801509e-06, "loss": 0.13401641845703124, "step": 45415 }, { "epoch": 0.3927333096990082, "grad_norm": 2.037525451176817, "learning_rate": 5.447020244228967e-06, "loss": 0.0569091796875, "step": 45420 }, { "epoch": 0.3927765432205515, "grad_norm": 1.5876219221103445, "learning_rate": 5.446902376370985e-06, "loss": 0.10064697265625, "step": 45425 }, { "epoch": 0.39281977674209473, "grad_norm": 11.36258550061293, "learning_rate": 5.446784497228105e-06, "loss": 0.128131103515625, "step": 45430 }, { "epoch": 0.392863010263638, "grad_norm": 6.36847145604479, "learning_rate": 5.446666606800872e-06, "loss": 0.13596763610839843, "step": 45435 }, { "epoch": 0.3929062437851813, "grad_norm": 1.5557994066043754, "learning_rate": 5.446548705089828e-06, "loss": 0.18414306640625, "step": 45440 }, { "epoch": 0.3929494773067245, "grad_norm": 49.753810555697534, "learning_rate": 5.446430792095517e-06, "loss": 0.09886970520019531, "step": 45445 }, { "epoch": 0.3929927108282678, "grad_norm": 1.0110834231862325, "learning_rate": 5.446312867818485e-06, "loss": 0.150421142578125, "step": 45450 }, { "epoch": 0.3930359443498111, "grad_norm": 5.954923189589145, "learning_rate": 5.446194932259273e-06, "loss": 0.07880935668945313, "step": 45455 }, { "epoch": 0.3930791778713543, "grad_norm": 0.6377933573590409, "learning_rate": 5.446076985418425e-06, "loss": 0.05684585571289062, "step": 45460 }, { "epoch": 0.3931224113928976, "grad_norm": 20.144785290832548, "learning_rate": 5.445959027296488e-06, "loss": 0.270111083984375, "step": 45465 }, { "epoch": 0.39316564491444084, "grad_norm": 3.133906945060741, "learning_rate": 5.445841057894003e-06, "loss": 0.21415786743164061, "step": 45470 }, { "epoch": 0.3932088784359841, "grad_norm": 0.4040140435524277, "learning_rate": 5.445723077211516e-06, "loss": 0.20308837890625, "step": 45475 }, { "epoch": 0.3932521119575274, "grad_norm": 1.7019497713480376, "learning_rate": 5.44560508524957e-06, "loss": 0.06696128845214844, "step": 45480 }, { "epoch": 0.39329534547907063, "grad_norm": 1.6379670896945058, "learning_rate": 5.445487082008709e-06, "loss": 0.15839691162109376, "step": 45485 }, { "epoch": 0.3933385790006139, "grad_norm": 3.279133676769928, "learning_rate": 5.445369067489479e-06, "loss": 0.0696044921875, "step": 45490 }, { "epoch": 0.3933818125221572, "grad_norm": 17.088398924591655, "learning_rate": 5.445251041692422e-06, "loss": 0.40135498046875, "step": 45495 }, { "epoch": 0.39342504604370043, "grad_norm": 4.212851070723344, "learning_rate": 5.445133004618083e-06, "loss": 0.10727920532226562, "step": 45500 }, { "epoch": 0.3934682795652437, "grad_norm": 0.5500324303644114, "learning_rate": 5.445014956267007e-06, "loss": 0.08381195068359375, "step": 45505 }, { "epoch": 0.39351151308678695, "grad_norm": 6.116265805119168, "learning_rate": 5.444896896639738e-06, "loss": 0.03645477294921875, "step": 45510 }, { "epoch": 0.39355474660833023, "grad_norm": 6.580598000730393, "learning_rate": 5.44477882573682e-06, "loss": 0.24734954833984374, "step": 45515 }, { "epoch": 0.3935979801298735, "grad_norm": 14.378397999495743, "learning_rate": 5.444660743558799e-06, "loss": 0.20542478561401367, "step": 45520 }, { "epoch": 0.39364121365141674, "grad_norm": 15.302854200933748, "learning_rate": 5.444542650106218e-06, "loss": 0.2176025390625, "step": 45525 }, { "epoch": 0.39368444717296003, "grad_norm": 13.09227774467083, "learning_rate": 5.444424545379622e-06, "loss": 0.0711883544921875, "step": 45530 }, { "epoch": 0.3937276806945033, "grad_norm": 12.280028600617017, "learning_rate": 5.444306429379557e-06, "loss": 0.10612106323242188, "step": 45535 }, { "epoch": 0.39377091421604654, "grad_norm": 0.41896308283283645, "learning_rate": 5.4441883021065665e-06, "loss": 0.10836257934570312, "step": 45540 }, { "epoch": 0.3938141477375898, "grad_norm": 14.659816517562998, "learning_rate": 5.444070163561195e-06, "loss": 0.14233856201171874, "step": 45545 }, { "epoch": 0.39385738125913305, "grad_norm": 26.15083646274244, "learning_rate": 5.443952013743989e-06, "loss": 0.5689682006835938, "step": 45550 }, { "epoch": 0.39390061478067634, "grad_norm": 4.928998355984894, "learning_rate": 5.443833852655491e-06, "loss": 0.07900848388671874, "step": 45555 }, { "epoch": 0.3939438483022196, "grad_norm": 28.11244207961824, "learning_rate": 5.4437156802962474e-06, "loss": 0.10041770935058594, "step": 45560 }, { "epoch": 0.39398708182376285, "grad_norm": 9.29843833702891, "learning_rate": 5.443597496666803e-06, "loss": 0.1479625701904297, "step": 45565 }, { "epoch": 0.39403031534530614, "grad_norm": 5.220507549586121, "learning_rate": 5.443479301767703e-06, "loss": 0.10310783386230468, "step": 45570 }, { "epoch": 0.3940735488668494, "grad_norm": 24.362899948766618, "learning_rate": 5.443361095599492e-06, "loss": 0.4060791015625, "step": 45575 }, { "epoch": 0.39411678238839265, "grad_norm": 4.3703553674662015, "learning_rate": 5.4432428781627164e-06, "loss": 0.17476654052734375, "step": 45580 }, { "epoch": 0.39416001590993593, "grad_norm": 27.951140723780004, "learning_rate": 5.44312464945792e-06, "loss": 0.14749927520751954, "step": 45585 }, { "epoch": 0.3942032494314792, "grad_norm": 0.28586345651896766, "learning_rate": 5.443006409485649e-06, "loss": 0.15276947021484374, "step": 45590 }, { "epoch": 0.39424648295302245, "grad_norm": 2.842562074452444, "learning_rate": 5.442888158246448e-06, "loss": 0.27756271362304685, "step": 45595 }, { "epoch": 0.39428971647456573, "grad_norm": 15.307290757657714, "learning_rate": 5.442769895740863e-06, "loss": 0.51080322265625, "step": 45600 }, { "epoch": 0.39433294999610896, "grad_norm": 5.571448207167338, "learning_rate": 5.442651621969439e-06, "loss": 0.30485191345214846, "step": 45605 }, { "epoch": 0.39437618351765225, "grad_norm": 18.878618702740454, "learning_rate": 5.44253333693272e-06, "loss": 0.17498779296875, "step": 45610 }, { "epoch": 0.39441941703919553, "grad_norm": 5.229354752358557, "learning_rate": 5.442415040631256e-06, "loss": 0.112725830078125, "step": 45615 }, { "epoch": 0.39446265056073876, "grad_norm": 0.5272444783428872, "learning_rate": 5.442296733065587e-06, "loss": 0.04300994873046875, "step": 45620 }, { "epoch": 0.39450588408228204, "grad_norm": 22.705916047596244, "learning_rate": 5.442178414236263e-06, "loss": 0.3950836181640625, "step": 45625 }, { "epoch": 0.39454911760382533, "grad_norm": 7.535888100874517, "learning_rate": 5.4420600841438275e-06, "loss": 0.07328338623046875, "step": 45630 }, { "epoch": 0.39459235112536856, "grad_norm": 3.6956209373985054, "learning_rate": 5.441941742788827e-06, "loss": 0.07356643676757812, "step": 45635 }, { "epoch": 0.39463558464691184, "grad_norm": 17.50806232907921, "learning_rate": 5.441823390171805e-06, "loss": 0.2763214111328125, "step": 45640 }, { "epoch": 0.39467881816845507, "grad_norm": 6.544538330046117, "learning_rate": 5.4417050262933116e-06, "loss": 0.0983673095703125, "step": 45645 }, { "epoch": 0.39472205168999835, "grad_norm": 18.83007174365412, "learning_rate": 5.4415866511538895e-06, "loss": 0.286431884765625, "step": 45650 }, { "epoch": 0.39476528521154164, "grad_norm": 0.04038267021525401, "learning_rate": 5.441468264754084e-06, "loss": 0.28877696990966795, "step": 45655 }, { "epoch": 0.39480851873308487, "grad_norm": 0.338880695118077, "learning_rate": 5.4413498670944455e-06, "loss": 0.0986297607421875, "step": 45660 }, { "epoch": 0.39485175225462815, "grad_norm": 1.3523608722751363, "learning_rate": 5.4412314581755155e-06, "loss": 0.08728179931640626, "step": 45665 }, { "epoch": 0.39489498577617144, "grad_norm": 2.2156632418628446, "learning_rate": 5.441113037997841e-06, "loss": 0.17081146240234374, "step": 45670 }, { "epoch": 0.39493821929771467, "grad_norm": 14.823688409530748, "learning_rate": 5.440994606561971e-06, "loss": 0.06885185241699218, "step": 45675 }, { "epoch": 0.39498145281925795, "grad_norm": 2.44080209701778, "learning_rate": 5.4408761638684475e-06, "loss": 0.06082916259765625, "step": 45680 }, { "epoch": 0.3950246863408012, "grad_norm": 44.75520127554571, "learning_rate": 5.440757709917821e-06, "loss": 0.26729278564453124, "step": 45685 }, { "epoch": 0.39506791986234446, "grad_norm": 7.676470688172489, "learning_rate": 5.440639244710634e-06, "loss": 0.16175384521484376, "step": 45690 }, { "epoch": 0.39511115338388775, "grad_norm": 2.4231513395411772, "learning_rate": 5.440520768247433e-06, "loss": 0.161285400390625, "step": 45695 }, { "epoch": 0.395154386905431, "grad_norm": 17.15878096410887, "learning_rate": 5.440402280528769e-06, "loss": 0.27567138671875, "step": 45700 }, { "epoch": 0.39519762042697426, "grad_norm": 4.829694037430561, "learning_rate": 5.440283781555183e-06, "loss": 0.28227081298828127, "step": 45705 }, { "epoch": 0.39524085394851755, "grad_norm": 5.292821525400804, "learning_rate": 5.440165271327226e-06, "loss": 0.21095046997070313, "step": 45710 }, { "epoch": 0.3952840874700608, "grad_norm": 3.0771818477816537, "learning_rate": 5.440046749845441e-06, "loss": 0.12867279052734376, "step": 45715 }, { "epoch": 0.39532732099160406, "grad_norm": 2.0305840953916765, "learning_rate": 5.439928217110376e-06, "loss": 0.084619140625, "step": 45720 }, { "epoch": 0.3953705545131473, "grad_norm": 30.474373646712973, "learning_rate": 5.439809673122577e-06, "loss": 0.7262321472167969, "step": 45725 }, { "epoch": 0.39541378803469057, "grad_norm": 2.6973925524873823, "learning_rate": 5.439691117882593e-06, "loss": 0.1397003173828125, "step": 45730 }, { "epoch": 0.39545702155623386, "grad_norm": 11.198867759861983, "learning_rate": 5.439572551390968e-06, "loss": 0.28264427185058594, "step": 45735 }, { "epoch": 0.3955002550777771, "grad_norm": 0.9261372612828788, "learning_rate": 5.43945397364825e-06, "loss": 0.06344223022460938, "step": 45740 }, { "epoch": 0.39554348859932037, "grad_norm": 4.728716172927978, "learning_rate": 5.439335384654986e-06, "loss": 0.22943801879882814, "step": 45745 }, { "epoch": 0.39558672212086365, "grad_norm": 1.064902860427808, "learning_rate": 5.4392167844117235e-06, "loss": 0.48213043212890627, "step": 45750 }, { "epoch": 0.3956299556424069, "grad_norm": 21.614554674502166, "learning_rate": 5.439098172919008e-06, "loss": 0.11224365234375, "step": 45755 }, { "epoch": 0.39567318916395017, "grad_norm": 0.3996930201062001, "learning_rate": 5.438979550177387e-06, "loss": 0.08910369873046875, "step": 45760 }, { "epoch": 0.39571642268549345, "grad_norm": 10.477156133579564, "learning_rate": 5.438860916187408e-06, "loss": 0.04722518920898437, "step": 45765 }, { "epoch": 0.3957596562070367, "grad_norm": 1.0900216172991133, "learning_rate": 5.438742270949618e-06, "loss": 0.07656173706054688, "step": 45770 }, { "epoch": 0.39580288972857997, "grad_norm": 2.3809814511503, "learning_rate": 5.438623614464565e-06, "loss": 0.06407470703125, "step": 45775 }, { "epoch": 0.3958461232501232, "grad_norm": 2.9869655143366227, "learning_rate": 5.438504946732794e-06, "loss": 0.07433624267578125, "step": 45780 }, { "epoch": 0.3958893567716665, "grad_norm": 8.847766825402877, "learning_rate": 5.438386267754854e-06, "loss": 0.15001373291015624, "step": 45785 }, { "epoch": 0.39593259029320976, "grad_norm": 0.8472966946286724, "learning_rate": 5.438267577531293e-06, "loss": 0.058414459228515625, "step": 45790 }, { "epoch": 0.395975823814753, "grad_norm": 8.012744000069786, "learning_rate": 5.438148876062656e-06, "loss": 0.32334442138671876, "step": 45795 }, { "epoch": 0.3960190573362963, "grad_norm": 7.89999840876827, "learning_rate": 5.438030163349492e-06, "loss": 0.23076324462890624, "step": 45800 }, { "epoch": 0.39606229085783956, "grad_norm": 9.994393580918256, "learning_rate": 5.43791143939235e-06, "loss": 0.1921661376953125, "step": 45805 }, { "epoch": 0.3961055243793828, "grad_norm": 1.6836470657482832, "learning_rate": 5.437792704191774e-06, "loss": 0.17489013671875, "step": 45810 }, { "epoch": 0.3961487579009261, "grad_norm": 2.163506950134128, "learning_rate": 5.4376739577483146e-06, "loss": 0.1033172607421875, "step": 45815 }, { "epoch": 0.3961919914224693, "grad_norm": 31.111689119838182, "learning_rate": 5.437555200062517e-06, "loss": 0.12887191772460938, "step": 45820 }, { "epoch": 0.3962352249440126, "grad_norm": 0.22161756255425794, "learning_rate": 5.437436431134931e-06, "loss": 0.11917037963867187, "step": 45825 }, { "epoch": 0.39627845846555587, "grad_norm": 0.7143678505343948, "learning_rate": 5.437317650966104e-06, "loss": 0.14989013671875, "step": 45830 }, { "epoch": 0.3963216919870991, "grad_norm": 10.392955129325973, "learning_rate": 5.437198859556582e-06, "loss": 0.3193756103515625, "step": 45835 }, { "epoch": 0.3963649255086424, "grad_norm": 31.815564075767416, "learning_rate": 5.437080056906916e-06, "loss": 0.40721435546875, "step": 45840 }, { "epoch": 0.39640815903018567, "grad_norm": 0.6600960017745082, "learning_rate": 5.43696124301765e-06, "loss": 0.2242462158203125, "step": 45845 }, { "epoch": 0.3964513925517289, "grad_norm": 4.636400996349435, "learning_rate": 5.4368424178893365e-06, "loss": 0.15807113647460938, "step": 45850 }, { "epoch": 0.3964946260732722, "grad_norm": 38.89740942303284, "learning_rate": 5.43672358152252e-06, "loss": 0.46407623291015626, "step": 45855 }, { "epoch": 0.3965378595948154, "grad_norm": 4.082849234752132, "learning_rate": 5.436604733917749e-06, "loss": 0.20607452392578124, "step": 45860 }, { "epoch": 0.3965810931163587, "grad_norm": 5.2830115879261585, "learning_rate": 5.436485875075573e-06, "loss": 0.04609832763671875, "step": 45865 }, { "epoch": 0.396624326637902, "grad_norm": 7.7780307093972425, "learning_rate": 5.43636700499654e-06, "loss": 0.052820587158203126, "step": 45870 }, { "epoch": 0.3966675601594452, "grad_norm": 2.706523708594068, "learning_rate": 5.436248123681197e-06, "loss": 0.18100318908691407, "step": 45875 }, { "epoch": 0.3967107936809885, "grad_norm": 17.655799734348708, "learning_rate": 5.436129231130093e-06, "loss": 0.31824951171875, "step": 45880 }, { "epoch": 0.3967540272025318, "grad_norm": 29.9845060914814, "learning_rate": 5.436010327343776e-06, "loss": 0.294830322265625, "step": 45885 }, { "epoch": 0.396797260724075, "grad_norm": 7.72286569005724, "learning_rate": 5.435891412322795e-06, "loss": 0.19352264404296876, "step": 45890 }, { "epoch": 0.3968404942456183, "grad_norm": 9.890418787997937, "learning_rate": 5.435772486067697e-06, "loss": 0.14709091186523438, "step": 45895 }, { "epoch": 0.3968837277671615, "grad_norm": 5.810989564834476, "learning_rate": 5.435653548579032e-06, "loss": 0.363055419921875, "step": 45900 }, { "epoch": 0.3969269612887048, "grad_norm": 3.3647931347672717, "learning_rate": 5.435534599857349e-06, "loss": 0.36795654296875, "step": 45905 }, { "epoch": 0.3969701948102481, "grad_norm": 18.46656080068253, "learning_rate": 5.435415639903194e-06, "loss": 0.22645416259765624, "step": 45910 }, { "epoch": 0.3970134283317913, "grad_norm": 1.9139194313609456, "learning_rate": 5.435296668717119e-06, "loss": 0.43171844482421873, "step": 45915 }, { "epoch": 0.3970566618533346, "grad_norm": 11.955643977935912, "learning_rate": 5.43517768629967e-06, "loss": 0.21074066162109376, "step": 45920 }, { "epoch": 0.3970998953748779, "grad_norm": 2.153255815538391, "learning_rate": 5.435058692651397e-06, "loss": 0.12563705444335938, "step": 45925 }, { "epoch": 0.3971431288964211, "grad_norm": 3.042964378744833, "learning_rate": 5.434939687772849e-06, "loss": 0.11637420654296875, "step": 45930 }, { "epoch": 0.3971863624179644, "grad_norm": 2.99687190233438, "learning_rate": 5.434820671664573e-06, "loss": 0.088568115234375, "step": 45935 }, { "epoch": 0.39722959593950763, "grad_norm": 21.82875096780498, "learning_rate": 5.434701644327119e-06, "loss": 0.3176361083984375, "step": 45940 }, { "epoch": 0.3972728294610509, "grad_norm": 33.75229235169117, "learning_rate": 5.434582605761037e-06, "loss": 0.461859130859375, "step": 45945 }, { "epoch": 0.3973160629825942, "grad_norm": 38.1225261377095, "learning_rate": 5.434463555966876e-06, "loss": 0.5511859893798828, "step": 45950 }, { "epoch": 0.3973592965041374, "grad_norm": 14.136671457177716, "learning_rate": 5.434344494945183e-06, "loss": 0.25965576171875, "step": 45955 }, { "epoch": 0.3974025300256807, "grad_norm": 22.353010652584203, "learning_rate": 5.434225422696509e-06, "loss": 0.20850448608398436, "step": 45960 }, { "epoch": 0.397445763547224, "grad_norm": 7.689030276087616, "learning_rate": 5.434106339221401e-06, "loss": 0.07767486572265625, "step": 45965 }, { "epoch": 0.3974889970687672, "grad_norm": 17.201923467645926, "learning_rate": 5.433987244520411e-06, "loss": 0.30447988510131835, "step": 45970 }, { "epoch": 0.3975322305903105, "grad_norm": 1.1467744808584037, "learning_rate": 5.433868138594087e-06, "loss": 0.07555389404296875, "step": 45975 }, { "epoch": 0.3975754641118538, "grad_norm": 1.8423165962948498, "learning_rate": 5.433749021442977e-06, "loss": 0.048626708984375, "step": 45980 }, { "epoch": 0.397618697633397, "grad_norm": 0.5316489889486791, "learning_rate": 5.433629893067633e-06, "loss": 0.051741600036621094, "step": 45985 }, { "epoch": 0.3976619311549403, "grad_norm": 1.287094965094084, "learning_rate": 5.4335107534686024e-06, "loss": 0.0498687744140625, "step": 45990 }, { "epoch": 0.39770516467648354, "grad_norm": 7.57455213011002, "learning_rate": 5.433391602646435e-06, "loss": 0.08891887664794922, "step": 45995 }, { "epoch": 0.3977483981980268, "grad_norm": 19.362308221903827, "learning_rate": 5.4332724406016815e-06, "loss": 0.29253158569335935, "step": 46000 }, { "epoch": 0.3977916317195701, "grad_norm": 17.388607643690058, "learning_rate": 5.43315326733489e-06, "loss": 0.14778900146484375, "step": 46005 }, { "epoch": 0.39783486524111333, "grad_norm": 3.736488069713629, "learning_rate": 5.43303408284661e-06, "loss": 0.10618534088134765, "step": 46010 }, { "epoch": 0.3978780987626566, "grad_norm": 67.20238895690021, "learning_rate": 5.4329148871373926e-06, "loss": 0.20673065185546874, "step": 46015 }, { "epoch": 0.3979213322841999, "grad_norm": 0.6697171774090928, "learning_rate": 5.432795680207785e-06, "loss": 0.0493408203125, "step": 46020 }, { "epoch": 0.39796456580574313, "grad_norm": 3.0613429569275636, "learning_rate": 5.432676462058341e-06, "loss": 0.11086959838867187, "step": 46025 }, { "epoch": 0.3980077993272864, "grad_norm": 27.413907486964668, "learning_rate": 5.432557232689607e-06, "loss": 0.0816314697265625, "step": 46030 }, { "epoch": 0.39805103284882964, "grad_norm": 18.89357514399255, "learning_rate": 5.432437992102134e-06, "loss": 0.2047271728515625, "step": 46035 }, { "epoch": 0.39809426637037293, "grad_norm": 4.262037405176947, "learning_rate": 5.432318740296472e-06, "loss": 0.09910888671875, "step": 46040 }, { "epoch": 0.3981374998919162, "grad_norm": 19.000758472754722, "learning_rate": 5.432199477273171e-06, "loss": 0.571331787109375, "step": 46045 }, { "epoch": 0.39818073341345944, "grad_norm": 15.420489486976912, "learning_rate": 5.4320802030327805e-06, "loss": 0.269403076171875, "step": 46050 }, { "epoch": 0.3982239669350027, "grad_norm": 1.1371541144379922, "learning_rate": 5.431960917575851e-06, "loss": 0.0488128662109375, "step": 46055 }, { "epoch": 0.398267200456546, "grad_norm": 0.25912833931968837, "learning_rate": 5.431841620902934e-06, "loss": 0.09367904663085938, "step": 46060 }, { "epoch": 0.39831043397808924, "grad_norm": 4.214222311070237, "learning_rate": 5.4317223130145766e-06, "loss": 0.2344390869140625, "step": 46065 }, { "epoch": 0.3983536674996325, "grad_norm": 44.56117197434424, "learning_rate": 5.431602993911332e-06, "loss": 0.243743896484375, "step": 46070 }, { "epoch": 0.39839690102117575, "grad_norm": 37.496433812105415, "learning_rate": 5.431483663593748e-06, "loss": 0.4790679931640625, "step": 46075 }, { "epoch": 0.39844013454271904, "grad_norm": 0.6517762209408189, "learning_rate": 5.431364322062377e-06, "loss": 0.05994911193847656, "step": 46080 }, { "epoch": 0.3984833680642623, "grad_norm": 7.570811745757126, "learning_rate": 5.431244969317768e-06, "loss": 0.2740234375, "step": 46085 }, { "epoch": 0.39852660158580555, "grad_norm": 1.2365064912765042, "learning_rate": 5.431125605360473e-06, "loss": 0.18662681579589843, "step": 46090 }, { "epoch": 0.39856983510734884, "grad_norm": 21.770353613619424, "learning_rate": 5.431006230191042e-06, "loss": 0.10278472900390626, "step": 46095 }, { "epoch": 0.3986130686288921, "grad_norm": 45.992210413169175, "learning_rate": 5.430886843810023e-06, "loss": 0.2815521240234375, "step": 46100 }, { "epoch": 0.39865630215043535, "grad_norm": 8.386402319813266, "learning_rate": 5.43076744621797e-06, "loss": 0.224639892578125, "step": 46105 }, { "epoch": 0.39869953567197863, "grad_norm": 16.911532958845434, "learning_rate": 5.430648037415432e-06, "loss": 0.17242431640625, "step": 46110 }, { "epoch": 0.39874276919352186, "grad_norm": 1.0572571369593236, "learning_rate": 5.4305286174029605e-06, "loss": 0.1616363525390625, "step": 46115 }, { "epoch": 0.39878600271506515, "grad_norm": 2.7300663268044896, "learning_rate": 5.430409186181106e-06, "loss": 0.10707168579101563, "step": 46120 }, { "epoch": 0.39882923623660843, "grad_norm": 7.013238332132874, "learning_rate": 5.430289743750418e-06, "loss": 0.08150787353515625, "step": 46125 }, { "epoch": 0.39887246975815166, "grad_norm": 0.6103900174875785, "learning_rate": 5.430170290111449e-06, "loss": 0.08063278198242188, "step": 46130 }, { "epoch": 0.39891570327969494, "grad_norm": 17.627213592963503, "learning_rate": 5.430050825264749e-06, "loss": 0.27130126953125, "step": 46135 }, { "epoch": 0.39895893680123823, "grad_norm": 15.161507996925799, "learning_rate": 5.429931349210869e-06, "loss": 0.3660743713378906, "step": 46140 }, { "epoch": 0.39900217032278146, "grad_norm": 2.0649032026852123, "learning_rate": 5.4298118619503615e-06, "loss": 0.2959228515625, "step": 46145 }, { "epoch": 0.39904540384432474, "grad_norm": 16.27519893400018, "learning_rate": 5.429692363483776e-06, "loss": 0.3509735107421875, "step": 46150 }, { "epoch": 0.399088637365868, "grad_norm": 35.32304321051276, "learning_rate": 5.4295728538116636e-06, "loss": 0.3581298828125, "step": 46155 }, { "epoch": 0.39913187088741126, "grad_norm": 1.6832578731058183, "learning_rate": 5.429453332934576e-06, "loss": 0.09610443115234375, "step": 46160 }, { "epoch": 0.39917510440895454, "grad_norm": 0.21954680632535054, "learning_rate": 5.429333800853064e-06, "loss": 0.10103302001953125, "step": 46165 }, { "epoch": 0.39921833793049777, "grad_norm": 1.3949758629994329, "learning_rate": 5.42921425756768e-06, "loss": 0.04197998046875, "step": 46170 }, { "epoch": 0.39926157145204105, "grad_norm": 1.4912083855574754, "learning_rate": 5.429094703078974e-06, "loss": 0.03748397827148438, "step": 46175 }, { "epoch": 0.39930480497358434, "grad_norm": 0.9887415648828352, "learning_rate": 5.428975137387498e-06, "loss": 0.189996337890625, "step": 46180 }, { "epoch": 0.39934803849512757, "grad_norm": 27.361660494868197, "learning_rate": 5.428855560493804e-06, "loss": 0.10720329284667969, "step": 46185 }, { "epoch": 0.39939127201667085, "grad_norm": 2.6270590218740226, "learning_rate": 5.428735972398441e-06, "loss": 0.2372314453125, "step": 46190 }, { "epoch": 0.39943450553821414, "grad_norm": 15.868044036699896, "learning_rate": 5.428616373101964e-06, "loss": 0.15584716796875, "step": 46195 }, { "epoch": 0.39947773905975736, "grad_norm": 22.510984179637287, "learning_rate": 5.428496762604923e-06, "loss": 0.2340576171875, "step": 46200 }, { "epoch": 0.39952097258130065, "grad_norm": 1.2068170450230262, "learning_rate": 5.428377140907868e-06, "loss": 0.09659767150878906, "step": 46205 }, { "epoch": 0.3995642061028439, "grad_norm": 5.471607989422295, "learning_rate": 5.4282575080113526e-06, "loss": 0.09839935302734375, "step": 46210 }, { "epoch": 0.39960743962438716, "grad_norm": 7.61516289771854, "learning_rate": 5.428137863915929e-06, "loss": 0.38338623046875, "step": 46215 }, { "epoch": 0.39965067314593045, "grad_norm": 2.7392158411017014, "learning_rate": 5.428018208622147e-06, "loss": 0.25701370239257815, "step": 46220 }, { "epoch": 0.3996939066674737, "grad_norm": 29.460354054424034, "learning_rate": 5.42789854213056e-06, "loss": 0.17087364196777344, "step": 46225 }, { "epoch": 0.39973714018901696, "grad_norm": 0.6491843060326701, "learning_rate": 5.4277788644417196e-06, "loss": 0.12524795532226562, "step": 46230 }, { "epoch": 0.39978037371056024, "grad_norm": 12.05701425158566, "learning_rate": 5.427659175556178e-06, "loss": 0.14870147705078124, "step": 46235 }, { "epoch": 0.3998236072321035, "grad_norm": 1.3386771694770452, "learning_rate": 5.427539475474486e-06, "loss": 0.10686798095703125, "step": 46240 }, { "epoch": 0.39986684075364676, "grad_norm": 2.7338312211502225, "learning_rate": 5.427419764197198e-06, "loss": 0.04244308471679688, "step": 46245 }, { "epoch": 0.39991007427519, "grad_norm": 0.4493726493871415, "learning_rate": 5.427300041724863e-06, "loss": 0.2202838897705078, "step": 46250 }, { "epoch": 0.39995330779673327, "grad_norm": 16.293699787313248, "learning_rate": 5.427180308058035e-06, "loss": 0.1363311767578125, "step": 46255 }, { "epoch": 0.39999654131827656, "grad_norm": 3.625375119926678, "learning_rate": 5.427060563197266e-06, "loss": 0.3534088134765625, "step": 46260 }, { "epoch": 0.4000397748398198, "grad_norm": 0.2804929982337846, "learning_rate": 5.426940807143108e-06, "loss": 0.301423454284668, "step": 46265 }, { "epoch": 0.40008300836136307, "grad_norm": 4.9638837976057495, "learning_rate": 5.4268210398961135e-06, "loss": 0.26551971435546873, "step": 46270 }, { "epoch": 0.40012624188290635, "grad_norm": 10.730684775276377, "learning_rate": 5.426701261456835e-06, "loss": 0.16232452392578126, "step": 46275 }, { "epoch": 0.4001694754044496, "grad_norm": 1.0194072459451646, "learning_rate": 5.426581471825824e-06, "loss": 0.29117813110351565, "step": 46280 }, { "epoch": 0.40021270892599287, "grad_norm": 0.38521555010076586, "learning_rate": 5.426461671003635e-06, "loss": 0.09234237670898438, "step": 46285 }, { "epoch": 0.4002559424475361, "grad_norm": 21.33827352853395, "learning_rate": 5.426341858990818e-06, "loss": 0.16741485595703126, "step": 46290 }, { "epoch": 0.4002991759690794, "grad_norm": 5.043870671116529, "learning_rate": 5.426222035787927e-06, "loss": 0.244091796875, "step": 46295 }, { "epoch": 0.40034240949062266, "grad_norm": 1.5169824425797753, "learning_rate": 5.426102201395515e-06, "loss": 0.0352203369140625, "step": 46300 }, { "epoch": 0.4003856430121659, "grad_norm": 40.399513265007585, "learning_rate": 5.425982355814133e-06, "loss": 0.2563438415527344, "step": 46305 }, { "epoch": 0.4004288765337092, "grad_norm": 40.794598079453465, "learning_rate": 5.425862499044335e-06, "loss": 0.3467559814453125, "step": 46310 }, { "epoch": 0.40047211005525246, "grad_norm": 6.293910017932192, "learning_rate": 5.425742631086674e-06, "loss": 0.0358856201171875, "step": 46315 }, { "epoch": 0.4005153435767957, "grad_norm": 6.356807243407288, "learning_rate": 5.425622751941702e-06, "loss": 0.131109619140625, "step": 46320 }, { "epoch": 0.400558577098339, "grad_norm": 1.9918727433881245, "learning_rate": 5.425502861609972e-06, "loss": 0.07959990501403809, "step": 46325 }, { "epoch": 0.40060181061988226, "grad_norm": 13.797267850126124, "learning_rate": 5.4253829600920375e-06, "loss": 0.2010141372680664, "step": 46330 }, { "epoch": 0.4006450441414255, "grad_norm": 6.12682005699473, "learning_rate": 5.425263047388452e-06, "loss": 0.0391326904296875, "step": 46335 }, { "epoch": 0.4006882776629688, "grad_norm": 13.619534761141074, "learning_rate": 5.425143123499766e-06, "loss": 0.2107452392578125, "step": 46340 }, { "epoch": 0.400731511184512, "grad_norm": 27.951498091078527, "learning_rate": 5.425023188426535e-06, "loss": 0.20304412841796876, "step": 46345 }, { "epoch": 0.4007747447060553, "grad_norm": 11.082359894950647, "learning_rate": 5.424903242169311e-06, "loss": 0.23934173583984375, "step": 46350 }, { "epoch": 0.40081797822759857, "grad_norm": 10.262007542007401, "learning_rate": 5.4247832847286474e-06, "loss": 0.10382003784179687, "step": 46355 }, { "epoch": 0.4008612117491418, "grad_norm": 27.497477582045157, "learning_rate": 5.424663316105098e-06, "loss": 0.15228805541992188, "step": 46360 }, { "epoch": 0.4009044452706851, "grad_norm": 0.5393353245248748, "learning_rate": 5.424543336299215e-06, "loss": 0.08123416900634765, "step": 46365 }, { "epoch": 0.40094767879222837, "grad_norm": 0.09733608862136386, "learning_rate": 5.4244233453115535e-06, "loss": 0.2044053077697754, "step": 46370 }, { "epoch": 0.4009909123137716, "grad_norm": 29.05693707576983, "learning_rate": 5.424303343142665e-06, "loss": 0.19250907897949218, "step": 46375 }, { "epoch": 0.4010341458353149, "grad_norm": 26.178879020662098, "learning_rate": 5.424183329793104e-06, "loss": 0.318304443359375, "step": 46380 }, { "epoch": 0.4010773793568581, "grad_norm": 12.75224101454974, "learning_rate": 5.424063305263423e-06, "loss": 0.3979248046875, "step": 46385 }, { "epoch": 0.4011206128784014, "grad_norm": 25.486459945350745, "learning_rate": 5.423943269554177e-06, "loss": 0.2769327163696289, "step": 46390 }, { "epoch": 0.4011638463999447, "grad_norm": 7.277082604757436, "learning_rate": 5.423823222665919e-06, "loss": 0.0980010986328125, "step": 46395 }, { "epoch": 0.4012070799214879, "grad_norm": 14.976653297957746, "learning_rate": 5.423703164599202e-06, "loss": 0.1283721923828125, "step": 46400 }, { "epoch": 0.4012503134430312, "grad_norm": 1.0784226000382764, "learning_rate": 5.423583095354579e-06, "loss": 0.043310546875, "step": 46405 }, { "epoch": 0.4012935469645745, "grad_norm": 3.322500592175056, "learning_rate": 5.423463014932607e-06, "loss": 0.16479644775390626, "step": 46410 }, { "epoch": 0.4013367804861177, "grad_norm": 1.0477353315190094, "learning_rate": 5.423342923333836e-06, "loss": 0.01780853271484375, "step": 46415 }, { "epoch": 0.401380014007661, "grad_norm": 24.986976197161045, "learning_rate": 5.4232228205588226e-06, "loss": 0.20626678466796874, "step": 46420 }, { "epoch": 0.4014232475292042, "grad_norm": 4.186639245065592, "learning_rate": 5.42310270660812e-06, "loss": 0.217999267578125, "step": 46425 }, { "epoch": 0.4014664810507475, "grad_norm": 10.89289679212179, "learning_rate": 5.422982581482281e-06, "loss": 0.201654052734375, "step": 46430 }, { "epoch": 0.4015097145722908, "grad_norm": 11.003647430541081, "learning_rate": 5.422862445181861e-06, "loss": 0.1539947509765625, "step": 46435 }, { "epoch": 0.401552948093834, "grad_norm": 18.0484355350781, "learning_rate": 5.422742297707413e-06, "loss": 0.3699493408203125, "step": 46440 }, { "epoch": 0.4015961816153773, "grad_norm": 27.525007962501743, "learning_rate": 5.422622139059492e-06, "loss": 0.35947418212890625, "step": 46445 }, { "epoch": 0.4016394151369206, "grad_norm": 1.0149723972234053, "learning_rate": 5.4225019692386515e-06, "loss": 0.0286102294921875, "step": 46450 }, { "epoch": 0.4016826486584638, "grad_norm": 1.0864317764983478, "learning_rate": 5.422381788245446e-06, "loss": 0.0953094482421875, "step": 46455 }, { "epoch": 0.4017258821800071, "grad_norm": 29.760441893246774, "learning_rate": 5.42226159608043e-06, "loss": 0.16738967895507811, "step": 46460 }, { "epoch": 0.40176911570155033, "grad_norm": 13.279942713346216, "learning_rate": 5.422141392744157e-06, "loss": 0.164898681640625, "step": 46465 }, { "epoch": 0.4018123492230936, "grad_norm": 2.905326608734627, "learning_rate": 5.422021178237182e-06, "loss": 0.12596588134765624, "step": 46470 }, { "epoch": 0.4018555827446369, "grad_norm": 0.44251118638718556, "learning_rate": 5.42190095256006e-06, "loss": 0.41602630615234376, "step": 46475 }, { "epoch": 0.4018988162661801, "grad_norm": 0.08231741104972601, "learning_rate": 5.421780715713344e-06, "loss": 0.09201507568359375, "step": 46480 }, { "epoch": 0.4019420497877234, "grad_norm": 25.54723393617961, "learning_rate": 5.421660467697589e-06, "loss": 0.2618206024169922, "step": 46485 }, { "epoch": 0.4019852833092667, "grad_norm": 2.836222444302388, "learning_rate": 5.421540208513352e-06, "loss": 0.216424560546875, "step": 46490 }, { "epoch": 0.4020285168308099, "grad_norm": 5.9723973819289435, "learning_rate": 5.421419938161183e-06, "loss": 0.15759601593017578, "step": 46495 }, { "epoch": 0.4020717503523532, "grad_norm": 3.249533558548813, "learning_rate": 5.42129965664164e-06, "loss": 0.11658782958984375, "step": 46500 }, { "epoch": 0.4021149838738965, "grad_norm": 16.86182961414274, "learning_rate": 5.4211793639552775e-06, "loss": 0.14256134033203124, "step": 46505 }, { "epoch": 0.4021582173954397, "grad_norm": 7.217253855584849, "learning_rate": 5.42105906010265e-06, "loss": 0.1616180419921875, "step": 46510 }, { "epoch": 0.402201450916983, "grad_norm": 3.814867164553247, "learning_rate": 5.4209387450843114e-06, "loss": 0.2330810546875, "step": 46515 }, { "epoch": 0.40224468443852623, "grad_norm": 32.957029282888, "learning_rate": 5.420818418900818e-06, "loss": 0.20452880859375, "step": 46520 }, { "epoch": 0.4022879179600695, "grad_norm": 21.096247901597284, "learning_rate": 5.420698081552723e-06, "loss": 0.07206573486328124, "step": 46525 }, { "epoch": 0.4023311514816128, "grad_norm": 0.15464738863925634, "learning_rate": 5.420577733040582e-06, "loss": 0.2560295104980469, "step": 46530 }, { "epoch": 0.40237438500315603, "grad_norm": 3.4803005181551296, "learning_rate": 5.420457373364951e-06, "loss": 0.02405853271484375, "step": 46535 }, { "epoch": 0.4024176185246993, "grad_norm": 10.424998092060223, "learning_rate": 5.420337002526385e-06, "loss": 0.0722747802734375, "step": 46540 }, { "epoch": 0.4024608520462426, "grad_norm": 48.57844161740482, "learning_rate": 5.420216620525438e-06, "loss": 0.41383819580078124, "step": 46545 }, { "epoch": 0.40250408556778583, "grad_norm": 2.307082569307237, "learning_rate": 5.4200962273626664e-06, "loss": 0.4246063232421875, "step": 46550 }, { "epoch": 0.4025473190893291, "grad_norm": 15.379705142635217, "learning_rate": 5.4199758230386246e-06, "loss": 0.1512054443359375, "step": 46555 }, { "epoch": 0.40259055261087234, "grad_norm": 0.18815426591032008, "learning_rate": 5.419855407553868e-06, "loss": 0.05248870849609375, "step": 46560 }, { "epoch": 0.4026337861324156, "grad_norm": 19.455366085670384, "learning_rate": 5.419734980908951e-06, "loss": 0.34844970703125, "step": 46565 }, { "epoch": 0.4026770196539589, "grad_norm": 1.8066226357179649, "learning_rate": 5.419614543104431e-06, "loss": 0.16414947509765626, "step": 46570 }, { "epoch": 0.40272025317550214, "grad_norm": 0.4960023738225904, "learning_rate": 5.419494094140863e-06, "loss": 0.34815826416015627, "step": 46575 }, { "epoch": 0.4027634866970454, "grad_norm": 0.7328465527360514, "learning_rate": 5.419373634018802e-06, "loss": 0.2150665283203125, "step": 46580 }, { "epoch": 0.4028067202185887, "grad_norm": 9.697721890836409, "learning_rate": 5.4192531627388025e-06, "loss": 0.08266143798828125, "step": 46585 }, { "epoch": 0.40284995374013194, "grad_norm": 0.1622216532333457, "learning_rate": 5.419132680301422e-06, "loss": 0.0539703369140625, "step": 46590 }, { "epoch": 0.4028931872616752, "grad_norm": 0.2594809804795666, "learning_rate": 5.419012186707216e-06, "loss": 0.07011260986328124, "step": 46595 }, { "epoch": 0.40293642078321845, "grad_norm": 29.253074671459462, "learning_rate": 5.418891681956738e-06, "loss": 0.33680877685546873, "step": 46600 }, { "epoch": 0.40297965430476174, "grad_norm": 7.938236851931368, "learning_rate": 5.418771166050546e-06, "loss": 0.323822021484375, "step": 46605 }, { "epoch": 0.403022887826305, "grad_norm": 8.238657893380699, "learning_rate": 5.418650638989196e-06, "loss": 0.1124420166015625, "step": 46610 }, { "epoch": 0.40306612134784825, "grad_norm": 9.320754216423328, "learning_rate": 5.418530100773241e-06, "loss": 0.09958343505859375, "step": 46615 }, { "epoch": 0.40310935486939153, "grad_norm": 35.448264538929536, "learning_rate": 5.41840955140324e-06, "loss": 0.3522987365722656, "step": 46620 }, { "epoch": 0.4031525883909348, "grad_norm": 4.765384903791516, "learning_rate": 5.418288990879748e-06, "loss": 0.12818145751953125, "step": 46625 }, { "epoch": 0.40319582191247805, "grad_norm": 1.9549507636480217, "learning_rate": 5.418168419203321e-06, "loss": 0.1106689453125, "step": 46630 }, { "epoch": 0.40323905543402133, "grad_norm": 40.44520670170841, "learning_rate": 5.4180478363745155e-06, "loss": 0.59044189453125, "step": 46635 }, { "epoch": 0.40328228895556456, "grad_norm": 15.316876035069878, "learning_rate": 5.417927242393885e-06, "loss": 0.09636993408203125, "step": 46640 }, { "epoch": 0.40332552247710785, "grad_norm": 2.7940601621842056, "learning_rate": 5.417806637261989e-06, "loss": 0.201611328125, "step": 46645 }, { "epoch": 0.40336875599865113, "grad_norm": 16.568139539653075, "learning_rate": 5.417686020979382e-06, "loss": 0.158197021484375, "step": 46650 }, { "epoch": 0.40341198952019436, "grad_norm": 0.7258310501892766, "learning_rate": 5.4175653935466214e-06, "loss": 0.14575958251953125, "step": 46655 }, { "epoch": 0.40345522304173764, "grad_norm": 7.026017339371473, "learning_rate": 5.417444754964263e-06, "loss": 0.15977630615234376, "step": 46660 }, { "epoch": 0.4034984565632809, "grad_norm": 9.776144175938972, "learning_rate": 5.417324105232862e-06, "loss": 0.26663970947265625, "step": 46665 }, { "epoch": 0.40354169008482416, "grad_norm": 33.952616561912755, "learning_rate": 5.417203444352976e-06, "loss": 0.31346282958984373, "step": 46670 }, { "epoch": 0.40358492360636744, "grad_norm": 0.8241922632207959, "learning_rate": 5.417082772325162e-06, "loss": 0.111016845703125, "step": 46675 }, { "epoch": 0.40362815712791067, "grad_norm": 7.562028442806661, "learning_rate": 5.416962089149976e-06, "loss": 0.214324951171875, "step": 46680 }, { "epoch": 0.40367139064945395, "grad_norm": 19.99516205215316, "learning_rate": 5.416841394827973e-06, "loss": 0.1112213134765625, "step": 46685 }, { "epoch": 0.40371462417099724, "grad_norm": 13.978079080526605, "learning_rate": 5.416720689359712e-06, "loss": 0.0692291259765625, "step": 46690 }, { "epoch": 0.40375785769254047, "grad_norm": 3.6790765125075957, "learning_rate": 5.416599972745748e-06, "loss": 0.19815139770507811, "step": 46695 }, { "epoch": 0.40380109121408375, "grad_norm": 0.19681230662719024, "learning_rate": 5.4164792449866395e-06, "loss": 0.04152679443359375, "step": 46700 }, { "epoch": 0.40384432473562704, "grad_norm": 2.2018350266862474, "learning_rate": 5.416358506082942e-06, "loss": 0.1615020751953125, "step": 46705 }, { "epoch": 0.40388755825717026, "grad_norm": 36.8175375430685, "learning_rate": 5.416237756035212e-06, "loss": 0.3054718017578125, "step": 46710 }, { "epoch": 0.40393079177871355, "grad_norm": 3.6445259916607036, "learning_rate": 5.4161169948440075e-06, "loss": 0.15428543090820312, "step": 46715 }, { "epoch": 0.40397402530025683, "grad_norm": 24.096357522478, "learning_rate": 5.415996222509885e-06, "loss": 0.21138458251953124, "step": 46720 }, { "epoch": 0.40401725882180006, "grad_norm": 0.6797223564942741, "learning_rate": 5.415875439033401e-06, "loss": 0.4022193908691406, "step": 46725 }, { "epoch": 0.40406049234334335, "grad_norm": 5.130061056679608, "learning_rate": 5.415754644415112e-06, "loss": 0.06480941772460938, "step": 46730 }, { "epoch": 0.4041037258648866, "grad_norm": 3.6548034655789903, "learning_rate": 5.4156338386555785e-06, "loss": 0.088067626953125, "step": 46735 }, { "epoch": 0.40414695938642986, "grad_norm": 1.2984407242165432, "learning_rate": 5.415513021755354e-06, "loss": 0.12079200744628907, "step": 46740 }, { "epoch": 0.40419019290797314, "grad_norm": 28.665573982474093, "learning_rate": 5.415392193714996e-06, "loss": 0.40689544677734374, "step": 46745 }, { "epoch": 0.4042334264295164, "grad_norm": 16.624875378142118, "learning_rate": 5.415271354535064e-06, "loss": 0.07353515625, "step": 46750 }, { "epoch": 0.40427665995105966, "grad_norm": 1.4616463413382283, "learning_rate": 5.415150504216113e-06, "loss": 0.0741363525390625, "step": 46755 }, { "epoch": 0.40431989347260294, "grad_norm": 0.8663617437939605, "learning_rate": 5.415029642758701e-06, "loss": 0.0226654052734375, "step": 46760 }, { "epoch": 0.40436312699414617, "grad_norm": 0.5076329908020502, "learning_rate": 5.414908770163387e-06, "loss": 0.527798843383789, "step": 46765 }, { "epoch": 0.40440636051568946, "grad_norm": 13.73349468824997, "learning_rate": 5.414787886430726e-06, "loss": 0.183587646484375, "step": 46770 }, { "epoch": 0.4044495940372327, "grad_norm": 43.54539601950611, "learning_rate": 5.414666991561277e-06, "loss": 0.2636932373046875, "step": 46775 }, { "epoch": 0.40449282755877597, "grad_norm": 1.0452098482566898, "learning_rate": 5.414546085555597e-06, "loss": 0.0920074462890625, "step": 46780 }, { "epoch": 0.40453606108031925, "grad_norm": 16.79536913166361, "learning_rate": 5.414425168414244e-06, "loss": 0.3194988250732422, "step": 46785 }, { "epoch": 0.4045792946018625, "grad_norm": 26.75832907021791, "learning_rate": 5.414304240137776e-06, "loss": 0.1923236846923828, "step": 46790 }, { "epoch": 0.40462252812340577, "grad_norm": 2.864925573914559, "learning_rate": 5.414183300726749e-06, "loss": 0.08297119140625, "step": 46795 }, { "epoch": 0.40466576164494905, "grad_norm": 2.0163748306747418, "learning_rate": 5.414062350181722e-06, "loss": 0.26929168701171874, "step": 46800 }, { "epoch": 0.4047089951664923, "grad_norm": 35.19381260105806, "learning_rate": 5.413941388503254e-06, "loss": 0.393853759765625, "step": 46805 }, { "epoch": 0.40475222868803556, "grad_norm": 0.10347795929422426, "learning_rate": 5.413820415691901e-06, "loss": 0.15150508880615235, "step": 46810 }, { "epoch": 0.4047954622095788, "grad_norm": 18.4825347246553, "learning_rate": 5.413699431748222e-06, "loss": 0.11048660278320313, "step": 46815 }, { "epoch": 0.4048386957311221, "grad_norm": 3.301882937272807, "learning_rate": 5.413578436672774e-06, "loss": 0.0869232177734375, "step": 46820 }, { "epoch": 0.40488192925266536, "grad_norm": 0.34692533177916324, "learning_rate": 5.4134574304661145e-06, "loss": 0.08740997314453125, "step": 46825 }, { "epoch": 0.4049251627742086, "grad_norm": 0.5870216698904741, "learning_rate": 5.413336413128804e-06, "loss": 0.24383926391601562, "step": 46830 }, { "epoch": 0.4049683962957519, "grad_norm": 2.022606962988488, "learning_rate": 5.413215384661399e-06, "loss": 0.36318359375, "step": 46835 }, { "epoch": 0.40501162981729516, "grad_norm": 12.873492856224619, "learning_rate": 5.4130943450644575e-06, "loss": 0.126202392578125, "step": 46840 }, { "epoch": 0.4050548633388384, "grad_norm": 6.481415389525959, "learning_rate": 5.412973294338538e-06, "loss": 0.32647705078125, "step": 46845 }, { "epoch": 0.4050980968603817, "grad_norm": 11.353751020907428, "learning_rate": 5.4128522324842e-06, "loss": 0.16556396484375, "step": 46850 }, { "epoch": 0.4051413303819249, "grad_norm": 12.158843285839678, "learning_rate": 5.412731159501999e-06, "loss": 0.26386566162109376, "step": 46855 }, { "epoch": 0.4051845639034682, "grad_norm": 1.0598174327987913, "learning_rate": 5.412610075392496e-06, "loss": 0.0553924560546875, "step": 46860 }, { "epoch": 0.40522779742501147, "grad_norm": 1.7309237424404829, "learning_rate": 5.412488980156249e-06, "loss": 0.19538002014160155, "step": 46865 }, { "epoch": 0.4052710309465547, "grad_norm": 2.112265938783345, "learning_rate": 5.412367873793815e-06, "loss": 0.069158935546875, "step": 46870 }, { "epoch": 0.405314264468098, "grad_norm": 2.110484946415149, "learning_rate": 5.4122467563057546e-06, "loss": 0.152435302734375, "step": 46875 }, { "epoch": 0.40535749798964127, "grad_norm": 1.1625055331160554, "learning_rate": 5.412125627692624e-06, "loss": 0.335076904296875, "step": 46880 }, { "epoch": 0.4054007315111845, "grad_norm": 1.656720265032619, "learning_rate": 5.412004487954985e-06, "loss": 0.03736305236816406, "step": 46885 }, { "epoch": 0.4054439650327278, "grad_norm": 83.19440889843742, "learning_rate": 5.411883337093393e-06, "loss": 0.376080322265625, "step": 46890 }, { "epoch": 0.40548719855427107, "grad_norm": 0.2519392064101671, "learning_rate": 5.411762175108408e-06, "loss": 0.1297975540161133, "step": 46895 }, { "epoch": 0.4055304320758143, "grad_norm": 1.5280412537844392, "learning_rate": 5.411641002000589e-06, "loss": 0.2974082946777344, "step": 46900 }, { "epoch": 0.4055736655973576, "grad_norm": 4.60412127063727, "learning_rate": 5.411519817770496e-06, "loss": 0.10074462890625, "step": 46905 }, { "epoch": 0.4056168991189008, "grad_norm": 0.6466932180440713, "learning_rate": 5.411398622418686e-06, "loss": 0.15884170532226563, "step": 46910 }, { "epoch": 0.4056601326404441, "grad_norm": 38.96471254093137, "learning_rate": 5.4112774159457175e-06, "loss": 0.2534515380859375, "step": 46915 }, { "epoch": 0.4057033661619874, "grad_norm": 0.1558191450255482, "learning_rate": 5.411156198352152e-06, "loss": 0.040576171875, "step": 46920 }, { "epoch": 0.4057465996835306, "grad_norm": 2.3136839372917035, "learning_rate": 5.411034969638547e-06, "loss": 0.064703369140625, "step": 46925 }, { "epoch": 0.4057898332050739, "grad_norm": 1.9264577390274649, "learning_rate": 5.410913729805461e-06, "loss": 0.09429893493652344, "step": 46930 }, { "epoch": 0.4058330667266172, "grad_norm": 0.7811992823860967, "learning_rate": 5.410792478853455e-06, "loss": 0.1817718505859375, "step": 46935 }, { "epoch": 0.4058763002481604, "grad_norm": 9.103370964124082, "learning_rate": 5.4106712167830865e-06, "loss": 0.14377784729003906, "step": 46940 }, { "epoch": 0.4059195337697037, "grad_norm": 0.18172342384649198, "learning_rate": 5.410549943594915e-06, "loss": 0.49226856231689453, "step": 46945 }, { "epoch": 0.4059627672912469, "grad_norm": 38.90396963858381, "learning_rate": 5.410428659289501e-06, "loss": 0.1553070068359375, "step": 46950 }, { "epoch": 0.4060060008127902, "grad_norm": 3.5724588769296473, "learning_rate": 5.410307363867403e-06, "loss": 0.6404953002929688, "step": 46955 }, { "epoch": 0.4060492343343335, "grad_norm": 33.07161209875613, "learning_rate": 5.410186057329179e-06, "loss": 0.224456787109375, "step": 46960 }, { "epoch": 0.4060924678558767, "grad_norm": 13.93481251954223, "learning_rate": 5.410064739675391e-06, "loss": 0.16484375, "step": 46965 }, { "epoch": 0.40613570137742, "grad_norm": 25.641798938979402, "learning_rate": 5.409943410906598e-06, "loss": 0.11002349853515625, "step": 46970 }, { "epoch": 0.4061789348989633, "grad_norm": 1.1571729618365982, "learning_rate": 5.409822071023358e-06, "loss": 0.13892478942871095, "step": 46975 }, { "epoch": 0.4062221684205065, "grad_norm": 5.893952642493627, "learning_rate": 5.409700720026231e-06, "loss": 0.0968484878540039, "step": 46980 }, { "epoch": 0.4062654019420498, "grad_norm": 0.45419815286160375, "learning_rate": 5.409579357915778e-06, "loss": 0.087554931640625, "step": 46985 }, { "epoch": 0.406308635463593, "grad_norm": 8.692001350619657, "learning_rate": 5.409457984692558e-06, "loss": 0.1411407470703125, "step": 46990 }, { "epoch": 0.4063518689851363, "grad_norm": 17.734353456317358, "learning_rate": 5.409336600357131e-06, "loss": 0.19351806640625, "step": 46995 }, { "epoch": 0.4063951025066796, "grad_norm": 0.23914488189946262, "learning_rate": 5.409215204910056e-06, "loss": 0.06713218688964843, "step": 47000 }, { "epoch": 0.4064383360282228, "grad_norm": 33.262863161274964, "learning_rate": 5.409093798351893e-06, "loss": 0.223974609375, "step": 47005 }, { "epoch": 0.4064815695497661, "grad_norm": 60.646278410608396, "learning_rate": 5.408972380683203e-06, "loss": 0.3185791015625, "step": 47010 }, { "epoch": 0.4065248030713094, "grad_norm": 1.476958676686028, "learning_rate": 5.4088509519045444e-06, "loss": 0.0362548828125, "step": 47015 }, { "epoch": 0.4065680365928526, "grad_norm": 1.3821546873497494, "learning_rate": 5.408729512016479e-06, "loss": 0.08218536376953126, "step": 47020 }, { "epoch": 0.4066112701143959, "grad_norm": 27.617073573105063, "learning_rate": 5.408608061019565e-06, "loss": 0.13320693969726563, "step": 47025 }, { "epoch": 0.40665450363593914, "grad_norm": 27.67502463434984, "learning_rate": 5.408486598914365e-06, "loss": 0.3040252685546875, "step": 47030 }, { "epoch": 0.4066977371574824, "grad_norm": 9.175410184203802, "learning_rate": 5.408365125701437e-06, "loss": 0.33799285888671876, "step": 47035 }, { "epoch": 0.4067409706790257, "grad_norm": 0.4775172563975779, "learning_rate": 5.4082436413813406e-06, "loss": 0.059637451171875, "step": 47040 }, { "epoch": 0.40678420420056893, "grad_norm": 17.75011770446585, "learning_rate": 5.4081221459546384e-06, "loss": 0.09751663208007813, "step": 47045 }, { "epoch": 0.4068274377221122, "grad_norm": 11.69848882094574, "learning_rate": 5.4080006394218885e-06, "loss": 0.2147216796875, "step": 47050 }, { "epoch": 0.4068706712436555, "grad_norm": 31.160398703774444, "learning_rate": 5.407879121783654e-06, "loss": 0.173016357421875, "step": 47055 }, { "epoch": 0.40691390476519873, "grad_norm": 8.308386935768265, "learning_rate": 5.407757593040493e-06, "loss": 0.086102294921875, "step": 47060 }, { "epoch": 0.406957138286742, "grad_norm": 5.866221720359643, "learning_rate": 5.407636053192967e-06, "loss": 0.098529052734375, "step": 47065 }, { "epoch": 0.4070003718082853, "grad_norm": 9.793671350917561, "learning_rate": 5.407514502241635e-06, "loss": 0.1922119140625, "step": 47070 }, { "epoch": 0.40704360532982853, "grad_norm": 1.7900673928035993, "learning_rate": 5.407392940187061e-06, "loss": 0.2910675048828125, "step": 47075 }, { "epoch": 0.4070868388513718, "grad_norm": 7.2467825659940726, "learning_rate": 5.407271367029802e-06, "loss": 0.3473846435546875, "step": 47080 }, { "epoch": 0.40713007237291504, "grad_norm": 45.57915646430686, "learning_rate": 5.40714978277042e-06, "loss": 0.23738231658935546, "step": 47085 }, { "epoch": 0.4071733058944583, "grad_norm": 3.4299162397097045, "learning_rate": 5.407028187409478e-06, "loss": 0.13460922241210938, "step": 47090 }, { "epoch": 0.4072165394160016, "grad_norm": 2.697515854956884, "learning_rate": 5.406906580947532e-06, "loss": 0.245343017578125, "step": 47095 }, { "epoch": 0.40725977293754484, "grad_norm": 2.5231582180399417, "learning_rate": 5.406784963385147e-06, "loss": 0.2260498046875, "step": 47100 }, { "epoch": 0.4073030064590881, "grad_norm": 19.70341696473216, "learning_rate": 5.406663334722882e-06, "loss": 0.1533477783203125, "step": 47105 }, { "epoch": 0.4073462399806314, "grad_norm": 0.12172702805161864, "learning_rate": 5.406541694961298e-06, "loss": 0.12250537872314453, "step": 47110 }, { "epoch": 0.40738947350217464, "grad_norm": 0.7963603294984821, "learning_rate": 5.406420044100957e-06, "loss": 0.221038818359375, "step": 47115 }, { "epoch": 0.4074327070237179, "grad_norm": 6.638652799418068, "learning_rate": 5.406298382142419e-06, "loss": 0.5341110229492188, "step": 47120 }, { "epoch": 0.40747594054526115, "grad_norm": 1.0661584852090795, "learning_rate": 5.406176709086246e-06, "loss": 0.0557220458984375, "step": 47125 }, { "epoch": 0.40751917406680443, "grad_norm": 6.749210425190622, "learning_rate": 5.4060550249329984e-06, "loss": 0.1409912109375, "step": 47130 }, { "epoch": 0.4075624075883477, "grad_norm": 11.008277581749217, "learning_rate": 5.4059333296832365e-06, "loss": 0.15027618408203125, "step": 47135 }, { "epoch": 0.40760564110989095, "grad_norm": 19.68670025920023, "learning_rate": 5.405811623337525e-06, "loss": 0.18846664428710938, "step": 47140 }, { "epoch": 0.40764887463143423, "grad_norm": 29.752051413867918, "learning_rate": 5.405689905896422e-06, "loss": 0.2673135757446289, "step": 47145 }, { "epoch": 0.4076921081529775, "grad_norm": 27.9211181272703, "learning_rate": 5.405568177360488e-06, "loss": 0.1781219482421875, "step": 47150 }, { "epoch": 0.40773534167452075, "grad_norm": 0.8767816378106146, "learning_rate": 5.405446437730288e-06, "loss": 0.09688720703125, "step": 47155 }, { "epoch": 0.40777857519606403, "grad_norm": 1.1958466928796068, "learning_rate": 5.405324687006381e-06, "loss": 0.1281707763671875, "step": 47160 }, { "epoch": 0.40782180871760726, "grad_norm": 5.43829341888938, "learning_rate": 5.405202925189329e-06, "loss": 0.2475189208984375, "step": 47165 }, { "epoch": 0.40786504223915054, "grad_norm": 3.391108340325165, "learning_rate": 5.405081152279694e-06, "loss": 0.04311294555664062, "step": 47170 }, { "epoch": 0.40790827576069383, "grad_norm": 1.772905389327602, "learning_rate": 5.404959368278036e-06, "loss": 0.08461685180664062, "step": 47175 }, { "epoch": 0.40795150928223706, "grad_norm": 0.8849051088120595, "learning_rate": 5.40483757318492e-06, "loss": 0.2237548828125, "step": 47180 }, { "epoch": 0.40799474280378034, "grad_norm": 52.56139488539261, "learning_rate": 5.404715767000904e-06, "loss": 0.27599639892578126, "step": 47185 }, { "epoch": 0.4080379763253236, "grad_norm": 14.645637650225051, "learning_rate": 5.404593949726552e-06, "loss": 0.1742706298828125, "step": 47190 }, { "epoch": 0.40808120984686685, "grad_norm": 3.413186733551852, "learning_rate": 5.404472121362425e-06, "loss": 0.030873870849609374, "step": 47195 }, { "epoch": 0.40812444336841014, "grad_norm": 8.513377606727905, "learning_rate": 5.404350281909085e-06, "loss": 0.11714458465576172, "step": 47200 }, { "epoch": 0.40816767688995337, "grad_norm": 29.92088119153848, "learning_rate": 5.404228431367094e-06, "loss": 0.193157958984375, "step": 47205 }, { "epoch": 0.40821091041149665, "grad_norm": 8.237951322277544, "learning_rate": 5.404106569737014e-06, "loss": 0.21845855712890624, "step": 47210 }, { "epoch": 0.40825414393303994, "grad_norm": 36.0365977599554, "learning_rate": 5.4039846970194065e-06, "loss": 0.2978782653808594, "step": 47215 }, { "epoch": 0.40829737745458317, "grad_norm": 5.564307984921338, "learning_rate": 5.403862813214834e-06, "loss": 0.14238548278808594, "step": 47220 }, { "epoch": 0.40834061097612645, "grad_norm": 2.2773502749444305, "learning_rate": 5.403740918323859e-06, "loss": 0.026499176025390626, "step": 47225 }, { "epoch": 0.40838384449766973, "grad_norm": 27.076309470207917, "learning_rate": 5.403619012347044e-06, "loss": 0.390350341796875, "step": 47230 }, { "epoch": 0.40842707801921296, "grad_norm": 7.138022053831748, "learning_rate": 5.403497095284949e-06, "loss": 0.1684906005859375, "step": 47235 }, { "epoch": 0.40847031154075625, "grad_norm": 12.208062333063836, "learning_rate": 5.403375167138139e-06, "loss": 0.05475959777832031, "step": 47240 }, { "epoch": 0.4085135450622995, "grad_norm": 15.509086039225519, "learning_rate": 5.403253227907174e-06, "loss": 0.0600616455078125, "step": 47245 }, { "epoch": 0.40855677858384276, "grad_norm": 30.956454591937103, "learning_rate": 5.403131277592619e-06, "loss": 0.29643936157226564, "step": 47250 }, { "epoch": 0.40860001210538605, "grad_norm": 8.592283917742614, "learning_rate": 5.403009316195034e-06, "loss": 0.1628887176513672, "step": 47255 }, { "epoch": 0.4086432456269293, "grad_norm": 2.9847417238182103, "learning_rate": 5.402887343714981e-06, "loss": 0.17199935913085937, "step": 47260 }, { "epoch": 0.40868647914847256, "grad_norm": 4.511898149847598, "learning_rate": 5.402765360153026e-06, "loss": 0.14860305786132813, "step": 47265 }, { "epoch": 0.40872971267001584, "grad_norm": 12.15736834531255, "learning_rate": 5.4026433655097286e-06, "loss": 0.24230728149414063, "step": 47270 }, { "epoch": 0.4087729461915591, "grad_norm": 29.043721935811686, "learning_rate": 5.402521359785652e-06, "loss": 0.331597900390625, "step": 47275 }, { "epoch": 0.40881617971310236, "grad_norm": 14.869004727854684, "learning_rate": 5.40239934298136e-06, "loss": 0.14420547485351562, "step": 47280 }, { "epoch": 0.40885941323464564, "grad_norm": 4.578570228546097, "learning_rate": 5.402277315097415e-06, "loss": 0.26216583251953124, "step": 47285 }, { "epoch": 0.40890264675618887, "grad_norm": 7.159434192027318, "learning_rate": 5.402155276134378e-06, "loss": 0.2026595115661621, "step": 47290 }, { "epoch": 0.40894588027773215, "grad_norm": 61.64040883910072, "learning_rate": 5.402033226092812e-06, "loss": 0.285675048828125, "step": 47295 }, { "epoch": 0.4089891137992754, "grad_norm": 15.877047170690409, "learning_rate": 5.401911164973283e-06, "loss": 0.17875137329101562, "step": 47300 }, { "epoch": 0.40903234732081867, "grad_norm": 29.926476905773526, "learning_rate": 5.401789092776352e-06, "loss": 0.60703125, "step": 47305 }, { "epoch": 0.40907558084236195, "grad_norm": 4.4086321666925805, "learning_rate": 5.401667009502581e-06, "loss": 0.4241912841796875, "step": 47310 }, { "epoch": 0.4091188143639052, "grad_norm": 0.27404392080522616, "learning_rate": 5.401544915152535e-06, "loss": 0.4280120849609375, "step": 47315 }, { "epoch": 0.40916204788544847, "grad_norm": 8.612319147757855, "learning_rate": 5.401422809726775e-06, "loss": 0.316754150390625, "step": 47320 }, { "epoch": 0.40920528140699175, "grad_norm": 1.7293091787093, "learning_rate": 5.401300693225866e-06, "loss": 0.05440216064453125, "step": 47325 }, { "epoch": 0.409248514928535, "grad_norm": 9.645367106350323, "learning_rate": 5.40117856565037e-06, "loss": 0.18226165771484376, "step": 47330 }, { "epoch": 0.40929174845007826, "grad_norm": 6.467372982472913, "learning_rate": 5.40105642700085e-06, "loss": 0.297381591796875, "step": 47335 }, { "epoch": 0.4093349819716215, "grad_norm": 29.597898720623753, "learning_rate": 5.400934277277871e-06, "loss": 0.263934326171875, "step": 47340 }, { "epoch": 0.4093782154931648, "grad_norm": 13.545040988980993, "learning_rate": 5.400812116481994e-06, "loss": 0.25670166015625, "step": 47345 }, { "epoch": 0.40942144901470806, "grad_norm": 10.455294264762353, "learning_rate": 5.400689944613784e-06, "loss": 0.15720977783203124, "step": 47350 }, { "epoch": 0.4094646825362513, "grad_norm": 13.900204976373258, "learning_rate": 5.400567761673805e-06, "loss": 0.1726806640625, "step": 47355 }, { "epoch": 0.4095079160577946, "grad_norm": 1.1314843054592592, "learning_rate": 5.400445567662619e-06, "loss": 0.209564208984375, "step": 47360 }, { "epoch": 0.40955114957933786, "grad_norm": 27.341438158477413, "learning_rate": 5.400323362580791e-06, "loss": 0.414044189453125, "step": 47365 }, { "epoch": 0.4095943831008811, "grad_norm": 1.4223967849531083, "learning_rate": 5.400201146428883e-06, "loss": 0.561553955078125, "step": 47370 }, { "epoch": 0.40963761662242437, "grad_norm": 15.4723347848354, "learning_rate": 5.400078919207459e-06, "loss": 0.16759414672851564, "step": 47375 }, { "epoch": 0.4096808501439676, "grad_norm": 3.74214673447196, "learning_rate": 5.399956680917084e-06, "loss": 0.20857200622558594, "step": 47380 }, { "epoch": 0.4097240836655109, "grad_norm": 11.454103019540467, "learning_rate": 5.3998344315583205e-06, "loss": 0.123529052734375, "step": 47385 }, { "epoch": 0.40976731718705417, "grad_norm": 9.591375753370302, "learning_rate": 5.399712171131732e-06, "loss": 0.1727466583251953, "step": 47390 }, { "epoch": 0.4098105507085974, "grad_norm": 14.995713495377966, "learning_rate": 5.399589899637883e-06, "loss": 0.17964706420898438, "step": 47395 }, { "epoch": 0.4098537842301407, "grad_norm": 23.21006428797451, "learning_rate": 5.399467617077339e-06, "loss": 0.8502113342285156, "step": 47400 }, { "epoch": 0.40989701775168397, "grad_norm": 0.497713018951399, "learning_rate": 5.399345323450661e-06, "loss": 0.25684127807617185, "step": 47405 }, { "epoch": 0.4099402512732272, "grad_norm": 10.97427159445519, "learning_rate": 5.399223018758415e-06, "loss": 0.23380889892578124, "step": 47410 }, { "epoch": 0.4099834847947705, "grad_norm": 12.962387213431473, "learning_rate": 5.399100703001164e-06, "loss": 0.14286956787109376, "step": 47415 }, { "epoch": 0.4100267183163137, "grad_norm": 0.8968591490845063, "learning_rate": 5.3989783761794725e-06, "loss": 0.1847240447998047, "step": 47420 }, { "epoch": 0.410069951837857, "grad_norm": 9.317029413419263, "learning_rate": 5.398856038293905e-06, "loss": 0.20111465454101562, "step": 47425 }, { "epoch": 0.4101131853594003, "grad_norm": 28.214121479415866, "learning_rate": 5.3987336893450255e-06, "loss": 0.1786224365234375, "step": 47430 }, { "epoch": 0.4101564188809435, "grad_norm": 11.517481758034378, "learning_rate": 5.3986113293333974e-06, "loss": 0.4484466552734375, "step": 47435 }, { "epoch": 0.4101996524024868, "grad_norm": 0.4642777063286122, "learning_rate": 5.398488958259587e-06, "loss": 0.10297393798828125, "step": 47440 }, { "epoch": 0.4102428859240301, "grad_norm": 6.703087867120369, "learning_rate": 5.398366576124157e-06, "loss": 0.08568801879882812, "step": 47445 }, { "epoch": 0.4102861194455733, "grad_norm": 30.481136361662568, "learning_rate": 5.398244182927672e-06, "loss": 0.2982627868652344, "step": 47450 }, { "epoch": 0.4103293529671166, "grad_norm": 29.220423074950613, "learning_rate": 5.398121778670697e-06, "loss": 0.1785888671875, "step": 47455 }, { "epoch": 0.4103725864886599, "grad_norm": 4.124632436297828, "learning_rate": 5.397999363353796e-06, "loss": 0.044492340087890624, "step": 47460 }, { "epoch": 0.4104158200102031, "grad_norm": 14.003018455694983, "learning_rate": 5.397876936977534e-06, "loss": 0.09214649200439454, "step": 47465 }, { "epoch": 0.4104590535317464, "grad_norm": 18.41219279194675, "learning_rate": 5.397754499542475e-06, "loss": 0.16936492919921875, "step": 47470 }, { "epoch": 0.4105022870532896, "grad_norm": 2.030429512558372, "learning_rate": 5.397632051049186e-06, "loss": 0.13919219970703126, "step": 47475 }, { "epoch": 0.4105455205748329, "grad_norm": 31.166278280410694, "learning_rate": 5.397509591498227e-06, "loss": 0.35643310546875, "step": 47480 }, { "epoch": 0.4105887540963762, "grad_norm": 10.504223527543296, "learning_rate": 5.397387120890167e-06, "loss": 0.15203857421875, "step": 47485 }, { "epoch": 0.4106319876179194, "grad_norm": 19.906272737890326, "learning_rate": 5.397264639225569e-06, "loss": 0.2745639801025391, "step": 47490 }, { "epoch": 0.4106752211394627, "grad_norm": 13.679120716744823, "learning_rate": 5.397142146504999e-06, "loss": 0.07603759765625, "step": 47495 }, { "epoch": 0.410718454661006, "grad_norm": 1.1241325261632462, "learning_rate": 5.397019642729021e-06, "loss": 0.07782516479492188, "step": 47500 }, { "epoch": 0.4107616881825492, "grad_norm": 0.21584567672390856, "learning_rate": 5.3968971278982e-06, "loss": 0.16926727294921876, "step": 47505 }, { "epoch": 0.4108049217040925, "grad_norm": 16.377612269780865, "learning_rate": 5.3967746020131025e-06, "loss": 0.21140289306640625, "step": 47510 }, { "epoch": 0.4108481552256357, "grad_norm": 24.37752142401969, "learning_rate": 5.39665206507429e-06, "loss": 0.2982421875, "step": 47515 }, { "epoch": 0.410891388747179, "grad_norm": 8.669190013585004, "learning_rate": 5.396529517082332e-06, "loss": 0.0515625, "step": 47520 }, { "epoch": 0.4109346222687223, "grad_norm": 45.71097058030223, "learning_rate": 5.396406958037791e-06, "loss": 0.21686553955078125, "step": 47525 }, { "epoch": 0.4109778557902655, "grad_norm": 52.63875099751974, "learning_rate": 5.396284387941232e-06, "loss": 0.4069999694824219, "step": 47530 }, { "epoch": 0.4110210893118088, "grad_norm": 35.85636428775196, "learning_rate": 5.3961618067932225e-06, "loss": 0.18504180908203124, "step": 47535 }, { "epoch": 0.4110643228333521, "grad_norm": 2.057251310567495, "learning_rate": 5.396039214594325e-06, "loss": 0.056562042236328124, "step": 47540 }, { "epoch": 0.4111075563548953, "grad_norm": 26.298624566958463, "learning_rate": 5.395916611345107e-06, "loss": 0.4835479736328125, "step": 47545 }, { "epoch": 0.4111507898764386, "grad_norm": 6.940863978303095, "learning_rate": 5.395793997046134e-06, "loss": 0.0886138916015625, "step": 47550 }, { "epoch": 0.41119402339798183, "grad_norm": 10.886156907478524, "learning_rate": 5.395671371697971e-06, "loss": 0.15648193359375, "step": 47555 }, { "epoch": 0.4112372569195251, "grad_norm": 10.492541772141937, "learning_rate": 5.395548735301182e-06, "loss": 0.35703125, "step": 47560 }, { "epoch": 0.4112804904410684, "grad_norm": 11.261587598748049, "learning_rate": 5.395426087856334e-06, "loss": 0.11177978515625, "step": 47565 }, { "epoch": 0.41132372396261163, "grad_norm": 35.556903265075064, "learning_rate": 5.395303429363993e-06, "loss": 0.7778091430664062, "step": 47570 }, { "epoch": 0.4113669574841549, "grad_norm": 24.537530211487812, "learning_rate": 5.3951807598247244e-06, "loss": 0.26024856567382815, "step": 47575 }, { "epoch": 0.4114101910056982, "grad_norm": 0.16709900186109491, "learning_rate": 5.395058079239093e-06, "loss": 0.1090972900390625, "step": 47580 }, { "epoch": 0.41145342452724143, "grad_norm": 5.596910025156153, "learning_rate": 5.394935387607666e-06, "loss": 0.057568359375, "step": 47585 }, { "epoch": 0.4114966580487847, "grad_norm": 3.243865814323527, "learning_rate": 5.394812684931008e-06, "loss": 0.14328765869140625, "step": 47590 }, { "epoch": 0.41153989157032794, "grad_norm": 23.72579127987703, "learning_rate": 5.394689971209687e-06, "loss": 0.0805755615234375, "step": 47595 }, { "epoch": 0.4115831250918712, "grad_norm": 2.3436412310347654, "learning_rate": 5.394567246444266e-06, "loss": 0.2815589904785156, "step": 47600 }, { "epoch": 0.4116263586134145, "grad_norm": 3.400832669186298, "learning_rate": 5.394444510635314e-06, "loss": 0.3360809326171875, "step": 47605 }, { "epoch": 0.41166959213495774, "grad_norm": 0.7322364568433133, "learning_rate": 5.394321763783394e-06, "loss": 0.13366851806640626, "step": 47610 }, { "epoch": 0.411712825656501, "grad_norm": 23.004030746904267, "learning_rate": 5.394199005889074e-06, "loss": 0.24852256774902343, "step": 47615 }, { "epoch": 0.4117560591780443, "grad_norm": 24.594439283841158, "learning_rate": 5.39407623695292e-06, "loss": 0.17326202392578124, "step": 47620 }, { "epoch": 0.41179929269958754, "grad_norm": 12.127203286500412, "learning_rate": 5.3939534569754974e-06, "loss": 0.1088836669921875, "step": 47625 }, { "epoch": 0.4118425262211308, "grad_norm": 0.7896248592193905, "learning_rate": 5.393830665957374e-06, "loss": 0.12652664184570311, "step": 47630 }, { "epoch": 0.4118857597426741, "grad_norm": 1.222403573934527, "learning_rate": 5.393707863899114e-06, "loss": 0.12577247619628906, "step": 47635 }, { "epoch": 0.41192899326421734, "grad_norm": 21.47120275244497, "learning_rate": 5.393585050801286e-06, "loss": 0.10240631103515625, "step": 47640 }, { "epoch": 0.4119722267857606, "grad_norm": 17.919846590060224, "learning_rate": 5.393462226664453e-06, "loss": 0.41741943359375, "step": 47645 }, { "epoch": 0.41201546030730385, "grad_norm": 2.112829868070975, "learning_rate": 5.393339391489186e-06, "loss": 0.11145172119140626, "step": 47650 }, { "epoch": 0.41205869382884713, "grad_norm": 0.8411198105675619, "learning_rate": 5.3932165452760485e-06, "loss": 0.195941162109375, "step": 47655 }, { "epoch": 0.4121019273503904, "grad_norm": 10.099471895598025, "learning_rate": 5.3930936880256076e-06, "loss": 0.3707672119140625, "step": 47660 }, { "epoch": 0.41214516087193365, "grad_norm": 5.227441615549836, "learning_rate": 5.39297081973843e-06, "loss": 0.300927734375, "step": 47665 }, { "epoch": 0.41218839439347693, "grad_norm": 5.467688586800921, "learning_rate": 5.392847940415082e-06, "loss": 0.248553466796875, "step": 47670 }, { "epoch": 0.4122316279150202, "grad_norm": 5.6419322432773615, "learning_rate": 5.392725050056132e-06, "loss": 0.1516265869140625, "step": 47675 }, { "epoch": 0.41227486143656344, "grad_norm": 5.222218470040416, "learning_rate": 5.392602148662145e-06, "loss": 0.1569580078125, "step": 47680 }, { "epoch": 0.41231809495810673, "grad_norm": 7.752874959498346, "learning_rate": 5.392479236233688e-06, "loss": 0.21187515258789064, "step": 47685 }, { "epoch": 0.41236132847964996, "grad_norm": 0.30838114930731586, "learning_rate": 5.392356312771329e-06, "loss": 0.4886589050292969, "step": 47690 }, { "epoch": 0.41240456200119324, "grad_norm": 55.463278985702615, "learning_rate": 5.3922333782756325e-06, "loss": 0.35928192138671877, "step": 47695 }, { "epoch": 0.4124477955227365, "grad_norm": 8.15117000776534, "learning_rate": 5.392110432747167e-06, "loss": 0.09578704833984375, "step": 47700 }, { "epoch": 0.41249102904427976, "grad_norm": 20.261965577069855, "learning_rate": 5.391987476186501e-06, "loss": 0.17192535400390624, "step": 47705 }, { "epoch": 0.41253426256582304, "grad_norm": 5.887230188264224, "learning_rate": 5.3918645085942e-06, "loss": 0.13030548095703126, "step": 47710 }, { "epoch": 0.4125774960873663, "grad_norm": 7.342724705664473, "learning_rate": 5.39174152997083e-06, "loss": 0.06733512878417969, "step": 47715 }, { "epoch": 0.41262072960890955, "grad_norm": 0.41152374435797867, "learning_rate": 5.391618540316961e-06, "loss": 0.07345657348632813, "step": 47720 }, { "epoch": 0.41266396313045284, "grad_norm": 29.45504471555402, "learning_rate": 5.391495539633157e-06, "loss": 0.220416259765625, "step": 47725 }, { "epoch": 0.41270719665199607, "grad_norm": 36.187730109974865, "learning_rate": 5.3913725279199885e-06, "loss": 0.1420513153076172, "step": 47730 }, { "epoch": 0.41275043017353935, "grad_norm": 2.3658655574418095, "learning_rate": 5.3912495051780205e-06, "loss": 0.1229248046875, "step": 47735 }, { "epoch": 0.41279366369508264, "grad_norm": 1.732596727463453, "learning_rate": 5.39112647140782e-06, "loss": 0.05384902954101563, "step": 47740 }, { "epoch": 0.41283689721662586, "grad_norm": 1.0231590979718643, "learning_rate": 5.391003426609957e-06, "loss": 0.26710052490234376, "step": 47745 }, { "epoch": 0.41288013073816915, "grad_norm": 0.30895810849670546, "learning_rate": 5.390880370784997e-06, "loss": 0.36279830932617185, "step": 47750 }, { "epoch": 0.41292336425971243, "grad_norm": 5.908672451075166, "learning_rate": 5.390757303933507e-06, "loss": 0.04153900146484375, "step": 47755 }, { "epoch": 0.41296659778125566, "grad_norm": 5.722797975859519, "learning_rate": 5.390634226056058e-06, "loss": 0.13358612060546876, "step": 47760 }, { "epoch": 0.41300983130279895, "grad_norm": 10.615007696471425, "learning_rate": 5.390511137153213e-06, "loss": 0.10655593872070312, "step": 47765 }, { "epoch": 0.4130530648243422, "grad_norm": 0.5357247055111181, "learning_rate": 5.390388037225543e-06, "loss": 0.0636962890625, "step": 47770 }, { "epoch": 0.41309629834588546, "grad_norm": 1.6290592037652782, "learning_rate": 5.390264926273614e-06, "loss": 0.1471435546875, "step": 47775 }, { "epoch": 0.41313953186742874, "grad_norm": 15.00886676554821, "learning_rate": 5.390141804297994e-06, "loss": 0.3207651138305664, "step": 47780 }, { "epoch": 0.413182765388972, "grad_norm": 2.1945919004591947, "learning_rate": 5.390018671299252e-06, "loss": 0.142132568359375, "step": 47785 }, { "epoch": 0.41322599891051526, "grad_norm": 9.178243305450154, "learning_rate": 5.389895527277955e-06, "loss": 0.2260467529296875, "step": 47790 }, { "epoch": 0.41326923243205854, "grad_norm": 7.274687846310166, "learning_rate": 5.38977237223467e-06, "loss": 0.2301158905029297, "step": 47795 }, { "epoch": 0.41331246595360177, "grad_norm": 5.463590259757079, "learning_rate": 5.389649206169967e-06, "loss": 0.2890380859375, "step": 47800 }, { "epoch": 0.41335569947514506, "grad_norm": 0.440105560561431, "learning_rate": 5.389526029084413e-06, "loss": 0.1617431640625, "step": 47805 }, { "epoch": 0.41339893299668834, "grad_norm": 19.814785617358407, "learning_rate": 5.389402840978576e-06, "loss": 0.23155975341796875, "step": 47810 }, { "epoch": 0.41344216651823157, "grad_norm": 36.398519975983994, "learning_rate": 5.3892796418530235e-06, "loss": 0.6833076477050781, "step": 47815 }, { "epoch": 0.41348540003977485, "grad_norm": 1.7691269995277021, "learning_rate": 5.389156431708325e-06, "loss": 0.09036369323730468, "step": 47820 }, { "epoch": 0.4135286335613181, "grad_norm": 18.396790877288115, "learning_rate": 5.389033210545049e-06, "loss": 0.150384521484375, "step": 47825 }, { "epoch": 0.41357186708286137, "grad_norm": 9.651094696702762, "learning_rate": 5.388909978363762e-06, "loss": 0.35456008911132814, "step": 47830 }, { "epoch": 0.41361510060440465, "grad_norm": 12.756941239432171, "learning_rate": 5.388786735165033e-06, "loss": 0.15990982055664063, "step": 47835 }, { "epoch": 0.4136583341259479, "grad_norm": 3.8252946582041156, "learning_rate": 5.388663480949431e-06, "loss": 0.17564544677734376, "step": 47840 }, { "epoch": 0.41370156764749116, "grad_norm": 4.029194860907862, "learning_rate": 5.388540215717524e-06, "loss": 0.06718940734863281, "step": 47845 }, { "epoch": 0.41374480116903445, "grad_norm": 9.896740413284837, "learning_rate": 5.388416939469881e-06, "loss": 0.37871685028076174, "step": 47850 }, { "epoch": 0.4137880346905777, "grad_norm": 0.7282580785621091, "learning_rate": 5.38829365220707e-06, "loss": 0.1592071533203125, "step": 47855 }, { "epoch": 0.41383126821212096, "grad_norm": 0.9548353377389004, "learning_rate": 5.38817035392966e-06, "loss": 0.24706268310546875, "step": 47860 }, { "epoch": 0.4138745017336642, "grad_norm": 8.673580710919772, "learning_rate": 5.388047044638218e-06, "loss": 0.0751708984375, "step": 47865 }, { "epoch": 0.4139177352552075, "grad_norm": 20.574835387533135, "learning_rate": 5.387923724333316e-06, "loss": 0.502490234375, "step": 47870 }, { "epoch": 0.41396096877675076, "grad_norm": 5.195394816018146, "learning_rate": 5.387800393015519e-06, "loss": 0.04202117919921875, "step": 47875 }, { "epoch": 0.414004202298294, "grad_norm": 0.9724516573322455, "learning_rate": 5.387677050685398e-06, "loss": 0.17160491943359374, "step": 47880 }, { "epoch": 0.4140474358198373, "grad_norm": 4.9159086874427915, "learning_rate": 5.387553697343522e-06, "loss": 0.25988616943359377, "step": 47885 }, { "epoch": 0.41409066934138056, "grad_norm": 0.40239452004443205, "learning_rate": 5.387430332990459e-06, "loss": 0.10889739990234375, "step": 47890 }, { "epoch": 0.4141339028629238, "grad_norm": 7.085107476296256, "learning_rate": 5.387306957626778e-06, "loss": 0.5761199951171875, "step": 47895 }, { "epoch": 0.41417713638446707, "grad_norm": 10.202010609664725, "learning_rate": 5.387183571253049e-06, "loss": 0.09440574645996094, "step": 47900 }, { "epoch": 0.4142203699060103, "grad_norm": 4.71721793755036, "learning_rate": 5.38706017386984e-06, "loss": 0.19040069580078126, "step": 47905 }, { "epoch": 0.4142636034275536, "grad_norm": 0.8464236399174081, "learning_rate": 5.38693676547772e-06, "loss": 0.30943756103515624, "step": 47910 }, { "epoch": 0.41430683694909687, "grad_norm": 13.691288476653893, "learning_rate": 5.3868133460772585e-06, "loss": 0.23440399169921874, "step": 47915 }, { "epoch": 0.4143500704706401, "grad_norm": 13.226581031924509, "learning_rate": 5.386689915669025e-06, "loss": 0.03840484619140625, "step": 47920 }, { "epoch": 0.4143933039921834, "grad_norm": 4.023451073494493, "learning_rate": 5.3865664742535885e-06, "loss": 0.18244800567626954, "step": 47925 }, { "epoch": 0.41443653751372667, "grad_norm": 28.121325071749425, "learning_rate": 5.386443021831519e-06, "loss": 0.2663414001464844, "step": 47930 }, { "epoch": 0.4144797710352699, "grad_norm": 6.2385673515485545, "learning_rate": 5.386319558403384e-06, "loss": 0.1500091552734375, "step": 47935 }, { "epoch": 0.4145230045568132, "grad_norm": 10.142458389667798, "learning_rate": 5.386196083969754e-06, "loss": 0.06174545288085938, "step": 47940 }, { "epoch": 0.4145662380783564, "grad_norm": 0.9065364573092537, "learning_rate": 5.3860725985312e-06, "loss": 0.061783218383789064, "step": 47945 }, { "epoch": 0.4146094715998997, "grad_norm": 5.398015044158015, "learning_rate": 5.385949102088289e-06, "loss": 0.321636962890625, "step": 47950 }, { "epoch": 0.414652705121443, "grad_norm": 10.686618680573746, "learning_rate": 5.3858255946415915e-06, "loss": 0.11916580200195312, "step": 47955 }, { "epoch": 0.4146959386429862, "grad_norm": 19.796294131854403, "learning_rate": 5.385702076191677e-06, "loss": 0.133514404296875, "step": 47960 }, { "epoch": 0.4147391721645295, "grad_norm": 25.490149764870765, "learning_rate": 5.3855785467391155e-06, "loss": 0.226934814453125, "step": 47965 }, { "epoch": 0.4147824056860728, "grad_norm": 10.478009848061122, "learning_rate": 5.385455006284477e-06, "loss": 0.15075340270996093, "step": 47970 }, { "epoch": 0.414825639207616, "grad_norm": 17.374809853470467, "learning_rate": 5.38533145482833e-06, "loss": 0.11982421875, "step": 47975 }, { "epoch": 0.4148688727291593, "grad_norm": 1.8507359781405268, "learning_rate": 5.385207892371246e-06, "loss": 0.13263168334960937, "step": 47980 }, { "epoch": 0.4149121062507025, "grad_norm": 40.20243208985112, "learning_rate": 5.385084318913794e-06, "loss": 0.30587158203125, "step": 47985 }, { "epoch": 0.4149553397722458, "grad_norm": 9.135492050764588, "learning_rate": 5.3849607344565425e-06, "loss": 0.0533905029296875, "step": 47990 }, { "epoch": 0.4149985732937891, "grad_norm": 0.3805567269376627, "learning_rate": 5.384837139000064e-06, "loss": 0.2799468994140625, "step": 47995 }, { "epoch": 0.4150418068153323, "grad_norm": 0.7245610846126471, "learning_rate": 5.384713532544927e-06, "loss": 0.02008514404296875, "step": 48000 }, { "epoch": 0.4150850403368756, "grad_norm": 7.675853067104411, "learning_rate": 5.384589915091702e-06, "loss": 0.06804580688476562, "step": 48005 }, { "epoch": 0.4151282738584189, "grad_norm": 68.49901303170691, "learning_rate": 5.384466286640959e-06, "loss": 0.36416091918945315, "step": 48010 }, { "epoch": 0.4151715073799621, "grad_norm": 5.144576443016999, "learning_rate": 5.384342647193268e-06, "loss": 0.3541839599609375, "step": 48015 }, { "epoch": 0.4152147409015054, "grad_norm": 33.492380449466815, "learning_rate": 5.3842189967492e-06, "loss": 0.5456546783447266, "step": 48020 }, { "epoch": 0.4152579744230487, "grad_norm": 23.18558983352477, "learning_rate": 5.384095335309323e-06, "loss": 0.20009765625, "step": 48025 }, { "epoch": 0.4153012079445919, "grad_norm": 0.24094977742599866, "learning_rate": 5.383971662874211e-06, "loss": 0.1589996337890625, "step": 48030 }, { "epoch": 0.4153444414661352, "grad_norm": 15.283078326383325, "learning_rate": 5.383847979444432e-06, "loss": 0.18464202880859376, "step": 48035 }, { "epoch": 0.4153876749876784, "grad_norm": 22.14125230778901, "learning_rate": 5.3837242850205555e-06, "loss": 0.214691162109375, "step": 48040 }, { "epoch": 0.4154309085092217, "grad_norm": 2.946387268523239, "learning_rate": 5.3836005796031545e-06, "loss": 0.0990936279296875, "step": 48045 }, { "epoch": 0.415474142030765, "grad_norm": 66.72295050451172, "learning_rate": 5.383476863192797e-06, "loss": 0.31996307373046873, "step": 48050 }, { "epoch": 0.4155173755523082, "grad_norm": 0.12863559926370746, "learning_rate": 5.383353135790056e-06, "loss": 0.07201156616210938, "step": 48055 }, { "epoch": 0.4155606090738515, "grad_norm": 16.127927195421, "learning_rate": 5.383229397395501e-06, "loss": 0.2562744140625, "step": 48060 }, { "epoch": 0.4156038425953948, "grad_norm": 7.281042635850806, "learning_rate": 5.383105648009702e-06, "loss": 0.08704681396484375, "step": 48065 }, { "epoch": 0.415647076116938, "grad_norm": 12.249041962661815, "learning_rate": 5.38298188763323e-06, "loss": 0.07349853515625, "step": 48070 }, { "epoch": 0.4156903096384813, "grad_norm": 2.3312195331404433, "learning_rate": 5.382858116266657e-06, "loss": 0.12617111206054688, "step": 48075 }, { "epoch": 0.41573354316002453, "grad_norm": 0.2939704337644901, "learning_rate": 5.382734333910552e-06, "loss": 0.2038543701171875, "step": 48080 }, { "epoch": 0.4157767766815678, "grad_norm": 6.128229727861244, "learning_rate": 5.382610540565487e-06, "loss": 0.094525146484375, "step": 48085 }, { "epoch": 0.4158200102031111, "grad_norm": 1.3905310696246733, "learning_rate": 5.3824867362320335e-06, "loss": 0.0238372802734375, "step": 48090 }, { "epoch": 0.41586324372465433, "grad_norm": 4.27223669119259, "learning_rate": 5.382362920910761e-06, "loss": 0.070660400390625, "step": 48095 }, { "epoch": 0.4159064772461976, "grad_norm": 10.0032184127061, "learning_rate": 5.382239094602242e-06, "loss": 0.1001434326171875, "step": 48100 }, { "epoch": 0.4159497107677409, "grad_norm": 1.633136040919863, "learning_rate": 5.3821152573070455e-06, "loss": 0.1045166015625, "step": 48105 }, { "epoch": 0.41599294428928413, "grad_norm": 4.984756964191833, "learning_rate": 5.381991409025745e-06, "loss": 0.1630615234375, "step": 48110 }, { "epoch": 0.4160361778108274, "grad_norm": 29.28263730281253, "learning_rate": 5.38186754975891e-06, "loss": 0.1477264404296875, "step": 48115 }, { "epoch": 0.41607941133237064, "grad_norm": 0.33587625516712954, "learning_rate": 5.381743679507112e-06, "loss": 0.04154815673828125, "step": 48120 }, { "epoch": 0.4161226448539139, "grad_norm": 24.52046901414856, "learning_rate": 5.381619798270924e-06, "loss": 0.425836181640625, "step": 48125 }, { "epoch": 0.4161658783754572, "grad_norm": 17.64726039608274, "learning_rate": 5.3814959060509145e-06, "loss": 0.24353485107421874, "step": 48130 }, { "epoch": 0.41620911189700044, "grad_norm": 9.391935811462066, "learning_rate": 5.381372002847657e-06, "loss": 0.09025115966796875, "step": 48135 }, { "epoch": 0.4162523454185437, "grad_norm": 5.230728158759349, "learning_rate": 5.381248088661723e-06, "loss": 0.05433578491210937, "step": 48140 }, { "epoch": 0.416295578940087, "grad_norm": 21.009044454161376, "learning_rate": 5.381124163493682e-06, "loss": 0.0728668212890625, "step": 48145 }, { "epoch": 0.41633881246163024, "grad_norm": 72.28570399347501, "learning_rate": 5.381000227344106e-06, "loss": 0.374072265625, "step": 48150 }, { "epoch": 0.4163820459831735, "grad_norm": 34.17052332564619, "learning_rate": 5.380876280213568e-06, "loss": 0.262158203125, "step": 48155 }, { "epoch": 0.41642527950471675, "grad_norm": 0.5763978902974357, "learning_rate": 5.380752322102641e-06, "loss": 0.07277297973632812, "step": 48160 }, { "epoch": 0.41646851302626003, "grad_norm": 4.827671768828529, "learning_rate": 5.380628353011893e-06, "loss": 0.10288848876953124, "step": 48165 }, { "epoch": 0.4165117465478033, "grad_norm": 1.4843620428808932, "learning_rate": 5.380504372941897e-06, "loss": 0.10431365966796875, "step": 48170 }, { "epoch": 0.41655498006934655, "grad_norm": 14.329448541886887, "learning_rate": 5.380380381893226e-06, "loss": 0.12571029663085936, "step": 48175 }, { "epoch": 0.41659821359088983, "grad_norm": 4.209252324787454, "learning_rate": 5.3802563798664506e-06, "loss": 0.23634109497070313, "step": 48180 }, { "epoch": 0.4166414471124331, "grad_norm": 3.391286684044985, "learning_rate": 5.380132366862143e-06, "loss": 0.1631256103515625, "step": 48185 }, { "epoch": 0.41668468063397635, "grad_norm": 2.7898672734541163, "learning_rate": 5.3800083428808754e-06, "loss": 0.17276229858398437, "step": 48190 }, { "epoch": 0.41672791415551963, "grad_norm": 4.544900047257715, "learning_rate": 5.379884307923219e-06, "loss": 0.11847991943359375, "step": 48195 }, { "epoch": 0.4167711476770629, "grad_norm": 2.851582777794088, "learning_rate": 5.379760261989747e-06, "loss": 0.1988494873046875, "step": 48200 }, { "epoch": 0.41681438119860614, "grad_norm": 8.922360272547396, "learning_rate": 5.379636205081031e-06, "loss": 0.136016845703125, "step": 48205 }, { "epoch": 0.4168576147201494, "grad_norm": 26.571935659831592, "learning_rate": 5.379512137197644e-06, "loss": 0.16607513427734374, "step": 48210 }, { "epoch": 0.41690084824169266, "grad_norm": 2.6026256908447434, "learning_rate": 5.379388058340156e-06, "loss": 0.25580835342407227, "step": 48215 }, { "epoch": 0.41694408176323594, "grad_norm": 46.97625507672296, "learning_rate": 5.3792639685091405e-06, "loss": 0.48223876953125, "step": 48220 }, { "epoch": 0.4169873152847792, "grad_norm": 12.688875801431257, "learning_rate": 5.379139867705171e-06, "loss": 0.0646820068359375, "step": 48225 }, { "epoch": 0.41703054880632245, "grad_norm": 12.276256160158171, "learning_rate": 5.3790157559288175e-06, "loss": 0.26214599609375, "step": 48230 }, { "epoch": 0.41707378232786574, "grad_norm": 15.710996570614586, "learning_rate": 5.378891633180654e-06, "loss": 0.1069305419921875, "step": 48235 }, { "epoch": 0.417117015849409, "grad_norm": 24.4350090553633, "learning_rate": 5.378767499461253e-06, "loss": 0.2550201416015625, "step": 48240 }, { "epoch": 0.41716024937095225, "grad_norm": 2.201772348946407, "learning_rate": 5.378643354771185e-06, "loss": 0.23464508056640626, "step": 48245 }, { "epoch": 0.41720348289249554, "grad_norm": 5.29063081895081, "learning_rate": 5.378519199111025e-06, "loss": 0.1331390380859375, "step": 48250 }, { "epoch": 0.41724671641403877, "grad_norm": 0.7825865002700703, "learning_rate": 5.378395032481345e-06, "loss": 0.18846893310546875, "step": 48255 }, { "epoch": 0.41728994993558205, "grad_norm": 0.6532794968915692, "learning_rate": 5.378270854882716e-06, "loss": 0.24623565673828124, "step": 48260 }, { "epoch": 0.41733318345712533, "grad_norm": 19.66249892544041, "learning_rate": 5.378146666315713e-06, "loss": 0.5084556579589844, "step": 48265 }, { "epoch": 0.41737641697866856, "grad_norm": 2.7498509478543673, "learning_rate": 5.3780224667809085e-06, "loss": 0.18785018920898439, "step": 48270 }, { "epoch": 0.41741965050021185, "grad_norm": 3.502128924844685, "learning_rate": 5.377898256278872e-06, "loss": 0.03795166015625, "step": 48275 }, { "epoch": 0.41746288402175513, "grad_norm": 5.673253211569263, "learning_rate": 5.377774034810181e-06, "loss": 0.1427215576171875, "step": 48280 }, { "epoch": 0.41750611754329836, "grad_norm": 0.7673827552937642, "learning_rate": 5.377649802375406e-06, "loss": 0.0589080810546875, "step": 48285 }, { "epoch": 0.41754935106484165, "grad_norm": 0.674365246058974, "learning_rate": 5.377525558975119e-06, "loss": 0.08554840087890625, "step": 48290 }, { "epoch": 0.4175925845863849, "grad_norm": 2.393149502111299, "learning_rate": 5.377401304609896e-06, "loss": 0.117877197265625, "step": 48295 }, { "epoch": 0.41763581810792816, "grad_norm": 14.20949225753329, "learning_rate": 5.377277039280307e-06, "loss": 0.15269241333007813, "step": 48300 }, { "epoch": 0.41767905162947144, "grad_norm": 0.5250489398982541, "learning_rate": 5.377152762986927e-06, "loss": 0.21032791137695311, "step": 48305 }, { "epoch": 0.41772228515101467, "grad_norm": 0.4918934694166296, "learning_rate": 5.3770284757303275e-06, "loss": 0.0517669677734375, "step": 48310 }, { "epoch": 0.41776551867255796, "grad_norm": 44.34938704348792, "learning_rate": 5.376904177511084e-06, "loss": 0.5942306041717529, "step": 48315 }, { "epoch": 0.41780875219410124, "grad_norm": 2.259898179353893, "learning_rate": 5.376779868329767e-06, "loss": 0.17299289703369142, "step": 48320 }, { "epoch": 0.41785198571564447, "grad_norm": 17.406118818152184, "learning_rate": 5.376655548186953e-06, "loss": 0.09111251831054687, "step": 48325 }, { "epoch": 0.41789521923718775, "grad_norm": 0.32932512364706557, "learning_rate": 5.376531217083212e-06, "loss": 0.12773284912109376, "step": 48330 }, { "epoch": 0.417938452758731, "grad_norm": 8.618201803097834, "learning_rate": 5.3764068750191204e-06, "loss": 0.096868896484375, "step": 48335 }, { "epoch": 0.41798168628027427, "grad_norm": 33.25484235421268, "learning_rate": 5.3762825219952486e-06, "loss": 0.32825164794921874, "step": 48340 }, { "epoch": 0.41802491980181755, "grad_norm": 15.534281897138548, "learning_rate": 5.376158158012173e-06, "loss": 0.0994333267211914, "step": 48345 }, { "epoch": 0.4180681533233608, "grad_norm": 4.316786934235023, "learning_rate": 5.376033783070466e-06, "loss": 0.11125373840332031, "step": 48350 }, { "epoch": 0.41811138684490406, "grad_norm": 0.8025580871133527, "learning_rate": 5.375909397170701e-06, "loss": 0.1758575439453125, "step": 48355 }, { "epoch": 0.41815462036644735, "grad_norm": 14.318562779685227, "learning_rate": 5.375785000313451e-06, "loss": 0.3245361328125, "step": 48360 }, { "epoch": 0.4181978538879906, "grad_norm": 1.2944936683803876, "learning_rate": 5.3756605924992914e-06, "loss": 0.25786285400390624, "step": 48365 }, { "epoch": 0.41824108740953386, "grad_norm": 28.1522969750525, "learning_rate": 5.375536173728795e-06, "loss": 0.23489990234375, "step": 48370 }, { "epoch": 0.41828432093107715, "grad_norm": 1.0497266104511909, "learning_rate": 5.375411744002536e-06, "loss": 0.12333450317382813, "step": 48375 }, { "epoch": 0.4183275544526204, "grad_norm": 5.499149887917257, "learning_rate": 5.375287303321087e-06, "loss": 0.06219825744628906, "step": 48380 }, { "epoch": 0.41837078797416366, "grad_norm": 1.2742000636076893, "learning_rate": 5.375162851685023e-06, "loss": 0.17187881469726562, "step": 48385 }, { "epoch": 0.4184140214957069, "grad_norm": 2.46806419401039, "learning_rate": 5.375038389094918e-06, "loss": 0.08227462768554687, "step": 48390 }, { "epoch": 0.4184572550172502, "grad_norm": 5.126777761736869, "learning_rate": 5.374913915551346e-06, "loss": 0.15431251525878906, "step": 48395 }, { "epoch": 0.41850048853879346, "grad_norm": 9.424535970657516, "learning_rate": 5.374789431054881e-06, "loss": 0.333990478515625, "step": 48400 }, { "epoch": 0.4185437220603367, "grad_norm": 8.985425205384834, "learning_rate": 5.374664935606096e-06, "loss": 0.15320892333984376, "step": 48405 }, { "epoch": 0.41858695558187997, "grad_norm": 1.5238700946008017, "learning_rate": 5.374540429205568e-06, "loss": 0.0557708740234375, "step": 48410 }, { "epoch": 0.41863018910342326, "grad_norm": 6.064510652458811, "learning_rate": 5.374415911853867e-06, "loss": 0.13660049438476562, "step": 48415 }, { "epoch": 0.4186734226249665, "grad_norm": 1.4219690807030332, "learning_rate": 5.374291383551571e-06, "loss": 0.34859619140625, "step": 48420 }, { "epoch": 0.41871665614650977, "grad_norm": 8.971896365189453, "learning_rate": 5.374166844299254e-06, "loss": 0.32407379150390625, "step": 48425 }, { "epoch": 0.418759889668053, "grad_norm": 7.40214070853429, "learning_rate": 5.374042294097487e-06, "loss": 0.047429656982421874, "step": 48430 }, { "epoch": 0.4188031231895963, "grad_norm": 2.2031553109711326, "learning_rate": 5.373917732946848e-06, "loss": 0.2622528076171875, "step": 48435 }, { "epoch": 0.41884635671113957, "grad_norm": 14.850980964066823, "learning_rate": 5.3737931608479095e-06, "loss": 0.3306671142578125, "step": 48440 }, { "epoch": 0.4188895902326828, "grad_norm": 34.91127784466634, "learning_rate": 5.373668577801247e-06, "loss": 0.43642005920410154, "step": 48445 }, { "epoch": 0.4189328237542261, "grad_norm": 2.864926735442042, "learning_rate": 5.373543983807435e-06, "loss": 0.1025634765625, "step": 48450 }, { "epoch": 0.41897605727576936, "grad_norm": 7.599264760069823, "learning_rate": 5.373419378867048e-06, "loss": 0.06829605102539063, "step": 48455 }, { "epoch": 0.4190192907973126, "grad_norm": 1.0087874708507447, "learning_rate": 5.373294762980661e-06, "loss": 0.03948211669921875, "step": 48460 }, { "epoch": 0.4190625243188559, "grad_norm": 28.912613788131726, "learning_rate": 5.373170136148847e-06, "loss": 0.07109451293945312, "step": 48465 }, { "epoch": 0.4191057578403991, "grad_norm": 14.448857801627966, "learning_rate": 5.3730454983721835e-06, "loss": 0.07188644409179687, "step": 48470 }, { "epoch": 0.4191489913619424, "grad_norm": 19.744876489015052, "learning_rate": 5.372920849651242e-06, "loss": 0.20830078125, "step": 48475 }, { "epoch": 0.4191922248834857, "grad_norm": 4.3132403845812695, "learning_rate": 5.3727961899866e-06, "loss": 0.1010772705078125, "step": 48480 }, { "epoch": 0.4192354584050289, "grad_norm": 14.210121430443492, "learning_rate": 5.372671519378833e-06, "loss": 0.11201133728027343, "step": 48485 }, { "epoch": 0.4192786919265722, "grad_norm": 6.739637701161141, "learning_rate": 5.372546837828512e-06, "loss": 0.047012710571289064, "step": 48490 }, { "epoch": 0.4193219254481155, "grad_norm": 22.38692040468527, "learning_rate": 5.372422145336217e-06, "loss": 0.24859619140625, "step": 48495 }, { "epoch": 0.4193651589696587, "grad_norm": 9.686298435481982, "learning_rate": 5.372297441902519e-06, "loss": 0.08523178100585938, "step": 48500 }, { "epoch": 0.419408392491202, "grad_norm": 71.26845008934431, "learning_rate": 5.372172727527997e-06, "loss": 0.48323745727539064, "step": 48505 }, { "epoch": 0.4194516260127452, "grad_norm": 3.592126343637349, "learning_rate": 5.372048002213222e-06, "loss": 0.4051300048828125, "step": 48510 }, { "epoch": 0.4194948595342885, "grad_norm": 10.953203111386992, "learning_rate": 5.371923265958771e-06, "loss": 0.26692657470703124, "step": 48515 }, { "epoch": 0.4195380930558318, "grad_norm": 2.4677925460706267, "learning_rate": 5.371798518765221e-06, "loss": 0.02450714111328125, "step": 48520 }, { "epoch": 0.419581326577375, "grad_norm": 4.3898971777625055, "learning_rate": 5.3716737606331455e-06, "loss": 0.1552734375, "step": 48525 }, { "epoch": 0.4196245600989183, "grad_norm": 10.211918356048306, "learning_rate": 5.371548991563119e-06, "loss": 0.23114395141601562, "step": 48530 }, { "epoch": 0.4196677936204616, "grad_norm": 3.485148275822201, "learning_rate": 5.3714242115557195e-06, "loss": 0.40965728759765624, "step": 48535 }, { "epoch": 0.4197110271420048, "grad_norm": 18.99595536312448, "learning_rate": 5.371299420611519e-06, "loss": 0.09710845947265626, "step": 48540 }, { "epoch": 0.4197542606635481, "grad_norm": 0.343682789796472, "learning_rate": 5.371174618731097e-06, "loss": 0.10222148895263672, "step": 48545 }, { "epoch": 0.4197974941850914, "grad_norm": 0.2533490856301123, "learning_rate": 5.371049805915027e-06, "loss": 0.07027702331542969, "step": 48550 }, { "epoch": 0.4198407277066346, "grad_norm": 5.76605533788665, "learning_rate": 5.370924982163885e-06, "loss": 0.19874076843261718, "step": 48555 }, { "epoch": 0.4198839612281779, "grad_norm": 10.947034964411463, "learning_rate": 5.370800147478246e-06, "loss": 0.1596099853515625, "step": 48560 }, { "epoch": 0.4199271947497211, "grad_norm": 3.3985492759246148, "learning_rate": 5.370675301858686e-06, "loss": 0.16910552978515625, "step": 48565 }, { "epoch": 0.4199704282712644, "grad_norm": 2.4933146259559407, "learning_rate": 5.370550445305781e-06, "loss": 0.2128753662109375, "step": 48570 }, { "epoch": 0.4200136617928077, "grad_norm": 10.561089096789717, "learning_rate": 5.370425577820107e-06, "loss": 0.09704723358154296, "step": 48575 }, { "epoch": 0.4200568953143509, "grad_norm": 3.192719307723856, "learning_rate": 5.37030069940224e-06, "loss": 0.08622512817382813, "step": 48580 }, { "epoch": 0.4201001288358942, "grad_norm": 12.2285805541456, "learning_rate": 5.370175810052755e-06, "loss": 0.251416015625, "step": 48585 }, { "epoch": 0.4201433623574375, "grad_norm": 37.84558642950072, "learning_rate": 5.370050909772229e-06, "loss": 0.13362884521484375, "step": 48590 }, { "epoch": 0.4201865958789807, "grad_norm": 16.29110340101704, "learning_rate": 5.369925998561238e-06, "loss": 0.28201751708984374, "step": 48595 }, { "epoch": 0.420229829400524, "grad_norm": 21.110651441023712, "learning_rate": 5.369801076420356e-06, "loss": 0.20810470581054688, "step": 48600 }, { "epoch": 0.42027306292206723, "grad_norm": 0.7906686725088106, "learning_rate": 5.3696761433501626e-06, "loss": 0.08717727661132812, "step": 48605 }, { "epoch": 0.4203162964436105, "grad_norm": 11.801228304265377, "learning_rate": 5.3695511993512314e-06, "loss": 0.437554931640625, "step": 48610 }, { "epoch": 0.4203595299651538, "grad_norm": 0.9921259520813045, "learning_rate": 5.36942624442414e-06, "loss": 0.14301700592041017, "step": 48615 }, { "epoch": 0.42040276348669703, "grad_norm": 0.4169919904590316, "learning_rate": 5.369301278569464e-06, "loss": 0.16676483154296876, "step": 48620 }, { "epoch": 0.4204459970082403, "grad_norm": 2.0562940082830767, "learning_rate": 5.36917630178778e-06, "loss": 0.18133392333984374, "step": 48625 }, { "epoch": 0.4204892305297836, "grad_norm": 8.726651951470851, "learning_rate": 5.369051314079663e-06, "loss": 0.1709381103515625, "step": 48630 }, { "epoch": 0.4205324640513268, "grad_norm": 45.26454097606495, "learning_rate": 5.368926315445692e-06, "loss": 0.3243888854980469, "step": 48635 }, { "epoch": 0.4205756975728701, "grad_norm": 6.092609328819526, "learning_rate": 5.368801305886442e-06, "loss": 0.1117034912109375, "step": 48640 }, { "epoch": 0.42061893109441334, "grad_norm": 6.161779990298926, "learning_rate": 5.368676285402489e-06, "loss": 0.10281944274902344, "step": 48645 }, { "epoch": 0.4206621646159566, "grad_norm": 0.039532745878404496, "learning_rate": 5.368551253994411e-06, "loss": 0.15846595764160157, "step": 48650 }, { "epoch": 0.4207053981374999, "grad_norm": 9.502889472528683, "learning_rate": 5.368426211662784e-06, "loss": 0.3072906494140625, "step": 48655 }, { "epoch": 0.42074863165904314, "grad_norm": 35.60157908705918, "learning_rate": 5.3683011584081836e-06, "loss": 0.28159255981445314, "step": 48660 }, { "epoch": 0.4207918651805864, "grad_norm": 3.6272859244432287, "learning_rate": 5.368176094231189e-06, "loss": 0.2009765625, "step": 48665 }, { "epoch": 0.4208350987021297, "grad_norm": 1.0929853318323282, "learning_rate": 5.368051019132374e-06, "loss": 0.13524169921875, "step": 48670 }, { "epoch": 0.42087833222367294, "grad_norm": 4.293215318785196, "learning_rate": 5.367925933112318e-06, "loss": 0.038840103149414065, "step": 48675 }, { "epoch": 0.4209215657452162, "grad_norm": 10.546805825448685, "learning_rate": 5.367800836171597e-06, "loss": 0.3243400573730469, "step": 48680 }, { "epoch": 0.42096479926675945, "grad_norm": 6.664465474969581, "learning_rate": 5.367675728310787e-06, "loss": 0.12433547973632812, "step": 48685 }, { "epoch": 0.42100803278830273, "grad_norm": 27.056558747853217, "learning_rate": 5.367550609530465e-06, "loss": 0.260009765625, "step": 48690 }, { "epoch": 0.421051266309846, "grad_norm": 15.442301348898969, "learning_rate": 5.36742547983121e-06, "loss": 0.2648712158203125, "step": 48695 }, { "epoch": 0.42109449983138925, "grad_norm": 0.6151493330353628, "learning_rate": 5.367300339213598e-06, "loss": 0.28626480102539065, "step": 48700 }, { "epoch": 0.42113773335293253, "grad_norm": 0.9435227410687332, "learning_rate": 5.367175187678206e-06, "loss": 0.16744384765625, "step": 48705 }, { "epoch": 0.4211809668744758, "grad_norm": 10.974087346511173, "learning_rate": 5.367050025225611e-06, "loss": 0.15498104095458984, "step": 48710 }, { "epoch": 0.42122420039601904, "grad_norm": 25.903109029807908, "learning_rate": 5.366924851856389e-06, "loss": 0.333868408203125, "step": 48715 }, { "epoch": 0.42126743391756233, "grad_norm": 22.909776679837854, "learning_rate": 5.36679966757112e-06, "loss": 0.264581298828125, "step": 48720 }, { "epoch": 0.42131066743910556, "grad_norm": 2.868071190027785, "learning_rate": 5.366674472370381e-06, "loss": 0.17643508911132813, "step": 48725 }, { "epoch": 0.42135390096064884, "grad_norm": 0.35468259539814734, "learning_rate": 5.366549266254747e-06, "loss": 0.12037353515625, "step": 48730 }, { "epoch": 0.4213971344821921, "grad_norm": 1.8746647448763623, "learning_rate": 5.366424049224798e-06, "loss": 0.1105316162109375, "step": 48735 }, { "epoch": 0.42144036800373536, "grad_norm": 8.142236823204062, "learning_rate": 5.3662988212811095e-06, "loss": 0.11075439453125, "step": 48740 }, { "epoch": 0.42148360152527864, "grad_norm": 3.9453885850181076, "learning_rate": 5.366173582424261e-06, "loss": 0.0267181396484375, "step": 48745 }, { "epoch": 0.4215268350468219, "grad_norm": 24.977786286901992, "learning_rate": 5.366048332654827e-06, "loss": 0.16832351684570312, "step": 48750 }, { "epoch": 0.42157006856836515, "grad_norm": 2.567903860999427, "learning_rate": 5.365923071973389e-06, "loss": 0.182611083984375, "step": 48755 }, { "epoch": 0.42161330208990844, "grad_norm": 24.845689196626935, "learning_rate": 5.365797800380522e-06, "loss": 0.11560821533203125, "step": 48760 }, { "epoch": 0.4216565356114517, "grad_norm": 6.538152137768772, "learning_rate": 5.365672517876805e-06, "loss": 0.23180084228515624, "step": 48765 }, { "epoch": 0.42169976913299495, "grad_norm": 3.34580655708944, "learning_rate": 5.365547224462815e-06, "loss": 0.081591796875, "step": 48770 }, { "epoch": 0.42174300265453823, "grad_norm": 2.565356229784726, "learning_rate": 5.3654219201391305e-06, "loss": 0.184423828125, "step": 48775 }, { "epoch": 0.42178623617608146, "grad_norm": 11.542654640115105, "learning_rate": 5.3652966049063296e-06, "loss": 0.10995330810546874, "step": 48780 }, { "epoch": 0.42182946969762475, "grad_norm": 7.143509776006069, "learning_rate": 5.365171278764989e-06, "loss": 0.0375152587890625, "step": 48785 }, { "epoch": 0.42187270321916803, "grad_norm": 23.852632984112045, "learning_rate": 5.365045941715687e-06, "loss": 0.08684349060058594, "step": 48790 }, { "epoch": 0.42191593674071126, "grad_norm": 36.55474011046085, "learning_rate": 5.364920593759004e-06, "loss": 0.16559925079345703, "step": 48795 }, { "epoch": 0.42195917026225455, "grad_norm": 23.772262726373743, "learning_rate": 5.364795234895514e-06, "loss": 0.15311279296875, "step": 48800 }, { "epoch": 0.42200240378379783, "grad_norm": 0.45015737692262886, "learning_rate": 5.364669865125798e-06, "loss": 0.0705902099609375, "step": 48805 }, { "epoch": 0.42204563730534106, "grad_norm": 0.1461572401100519, "learning_rate": 5.364544484450434e-06, "loss": 0.0486968994140625, "step": 48810 }, { "epoch": 0.42208887082688434, "grad_norm": 10.678897283337653, "learning_rate": 5.3644190928699995e-06, "loss": 0.19963302612304687, "step": 48815 }, { "epoch": 0.4221321043484276, "grad_norm": 30.622743826816507, "learning_rate": 5.364293690385073e-06, "loss": 0.5018102645874023, "step": 48820 }, { "epoch": 0.42217533786997086, "grad_norm": 4.452527993816735, "learning_rate": 5.364168276996233e-06, "loss": 0.28198699951171874, "step": 48825 }, { "epoch": 0.42221857139151414, "grad_norm": 3.537867582383246, "learning_rate": 5.364042852704058e-06, "loss": 0.2012392044067383, "step": 48830 }, { "epoch": 0.42226180491305737, "grad_norm": 5.242517922349908, "learning_rate": 5.3639174175091265e-06, "loss": 0.127069091796875, "step": 48835 }, { "epoch": 0.42230503843460065, "grad_norm": 2.9127322645439095, "learning_rate": 5.3637919714120155e-06, "loss": 0.24923458099365234, "step": 48840 }, { "epoch": 0.42234827195614394, "grad_norm": 8.749044424430117, "learning_rate": 5.363666514413306e-06, "loss": 0.27529296875, "step": 48845 }, { "epoch": 0.42239150547768717, "grad_norm": 27.12413063271184, "learning_rate": 5.363541046513576e-06, "loss": 0.3373594284057617, "step": 48850 }, { "epoch": 0.42243473899923045, "grad_norm": 31.561414660386735, "learning_rate": 5.363415567713402e-06, "loss": 0.1553863525390625, "step": 48855 }, { "epoch": 0.4224779725207737, "grad_norm": 2.3457365399045784, "learning_rate": 5.363290078013365e-06, "loss": 0.0626220703125, "step": 48860 }, { "epoch": 0.42252120604231697, "grad_norm": 0.062378475848327904, "learning_rate": 5.3631645774140436e-06, "loss": 0.047664833068847653, "step": 48865 }, { "epoch": 0.42256443956386025, "grad_norm": 13.87972138538957, "learning_rate": 5.363039065916014e-06, "loss": 0.30532684326171877, "step": 48870 }, { "epoch": 0.4226076730854035, "grad_norm": 2.1522072212632875, "learning_rate": 5.362913543519859e-06, "loss": 0.5181365966796875, "step": 48875 }, { "epoch": 0.42265090660694676, "grad_norm": 1.5657887754179085, "learning_rate": 5.3627880102261545e-06, "loss": 0.05601634979248047, "step": 48880 }, { "epoch": 0.42269414012849005, "grad_norm": 0.3767513563723882, "learning_rate": 5.362662466035481e-06, "loss": 0.11365966796875, "step": 48885 }, { "epoch": 0.4227373736500333, "grad_norm": 4.75593252800932, "learning_rate": 5.362536910948417e-06, "loss": 0.08800201416015625, "step": 48890 }, { "epoch": 0.42278060717157656, "grad_norm": 6.522338336891661, "learning_rate": 5.362411344965541e-06, "loss": 0.19056549072265624, "step": 48895 }, { "epoch": 0.4228238406931198, "grad_norm": 4.475083724263929, "learning_rate": 5.362285768087433e-06, "loss": 0.6242401123046875, "step": 48900 }, { "epoch": 0.4228670742146631, "grad_norm": 0.1391169716968481, "learning_rate": 5.362160180314671e-06, "loss": 0.181292724609375, "step": 48905 }, { "epoch": 0.42291030773620636, "grad_norm": 23.94902730062781, "learning_rate": 5.362034581647836e-06, "loss": 0.1990631103515625, "step": 48910 }, { "epoch": 0.4229535412577496, "grad_norm": 1.9650649298829859, "learning_rate": 5.3619089720875055e-06, "loss": 0.039215087890625, "step": 48915 }, { "epoch": 0.4229967747792929, "grad_norm": 20.002569144142317, "learning_rate": 5.36178335163426e-06, "loss": 0.19862213134765624, "step": 48920 }, { "epoch": 0.42304000830083616, "grad_norm": 3.1891383699876537, "learning_rate": 5.361657720288678e-06, "loss": 0.08207931518554687, "step": 48925 }, { "epoch": 0.4230832418223794, "grad_norm": 5.621286770454488, "learning_rate": 5.3615320780513394e-06, "loss": 0.0382110595703125, "step": 48930 }, { "epoch": 0.42312647534392267, "grad_norm": 4.063202137974767, "learning_rate": 5.361406424922824e-06, "loss": 0.07081756591796876, "step": 48935 }, { "epoch": 0.42316970886546595, "grad_norm": 8.93290872254875, "learning_rate": 5.36128076090371e-06, "loss": 0.10940723419189453, "step": 48940 }, { "epoch": 0.4232129423870092, "grad_norm": 30.097535139273162, "learning_rate": 5.3611550859945785e-06, "loss": 0.30941162109375, "step": 48945 }, { "epoch": 0.42325617590855247, "grad_norm": 4.0970391305934974, "learning_rate": 5.3610294001960084e-06, "loss": 0.09401016235351563, "step": 48950 }, { "epoch": 0.4232994094300957, "grad_norm": 2.2867185121462086, "learning_rate": 5.360903703508579e-06, "loss": 0.06674365997314453, "step": 48955 }, { "epoch": 0.423342642951639, "grad_norm": 15.805006854577996, "learning_rate": 5.36077799593287e-06, "loss": 0.09000244140625, "step": 48960 }, { "epoch": 0.42338587647318227, "grad_norm": 13.941420061620084, "learning_rate": 5.360652277469462e-06, "loss": 0.1823566436767578, "step": 48965 }, { "epoch": 0.4234291099947255, "grad_norm": 29.839064643292573, "learning_rate": 5.360526548118934e-06, "loss": 0.25881309509277345, "step": 48970 }, { "epoch": 0.4234723435162688, "grad_norm": 11.584135807552647, "learning_rate": 5.360400807881867e-06, "loss": 0.27070159912109376, "step": 48975 }, { "epoch": 0.42351557703781206, "grad_norm": 29.912314774847697, "learning_rate": 5.3602750567588385e-06, "loss": 0.17341766357421876, "step": 48980 }, { "epoch": 0.4235588105593553, "grad_norm": 0.6364189812968031, "learning_rate": 5.360149294750431e-06, "loss": 0.12457656860351562, "step": 48985 }, { "epoch": 0.4236020440808986, "grad_norm": 0.22731525402048353, "learning_rate": 5.360023521857223e-06, "loss": 0.07642974853515624, "step": 48990 }, { "epoch": 0.4236452776024418, "grad_norm": 7.365263403527309, "learning_rate": 5.359897738079797e-06, "loss": 0.0953826904296875, "step": 48995 }, { "epoch": 0.4236885111239851, "grad_norm": 13.204209367212268, "learning_rate": 5.3597719434187285e-06, "loss": 0.074798583984375, "step": 49000 }, { "epoch": 0.4237317446455284, "grad_norm": 10.210199141900816, "learning_rate": 5.359646137874601e-06, "loss": 0.04886131286621094, "step": 49005 }, { "epoch": 0.4237749781670716, "grad_norm": 7.593930594458023, "learning_rate": 5.3595203214479945e-06, "loss": 0.072552490234375, "step": 49010 }, { "epoch": 0.4238182116886149, "grad_norm": 0.7990524329413664, "learning_rate": 5.359394494139489e-06, "loss": 0.17041435241699218, "step": 49015 }, { "epoch": 0.42386144521015817, "grad_norm": 3.399943353674323, "learning_rate": 5.3592686559496645e-06, "loss": 0.23611602783203126, "step": 49020 }, { "epoch": 0.4239046787317014, "grad_norm": 9.810223763530148, "learning_rate": 5.3591428068791005e-06, "loss": 0.31491947174072266, "step": 49025 }, { "epoch": 0.4239479122532447, "grad_norm": 1.1356217642143835, "learning_rate": 5.359016946928379e-06, "loss": 0.13575592041015624, "step": 49030 }, { "epoch": 0.4239911457747879, "grad_norm": 0.6609547457432228, "learning_rate": 5.3588910760980795e-06, "loss": 0.14140472412109376, "step": 49035 }, { "epoch": 0.4240343792963312, "grad_norm": 7.15100387037088, "learning_rate": 5.358765194388783e-06, "loss": 0.0686920166015625, "step": 49040 }, { "epoch": 0.4240776128178745, "grad_norm": 16.097665093760348, "learning_rate": 5.3586393018010695e-06, "loss": 0.059236907958984376, "step": 49045 }, { "epoch": 0.4241208463394177, "grad_norm": 8.947870302916533, "learning_rate": 5.358513398335521e-06, "loss": 0.32078857421875, "step": 49050 }, { "epoch": 0.424164079860961, "grad_norm": 4.119154667644559, "learning_rate": 5.3583874839927165e-06, "loss": 0.0921356201171875, "step": 49055 }, { "epoch": 0.4242073133825043, "grad_norm": 3.286899033042754, "learning_rate": 5.358261558773237e-06, "loss": 0.13483734130859376, "step": 49060 }, { "epoch": 0.4242505469040475, "grad_norm": 2.0958236642276975, "learning_rate": 5.358135622677664e-06, "loss": 0.0130126953125, "step": 49065 }, { "epoch": 0.4242937804255908, "grad_norm": 2.8481622964423248, "learning_rate": 5.358009675706578e-06, "loss": 0.09256820678710938, "step": 49070 }, { "epoch": 0.424337013947134, "grad_norm": 1.357576511625735, "learning_rate": 5.3578837178605585e-06, "loss": 0.2632659912109375, "step": 49075 }, { "epoch": 0.4243802474686773, "grad_norm": 5.868389788772937, "learning_rate": 5.357757749140189e-06, "loss": 0.15185813903808593, "step": 49080 }, { "epoch": 0.4244234809902206, "grad_norm": 2.2661043648782324, "learning_rate": 5.357631769546049e-06, "loss": 0.02575225830078125, "step": 49085 }, { "epoch": 0.4244667145117638, "grad_norm": 14.661212642150264, "learning_rate": 5.357505779078719e-06, "loss": 0.13883056640625, "step": 49090 }, { "epoch": 0.4245099480333071, "grad_norm": 2.985778575726632, "learning_rate": 5.357379777738781e-06, "loss": 0.10321502685546875, "step": 49095 }, { "epoch": 0.4245531815548504, "grad_norm": 4.104723098407341, "learning_rate": 5.357253765526815e-06, "loss": 0.24532699584960938, "step": 49100 }, { "epoch": 0.4245964150763936, "grad_norm": 1.894414543985667, "learning_rate": 5.357127742443404e-06, "loss": 0.06774749755859374, "step": 49105 }, { "epoch": 0.4246396485979369, "grad_norm": 8.99275960741876, "learning_rate": 5.357001708489128e-06, "loss": 0.40802001953125, "step": 49110 }, { "epoch": 0.4246828821194802, "grad_norm": 27.26649130148118, "learning_rate": 5.3568756636645675e-06, "loss": 0.6159698486328125, "step": 49115 }, { "epoch": 0.4247261156410234, "grad_norm": 10.611351767544877, "learning_rate": 5.356749607970306e-06, "loss": 0.1162933349609375, "step": 49120 }, { "epoch": 0.4247693491625667, "grad_norm": 6.704178626948088, "learning_rate": 5.3566235414069225e-06, "loss": 0.061981201171875, "step": 49125 }, { "epoch": 0.42481258268410993, "grad_norm": 9.11672004071212, "learning_rate": 5.356497463975e-06, "loss": 0.572503662109375, "step": 49130 }, { "epoch": 0.4248558162056532, "grad_norm": 39.46407579708512, "learning_rate": 5.356371375675119e-06, "loss": 0.351641845703125, "step": 49135 }, { "epoch": 0.4248990497271965, "grad_norm": 29.116294317161522, "learning_rate": 5.356245276507862e-06, "loss": 0.4670433044433594, "step": 49140 }, { "epoch": 0.4249422832487397, "grad_norm": 14.8971156250242, "learning_rate": 5.35611916647381e-06, "loss": 0.22562255859375, "step": 49145 }, { "epoch": 0.424985516770283, "grad_norm": 8.675361269539355, "learning_rate": 5.355993045573544e-06, "loss": 0.06926040649414063, "step": 49150 }, { "epoch": 0.4250287502918263, "grad_norm": 5.533047822358799, "learning_rate": 5.355866913807646e-06, "loss": 0.08593063354492188, "step": 49155 }, { "epoch": 0.4250719838133695, "grad_norm": 1.1482452675326855, "learning_rate": 5.355740771176699e-06, "loss": 0.0825042724609375, "step": 49160 }, { "epoch": 0.4251152173349128, "grad_norm": 24.182953128077823, "learning_rate": 5.355614617681283e-06, "loss": 0.2350128173828125, "step": 49165 }, { "epoch": 0.42515845085645604, "grad_norm": 1.9513759816225578, "learning_rate": 5.355488453321981e-06, "loss": 0.13569793701171876, "step": 49170 }, { "epoch": 0.4252016843779993, "grad_norm": 5.681667499670318, "learning_rate": 5.355362278099374e-06, "loss": 0.13138427734375, "step": 49175 }, { "epoch": 0.4252449178995426, "grad_norm": 14.195804286890697, "learning_rate": 5.355236092014045e-06, "loss": 0.234942626953125, "step": 49180 }, { "epoch": 0.42528815142108584, "grad_norm": 0.6156934870042264, "learning_rate": 5.355109895066574e-06, "loss": 0.45244140625, "step": 49185 }, { "epoch": 0.4253313849426291, "grad_norm": 6.825752114612405, "learning_rate": 5.354983687257545e-06, "loss": 0.211553955078125, "step": 49190 }, { "epoch": 0.4253746184641724, "grad_norm": 25.986502530073, "learning_rate": 5.354857468587539e-06, "loss": 0.380645751953125, "step": 49195 }, { "epoch": 0.42541785198571563, "grad_norm": 2.0319890871013753, "learning_rate": 5.354731239057139e-06, "loss": 0.3345947265625, "step": 49200 }, { "epoch": 0.4254610855072589, "grad_norm": 1.5367866128072372, "learning_rate": 5.354604998666927e-06, "loss": 0.0872802734375, "step": 49205 }, { "epoch": 0.42550431902880215, "grad_norm": 15.888306289812999, "learning_rate": 5.354478747417483e-06, "loss": 0.17704925537109376, "step": 49210 }, { "epoch": 0.42554755255034543, "grad_norm": 4.70672338710646, "learning_rate": 5.354352485309393e-06, "loss": 0.25621795654296875, "step": 49215 }, { "epoch": 0.4255907860718887, "grad_norm": 2.34387593417154, "learning_rate": 5.354226212343236e-06, "loss": 0.0739593505859375, "step": 49220 }, { "epoch": 0.42563401959343194, "grad_norm": 26.02466287269495, "learning_rate": 5.354099928519596e-06, "loss": 0.08116073608398437, "step": 49225 }, { "epoch": 0.42567725311497523, "grad_norm": 0.9350924712219246, "learning_rate": 5.353973633839056e-06, "loss": 0.160723876953125, "step": 49230 }, { "epoch": 0.4257204866365185, "grad_norm": 4.868019301519911, "learning_rate": 5.353847328302197e-06, "loss": 0.07953948974609375, "step": 49235 }, { "epoch": 0.42576372015806174, "grad_norm": 3.439961298016058, "learning_rate": 5.353721011909602e-06, "loss": 0.13011856079101564, "step": 49240 }, { "epoch": 0.425806953679605, "grad_norm": 25.553471582367777, "learning_rate": 5.353594684661853e-06, "loss": 0.241064453125, "step": 49245 }, { "epoch": 0.42585018720114826, "grad_norm": 27.263249707063995, "learning_rate": 5.3534683465595345e-06, "loss": 0.11681747436523438, "step": 49250 }, { "epoch": 0.42589342072269154, "grad_norm": 1.8880543600123094, "learning_rate": 5.353341997603227e-06, "loss": 0.10568351745605468, "step": 49255 }, { "epoch": 0.4259366542442348, "grad_norm": 0.9564771524575747, "learning_rate": 5.353215637793515e-06, "loss": 0.2141998291015625, "step": 49260 }, { "epoch": 0.42597988776577805, "grad_norm": 1.7005661370265537, "learning_rate": 5.35308926713098e-06, "loss": 0.3794647216796875, "step": 49265 }, { "epoch": 0.42602312128732134, "grad_norm": 0.7383953800944978, "learning_rate": 5.352962885616205e-06, "loss": 0.093634033203125, "step": 49270 }, { "epoch": 0.4260663548088646, "grad_norm": 3.509298713435007, "learning_rate": 5.352836493249774e-06, "loss": 0.37933349609375, "step": 49275 }, { "epoch": 0.42610958833040785, "grad_norm": 0.5889288687153322, "learning_rate": 5.352710090032268e-06, "loss": 0.05994701385498047, "step": 49280 }, { "epoch": 0.42615282185195114, "grad_norm": 34.535616083503946, "learning_rate": 5.352583675964272e-06, "loss": 0.11448211669921875, "step": 49285 }, { "epoch": 0.4261960553734944, "grad_norm": 0.811552725811715, "learning_rate": 5.3524572510463675e-06, "loss": 0.3156410217285156, "step": 49290 }, { "epoch": 0.42623928889503765, "grad_norm": 0.46281813510346437, "learning_rate": 5.352330815279137e-06, "loss": 0.146575927734375, "step": 49295 }, { "epoch": 0.42628252241658093, "grad_norm": 7.925727130184398, "learning_rate": 5.352204368663166e-06, "loss": 0.1267791748046875, "step": 49300 }, { "epoch": 0.42632575593812416, "grad_norm": 11.760474614598264, "learning_rate": 5.352077911199035e-06, "loss": 0.1060791015625, "step": 49305 }, { "epoch": 0.42636898945966745, "grad_norm": 15.431028868710158, "learning_rate": 5.351951442887329e-06, "loss": 0.07894973754882813, "step": 49310 }, { "epoch": 0.42641222298121073, "grad_norm": 12.106168142739909, "learning_rate": 5.3518249637286315e-06, "loss": 0.2488800048828125, "step": 49315 }, { "epoch": 0.42645545650275396, "grad_norm": 1.097358328522286, "learning_rate": 5.351698473723524e-06, "loss": 0.2999481201171875, "step": 49320 }, { "epoch": 0.42649869002429724, "grad_norm": 16.01644487867623, "learning_rate": 5.351571972872592e-06, "loss": 0.37978515625, "step": 49325 }, { "epoch": 0.42654192354584053, "grad_norm": 16.3021155623421, "learning_rate": 5.351445461176417e-06, "loss": 0.23549346923828124, "step": 49330 }, { "epoch": 0.42658515706738376, "grad_norm": 4.554374399953104, "learning_rate": 5.351318938635584e-06, "loss": 0.052407455444335935, "step": 49335 }, { "epoch": 0.42662839058892704, "grad_norm": 2.6277517448137107, "learning_rate": 5.351192405250675e-06, "loss": 0.3665802001953125, "step": 49340 }, { "epoch": 0.42667162411047027, "grad_norm": 32.98036085325871, "learning_rate": 5.351065861022275e-06, "loss": 0.3493000030517578, "step": 49345 }, { "epoch": 0.42671485763201356, "grad_norm": 2.777928868201427, "learning_rate": 5.350939305950968e-06, "loss": 0.0624542236328125, "step": 49350 }, { "epoch": 0.42675809115355684, "grad_norm": 8.905778672751751, "learning_rate": 5.350812740037335e-06, "loss": 0.12786788940429689, "step": 49355 }, { "epoch": 0.42680132467510007, "grad_norm": 0.7190965034431058, "learning_rate": 5.350686163281962e-06, "loss": 0.11405410766601562, "step": 49360 }, { "epoch": 0.42684455819664335, "grad_norm": 1.5052310057943938, "learning_rate": 5.350559575685432e-06, "loss": 0.18720245361328125, "step": 49365 }, { "epoch": 0.42688779171818664, "grad_norm": 25.874991005342164, "learning_rate": 5.350432977248329e-06, "loss": 0.11692676544189454, "step": 49370 }, { "epoch": 0.42693102523972987, "grad_norm": 23.28480593865102, "learning_rate": 5.350306367971237e-06, "loss": 0.1903045654296875, "step": 49375 }, { "epoch": 0.42697425876127315, "grad_norm": 20.263765729253198, "learning_rate": 5.350179747854739e-06, "loss": 0.1173095703125, "step": 49380 }, { "epoch": 0.4270174922828164, "grad_norm": 8.310542016619493, "learning_rate": 5.35005311689942e-06, "loss": 0.21895294189453124, "step": 49385 }, { "epoch": 0.42706072580435966, "grad_norm": 1.511298480602914, "learning_rate": 5.349926475105864e-06, "loss": 0.09806976318359376, "step": 49390 }, { "epoch": 0.42710395932590295, "grad_norm": 4.79715136862765, "learning_rate": 5.349799822474655e-06, "loss": 0.24678955078125, "step": 49395 }, { "epoch": 0.4271471928474462, "grad_norm": 0.471558086029848, "learning_rate": 5.349673159006376e-06, "loss": 0.35061798095703123, "step": 49400 }, { "epoch": 0.42719042636898946, "grad_norm": 4.134655508935678, "learning_rate": 5.3495464847016125e-06, "loss": 0.10857620239257812, "step": 49405 }, { "epoch": 0.42723365989053275, "grad_norm": 12.016554882532137, "learning_rate": 5.349419799560948e-06, "loss": 0.27904739379882815, "step": 49410 }, { "epoch": 0.427276893412076, "grad_norm": 0.15618996958880185, "learning_rate": 5.349293103584967e-06, "loss": 0.13929519653320313, "step": 49415 }, { "epoch": 0.42732012693361926, "grad_norm": 10.26083877395492, "learning_rate": 5.349166396774254e-06, "loss": 0.3736747741699219, "step": 49420 }, { "epoch": 0.4273633604551625, "grad_norm": 2.9756420904113985, "learning_rate": 5.349039679129392e-06, "loss": 0.10055313110351563, "step": 49425 }, { "epoch": 0.4274065939767058, "grad_norm": 1.1501066906817616, "learning_rate": 5.348912950650969e-06, "loss": 0.2684478759765625, "step": 49430 }, { "epoch": 0.42744982749824906, "grad_norm": 8.675261570160366, "learning_rate": 5.348786211339565e-06, "loss": 0.11656665802001953, "step": 49435 }, { "epoch": 0.4274930610197923, "grad_norm": 18.72322239184233, "learning_rate": 5.3486594611957675e-06, "loss": 0.18509521484375, "step": 49440 }, { "epoch": 0.42753629454133557, "grad_norm": 8.07738916562591, "learning_rate": 5.348532700220159e-06, "loss": 0.22644691467285155, "step": 49445 }, { "epoch": 0.42757952806287886, "grad_norm": 22.077199205138673, "learning_rate": 5.3484059284133265e-06, "loss": 0.30382614135742186, "step": 49450 }, { "epoch": 0.4276227615844221, "grad_norm": 6.692280266012515, "learning_rate": 5.348279145775853e-06, "loss": 0.0447357177734375, "step": 49455 }, { "epoch": 0.42766599510596537, "grad_norm": 10.983599595519644, "learning_rate": 5.348152352308324e-06, "loss": 0.045977783203125, "step": 49460 }, { "epoch": 0.4277092286275086, "grad_norm": 22.63756254503497, "learning_rate": 5.3480255480113225e-06, "loss": 0.1450531005859375, "step": 49465 }, { "epoch": 0.4277524621490519, "grad_norm": 0.2608267669926414, "learning_rate": 5.347898732885435e-06, "loss": 0.067108154296875, "step": 49470 }, { "epoch": 0.42779569567059517, "grad_norm": 7.931673840026527, "learning_rate": 5.347771906931248e-06, "loss": 0.1169342041015625, "step": 49475 }, { "epoch": 0.4278389291921384, "grad_norm": 17.87551287038922, "learning_rate": 5.347645070149342e-06, "loss": 0.25250091552734377, "step": 49480 }, { "epoch": 0.4278821627136817, "grad_norm": 5.137153776578333, "learning_rate": 5.347518222540306e-06, "loss": 0.11264381408691407, "step": 49485 }, { "epoch": 0.42792539623522496, "grad_norm": 20.82612666094172, "learning_rate": 5.3473913641047215e-06, "loss": 0.143475341796875, "step": 49490 }, { "epoch": 0.4279686297567682, "grad_norm": 5.880888486872098, "learning_rate": 5.347264494843177e-06, "loss": 0.10655784606933594, "step": 49495 }, { "epoch": 0.4280118632783115, "grad_norm": 0.47437588723654117, "learning_rate": 5.347137614756256e-06, "loss": 0.11059722900390626, "step": 49500 }, { "epoch": 0.42805509679985476, "grad_norm": 0.6721740601164206, "learning_rate": 5.347010723844543e-06, "loss": 0.023015594482421874, "step": 49505 }, { "epoch": 0.428098330321398, "grad_norm": 0.9200499120028219, "learning_rate": 5.346883822108624e-06, "loss": 0.327301025390625, "step": 49510 }, { "epoch": 0.4281415638429413, "grad_norm": 2.4922917582707407, "learning_rate": 5.3467569095490845e-06, "loss": 0.10503997802734374, "step": 49515 }, { "epoch": 0.4281847973644845, "grad_norm": 2.865903893575353, "learning_rate": 5.34662998616651e-06, "loss": 0.18742141723632813, "step": 49520 }, { "epoch": 0.4282280308860278, "grad_norm": 5.589252772957028, "learning_rate": 5.346503051961485e-06, "loss": 0.06794204711914062, "step": 49525 }, { "epoch": 0.4282712644075711, "grad_norm": 20.45690077236147, "learning_rate": 5.346376106934594e-06, "loss": 0.07731170654296875, "step": 49530 }, { "epoch": 0.4283144979291143, "grad_norm": 164.89091557890168, "learning_rate": 5.346249151086425e-06, "loss": 0.5129119873046875, "step": 49535 }, { "epoch": 0.4283577314506576, "grad_norm": 3.572821872457535, "learning_rate": 5.346122184417562e-06, "loss": 0.06519775390625, "step": 49540 }, { "epoch": 0.42840096497220087, "grad_norm": 2.026837269152825, "learning_rate": 5.345995206928591e-06, "loss": 0.23243408203125, "step": 49545 }, { "epoch": 0.4284441984937441, "grad_norm": 35.330925812078355, "learning_rate": 5.345868218620097e-06, "loss": 0.17958812713623046, "step": 49550 }, { "epoch": 0.4284874320152874, "grad_norm": 6.620586739440521, "learning_rate": 5.345741219492667e-06, "loss": 0.140509033203125, "step": 49555 }, { "epoch": 0.4285306655368306, "grad_norm": 4.980773641558111, "learning_rate": 5.345614209546885e-06, "loss": 0.07177581787109374, "step": 49560 }, { "epoch": 0.4285738990583739, "grad_norm": 4.049804885382833, "learning_rate": 5.345487188783337e-06, "loss": 0.06232376098632812, "step": 49565 }, { "epoch": 0.4286171325799172, "grad_norm": 1.9351455234677528, "learning_rate": 5.345360157202609e-06, "loss": 0.185662841796875, "step": 49570 }, { "epoch": 0.4286603661014604, "grad_norm": 2.8481558116811962, "learning_rate": 5.345233114805289e-06, "loss": 0.022556304931640625, "step": 49575 }, { "epoch": 0.4287035996230037, "grad_norm": 6.268386721239525, "learning_rate": 5.34510606159196e-06, "loss": 0.31872100830078126, "step": 49580 }, { "epoch": 0.428746833144547, "grad_norm": 18.856108026561646, "learning_rate": 5.34497899756321e-06, "loss": 0.16688461303710939, "step": 49585 }, { "epoch": 0.4287900666660902, "grad_norm": 1.547823198387081, "learning_rate": 5.344851922719623e-06, "loss": 0.11861152648925781, "step": 49590 }, { "epoch": 0.4288333001876335, "grad_norm": 2.4574252561007763, "learning_rate": 5.344724837061786e-06, "loss": 0.1296234130859375, "step": 49595 }, { "epoch": 0.4288765337091767, "grad_norm": 25.57404197014814, "learning_rate": 5.344597740590286e-06, "loss": 0.12423667907714844, "step": 49600 }, { "epoch": 0.42891976723072, "grad_norm": 33.740092389414635, "learning_rate": 5.344470633305708e-06, "loss": 0.40818634033203127, "step": 49605 }, { "epoch": 0.4289630007522633, "grad_norm": 3.7695153881839465, "learning_rate": 5.344343515208639e-06, "loss": 0.21760149002075196, "step": 49610 }, { "epoch": 0.4290062342738065, "grad_norm": 38.50494987175199, "learning_rate": 5.3442163862996645e-06, "loss": 0.22387161254882812, "step": 49615 }, { "epoch": 0.4290494677953498, "grad_norm": 0.26155918946675955, "learning_rate": 5.344089246579371e-06, "loss": 0.16635894775390625, "step": 49620 }, { "epoch": 0.4290927013168931, "grad_norm": 4.999056871855604, "learning_rate": 5.343962096048345e-06, "loss": 0.4065155029296875, "step": 49625 }, { "epoch": 0.4291359348384363, "grad_norm": 27.366391600112973, "learning_rate": 5.343834934707173e-06, "loss": 0.3789031982421875, "step": 49630 }, { "epoch": 0.4291791683599796, "grad_norm": 4.767479087611217, "learning_rate": 5.3437077625564415e-06, "loss": 0.2710540771484375, "step": 49635 }, { "epoch": 0.42922240188152283, "grad_norm": 10.245475618970884, "learning_rate": 5.343580579596738e-06, "loss": 0.19004364013671876, "step": 49640 }, { "epoch": 0.4292656354030661, "grad_norm": 1.883988701721078, "learning_rate": 5.343453385828646e-06, "loss": 0.033856964111328124, "step": 49645 }, { "epoch": 0.4293088689246094, "grad_norm": 3.615605206814626, "learning_rate": 5.343326181252755e-06, "loss": 0.25201568603515623, "step": 49650 }, { "epoch": 0.42935210244615263, "grad_norm": 7.463820736982764, "learning_rate": 5.343198965869652e-06, "loss": 0.0765838623046875, "step": 49655 }, { "epoch": 0.4293953359676959, "grad_norm": 14.380738241454964, "learning_rate": 5.34307173967992e-06, "loss": 0.2503326416015625, "step": 49660 }, { "epoch": 0.4294385694892392, "grad_norm": 16.103214404055564, "learning_rate": 5.3429445026841485e-06, "loss": 0.29744873046875, "step": 49665 }, { "epoch": 0.4294818030107824, "grad_norm": 0.57593308800504, "learning_rate": 5.342817254882925e-06, "loss": 0.055157470703125, "step": 49670 }, { "epoch": 0.4295250365323257, "grad_norm": 0.7702676786163009, "learning_rate": 5.342689996276835e-06, "loss": 0.24418182373046876, "step": 49675 }, { "epoch": 0.429568270053869, "grad_norm": 0.7372332910111365, "learning_rate": 5.3425627268664656e-06, "loss": 0.027100372314453124, "step": 49680 }, { "epoch": 0.4296115035754122, "grad_norm": 0.5773286890889753, "learning_rate": 5.342435446652404e-06, "loss": 0.1754669189453125, "step": 49685 }, { "epoch": 0.4296547370969555, "grad_norm": 1.7953593175630675, "learning_rate": 5.342308155635238e-06, "loss": 0.08187255859375, "step": 49690 }, { "epoch": 0.42969797061849874, "grad_norm": 10.679805155344976, "learning_rate": 5.342180853815552e-06, "loss": 0.222528076171875, "step": 49695 }, { "epoch": 0.429741204140042, "grad_norm": 27.881803341972397, "learning_rate": 5.342053541193937e-06, "loss": 0.5006950378417969, "step": 49700 }, { "epoch": 0.4297844376615853, "grad_norm": 3.898093119416215, "learning_rate": 5.3419262177709755e-06, "loss": 0.07064018249511719, "step": 49705 }, { "epoch": 0.42982767118312853, "grad_norm": 4.027092564176058, "learning_rate": 5.341798883547259e-06, "loss": 0.090765380859375, "step": 49710 }, { "epoch": 0.4298709047046718, "grad_norm": 1.4152376298701703, "learning_rate": 5.341671538523372e-06, "loss": 0.10266380310058594, "step": 49715 }, { "epoch": 0.4299141382262151, "grad_norm": 24.000032273759786, "learning_rate": 5.341544182699904e-06, "loss": 0.26493301391601565, "step": 49720 }, { "epoch": 0.42995737174775833, "grad_norm": 4.8328855848324395, "learning_rate": 5.34141681607744e-06, "loss": 0.15576629638671874, "step": 49725 }, { "epoch": 0.4300006052693016, "grad_norm": 28.927116131511365, "learning_rate": 5.34128943865657e-06, "loss": 0.11771621704101562, "step": 49730 }, { "epoch": 0.43004383879084485, "grad_norm": 19.062176160896033, "learning_rate": 5.341162050437879e-06, "loss": 0.181353759765625, "step": 49735 }, { "epoch": 0.43008707231238813, "grad_norm": 12.191821189143397, "learning_rate": 5.341034651421955e-06, "loss": 0.10634307861328125, "step": 49740 }, { "epoch": 0.4301303058339314, "grad_norm": 21.214442716836782, "learning_rate": 5.3409072416093854e-06, "loss": 0.1656280517578125, "step": 49745 }, { "epoch": 0.43017353935547464, "grad_norm": 6.706666557585141, "learning_rate": 5.34077982100076e-06, "loss": 0.13623046875, "step": 49750 }, { "epoch": 0.43021677287701793, "grad_norm": 0.4354171999153345, "learning_rate": 5.340652389596665e-06, "loss": 0.2498138427734375, "step": 49755 }, { "epoch": 0.4302600063985612, "grad_norm": 16.03521491135773, "learning_rate": 5.340524947397686e-06, "loss": 0.1988525390625, "step": 49760 }, { "epoch": 0.43030323992010444, "grad_norm": 28.980912967593987, "learning_rate": 5.340397494404415e-06, "loss": 0.18259506225585936, "step": 49765 }, { "epoch": 0.4303464734416477, "grad_norm": 26.46690884978245, "learning_rate": 5.3402700306174365e-06, "loss": 0.6249988555908204, "step": 49770 }, { "epoch": 0.43038970696319095, "grad_norm": 16.790710163847837, "learning_rate": 5.34014255603734e-06, "loss": 0.15609130859375, "step": 49775 }, { "epoch": 0.43043294048473424, "grad_norm": 2.9554143124672607, "learning_rate": 5.340015070664713e-06, "loss": 0.0219696044921875, "step": 49780 }, { "epoch": 0.4304761740062775, "grad_norm": 1.2414262127759355, "learning_rate": 5.339887574500142e-06, "loss": 0.16281204223632811, "step": 49785 }, { "epoch": 0.43051940752782075, "grad_norm": 75.95281358235445, "learning_rate": 5.339760067544217e-06, "loss": 0.46688232421875, "step": 49790 }, { "epoch": 0.43056264104936404, "grad_norm": 1.307923194248045, "learning_rate": 5.3396325497975255e-06, "loss": 0.09495086669921875, "step": 49795 }, { "epoch": 0.4306058745709073, "grad_norm": 10.011988639916675, "learning_rate": 5.3395050212606555e-06, "loss": 0.074078369140625, "step": 49800 }, { "epoch": 0.43064910809245055, "grad_norm": 24.51689932558825, "learning_rate": 5.339377481934194e-06, "loss": 0.14846343994140626, "step": 49805 }, { "epoch": 0.43069234161399383, "grad_norm": 7.483439440414308, "learning_rate": 5.3392499318187315e-06, "loss": 0.0329010009765625, "step": 49810 }, { "epoch": 0.43073557513553706, "grad_norm": 16.69708340224177, "learning_rate": 5.339122370914854e-06, "loss": 0.2128875732421875, "step": 49815 }, { "epoch": 0.43077880865708035, "grad_norm": 16.283236890723444, "learning_rate": 5.338994799223152e-06, "loss": 0.177734375, "step": 49820 }, { "epoch": 0.43082204217862363, "grad_norm": 6.8510929391004165, "learning_rate": 5.338867216744212e-06, "loss": 0.09879302978515625, "step": 49825 }, { "epoch": 0.43086527570016686, "grad_norm": 0.3274241939635283, "learning_rate": 5.338739623478623e-06, "loss": 0.23521728515625, "step": 49830 }, { "epoch": 0.43090850922171015, "grad_norm": 11.033248084845093, "learning_rate": 5.338612019426974e-06, "loss": 0.19815025329589844, "step": 49835 }, { "epoch": 0.43095174274325343, "grad_norm": 3.7794576308550654, "learning_rate": 5.338484404589852e-06, "loss": 0.3904296875, "step": 49840 }, { "epoch": 0.43099497626479666, "grad_norm": 4.024413506745147, "learning_rate": 5.338356778967848e-06, "loss": 0.060595703125, "step": 49845 }, { "epoch": 0.43103820978633994, "grad_norm": 11.443778216975376, "learning_rate": 5.338229142561547e-06, "loss": 0.26207275390625, "step": 49850 }, { "epoch": 0.4310814433078832, "grad_norm": 2.05855985913387, "learning_rate": 5.3381014953715415e-06, "loss": 0.0989349365234375, "step": 49855 }, { "epoch": 0.43112467682942646, "grad_norm": 2.519411095373522, "learning_rate": 5.337973837398418e-06, "loss": 0.170208740234375, "step": 49860 }, { "epoch": 0.43116791035096974, "grad_norm": 0.13542901604949212, "learning_rate": 5.337846168642766e-06, "loss": 0.0457305908203125, "step": 49865 }, { "epoch": 0.43121114387251297, "grad_norm": 0.6135905679864789, "learning_rate": 5.337718489105174e-06, "loss": 0.323828125, "step": 49870 }, { "epoch": 0.43125437739405625, "grad_norm": 3.0728783512852575, "learning_rate": 5.3375907987862315e-06, "loss": 0.1854999542236328, "step": 49875 }, { "epoch": 0.43129761091559954, "grad_norm": 0.6578121426633746, "learning_rate": 5.3374630976865266e-06, "loss": 0.3281829833984375, "step": 49880 }, { "epoch": 0.43134084443714277, "grad_norm": 17.301184538642687, "learning_rate": 5.337335385806647e-06, "loss": 0.357769775390625, "step": 49885 }, { "epoch": 0.43138407795868605, "grad_norm": 37.07575029333251, "learning_rate": 5.3372076631471845e-06, "loss": 0.305035400390625, "step": 49890 }, { "epoch": 0.43142731148022934, "grad_norm": 0.2134896166571156, "learning_rate": 5.3370799297087265e-06, "loss": 0.23580322265625, "step": 49895 }, { "epoch": 0.43147054500177257, "grad_norm": 2.1194379097788474, "learning_rate": 5.3369521854918614e-06, "loss": 0.24000244140625, "step": 49900 }, { "epoch": 0.43151377852331585, "grad_norm": 36.226254160927084, "learning_rate": 5.336824430497181e-06, "loss": 0.2606838226318359, "step": 49905 }, { "epoch": 0.4315570120448591, "grad_norm": 0.7491516703468266, "learning_rate": 5.336696664725272e-06, "loss": 0.11060638427734375, "step": 49910 }, { "epoch": 0.43160024556640236, "grad_norm": 3.0422554822243058, "learning_rate": 5.336568888176724e-06, "loss": 0.17860260009765624, "step": 49915 }, { "epoch": 0.43164347908794565, "grad_norm": 4.51672314203297, "learning_rate": 5.336441100852127e-06, "loss": 0.1689361572265625, "step": 49920 }, { "epoch": 0.4316867126094889, "grad_norm": 6.4557087091919225, "learning_rate": 5.33631330275207e-06, "loss": 0.12574462890625, "step": 49925 }, { "epoch": 0.43172994613103216, "grad_norm": 17.591956295716017, "learning_rate": 5.336185493877143e-06, "loss": 0.28558349609375, "step": 49930 }, { "epoch": 0.43177317965257545, "grad_norm": 19.296798279775988, "learning_rate": 5.336057674227935e-06, "loss": 0.23720779418945312, "step": 49935 }, { "epoch": 0.4318164131741187, "grad_norm": 1.0144061287559365, "learning_rate": 5.3359298438050345e-06, "loss": 0.317803955078125, "step": 49940 }, { "epoch": 0.43185964669566196, "grad_norm": 0.5083514151469403, "learning_rate": 5.335802002609033e-06, "loss": 0.22099647521972657, "step": 49945 }, { "epoch": 0.4319028802172052, "grad_norm": 1.805312221008229, "learning_rate": 5.335674150640519e-06, "loss": 0.04253463745117188, "step": 49950 }, { "epoch": 0.43194611373874847, "grad_norm": 3.4248322138204195, "learning_rate": 5.335546287900082e-06, "loss": 0.059661865234375, "step": 49955 }, { "epoch": 0.43198934726029176, "grad_norm": 2.1817117342020356, "learning_rate": 5.335418414388311e-06, "loss": 0.2941741943359375, "step": 49960 }, { "epoch": 0.432032580781835, "grad_norm": 14.190716380133207, "learning_rate": 5.3352905301057975e-06, "loss": 0.13727645874023436, "step": 49965 }, { "epoch": 0.43207581430337827, "grad_norm": 6.765953393988783, "learning_rate": 5.33516263505313e-06, "loss": 0.29603271484375, "step": 49970 }, { "epoch": 0.43211904782492155, "grad_norm": 1.568434610891301, "learning_rate": 5.335034729230899e-06, "loss": 0.2799163818359375, "step": 49975 }, { "epoch": 0.4321622813464648, "grad_norm": 20.277559882194463, "learning_rate": 5.334906812639695e-06, "loss": 0.17987442016601562, "step": 49980 }, { "epoch": 0.43220551486800807, "grad_norm": 2.5180205863964242, "learning_rate": 5.334778885280105e-06, "loss": 0.08702239990234376, "step": 49985 }, { "epoch": 0.4322487483895513, "grad_norm": 0.39796246836328025, "learning_rate": 5.334650947152723e-06, "loss": 0.15127334594726563, "step": 49990 }, { "epoch": 0.4322919819110946, "grad_norm": 15.644506760222802, "learning_rate": 5.334522998258136e-06, "loss": 0.07010765075683593, "step": 49995 }, { "epoch": 0.43233521543263786, "grad_norm": 17.11631208206834, "learning_rate": 5.334395038596936e-06, "loss": 0.06564712524414062, "step": 50000 }, { "epoch": 0.4323784489541811, "grad_norm": 16.329105285875972, "learning_rate": 5.334267068169712e-06, "loss": 0.1686279296875, "step": 50005 }, { "epoch": 0.4324216824757244, "grad_norm": 24.6413481164837, "learning_rate": 5.334139086977054e-06, "loss": 0.37219390869140623, "step": 50010 }, { "epoch": 0.43246491599726766, "grad_norm": 0.5884518852065346, "learning_rate": 5.334011095019554e-06, "loss": 0.3291046142578125, "step": 50015 }, { "epoch": 0.4325081495188109, "grad_norm": 15.056537802741486, "learning_rate": 5.333883092297801e-06, "loss": 0.2341632843017578, "step": 50020 }, { "epoch": 0.4325513830403542, "grad_norm": 14.244199242408506, "learning_rate": 5.333755078812384e-06, "loss": 0.08370933532714844, "step": 50025 }, { "epoch": 0.43259461656189746, "grad_norm": 1.1304320911168129, "learning_rate": 5.333627054563896e-06, "loss": 0.24490966796875, "step": 50030 }, { "epoch": 0.4326378500834407, "grad_norm": 17.059283341896435, "learning_rate": 5.333499019552925e-06, "loss": 0.2090728759765625, "step": 50035 }, { "epoch": 0.432681083604984, "grad_norm": 1.1787310143088832, "learning_rate": 5.333370973780065e-06, "loss": 0.1338470458984375, "step": 50040 }, { "epoch": 0.4327243171265272, "grad_norm": 9.779612142588174, "learning_rate": 5.333242917245903e-06, "loss": 0.056499481201171875, "step": 50045 }, { "epoch": 0.4327675506480705, "grad_norm": 1.203723705228621, "learning_rate": 5.33311484995103e-06, "loss": 0.17972564697265625, "step": 50050 }, { "epoch": 0.43281078416961377, "grad_norm": 6.420472140926915, "learning_rate": 5.332986771896037e-06, "loss": 0.22974853515625, "step": 50055 }, { "epoch": 0.432854017691157, "grad_norm": 28.055596395410788, "learning_rate": 5.332858683081517e-06, "loss": 0.28475112915039064, "step": 50060 }, { "epoch": 0.4328972512127003, "grad_norm": 1.6575621570310435, "learning_rate": 5.332730583508058e-06, "loss": 0.29687652587890623, "step": 50065 }, { "epoch": 0.43294048473424357, "grad_norm": 0.6573159642912856, "learning_rate": 5.332602473176252e-06, "loss": 0.0492401123046875, "step": 50070 }, { "epoch": 0.4329837182557868, "grad_norm": 0.1895684469186362, "learning_rate": 5.332474352086689e-06, "loss": 0.04037017822265625, "step": 50075 }, { "epoch": 0.4330269517773301, "grad_norm": 1.349112729693872, "learning_rate": 5.332346220239961e-06, "loss": 0.14392776489257814, "step": 50080 }, { "epoch": 0.4330701852988733, "grad_norm": 9.46257939018291, "learning_rate": 5.3322180776366585e-06, "loss": 0.14625396728515624, "step": 50085 }, { "epoch": 0.4331134188204166, "grad_norm": 1.4585078481324545, "learning_rate": 5.3320899242773716e-06, "loss": 0.1843597412109375, "step": 50090 }, { "epoch": 0.4331566523419599, "grad_norm": 3.697680127701567, "learning_rate": 5.331961760162692e-06, "loss": 0.311248779296875, "step": 50095 }, { "epoch": 0.4331998858635031, "grad_norm": 19.97886115363688, "learning_rate": 5.331833585293212e-06, "loss": 0.07776756286621093, "step": 50100 }, { "epoch": 0.4332431193850464, "grad_norm": 41.371016690599944, "learning_rate": 5.331705399669521e-06, "loss": 0.8070411682128906, "step": 50105 }, { "epoch": 0.4332863529065897, "grad_norm": 31.168436184281187, "learning_rate": 5.33157720329221e-06, "loss": 0.43757781982421873, "step": 50110 }, { "epoch": 0.4333295864281329, "grad_norm": 2.230798709814631, "learning_rate": 5.331448996161871e-06, "loss": 0.12845306396484374, "step": 50115 }, { "epoch": 0.4333728199496762, "grad_norm": 24.677306560007807, "learning_rate": 5.3313207782790955e-06, "loss": 0.12551422119140626, "step": 50120 }, { "epoch": 0.4334160534712194, "grad_norm": 3.4749018099689013, "learning_rate": 5.331192549644475e-06, "loss": 0.1374744415283203, "step": 50125 }, { "epoch": 0.4334592869927627, "grad_norm": 3.449236832404694, "learning_rate": 5.3310643102586e-06, "loss": 0.18898468017578124, "step": 50130 }, { "epoch": 0.433502520514306, "grad_norm": 15.569421966777869, "learning_rate": 5.3309360601220625e-06, "loss": 0.07367172241210937, "step": 50135 }, { "epoch": 0.4335457540358492, "grad_norm": 9.541802868647038, "learning_rate": 5.330807799235454e-06, "loss": 0.29845733642578126, "step": 50140 }, { "epoch": 0.4335889875573925, "grad_norm": 5.1727080642004415, "learning_rate": 5.330679527599365e-06, "loss": 0.12880439758300782, "step": 50145 }, { "epoch": 0.4336322210789358, "grad_norm": 0.5001147485521618, "learning_rate": 5.3305512452143894e-06, "loss": 0.06856803894042969, "step": 50150 }, { "epoch": 0.433675454600479, "grad_norm": 6.643418086885186, "learning_rate": 5.330422952081116e-06, "loss": 0.443463134765625, "step": 50155 }, { "epoch": 0.4337186881220223, "grad_norm": 13.08768125842318, "learning_rate": 5.330294648200139e-06, "loss": 0.1722747802734375, "step": 50160 }, { "epoch": 0.43376192164356553, "grad_norm": 4.2037203601663204, "learning_rate": 5.330166333572049e-06, "loss": 0.176806640625, "step": 50165 }, { "epoch": 0.4338051551651088, "grad_norm": 28.99938394325032, "learning_rate": 5.330038008197437e-06, "loss": 0.3834075927734375, "step": 50170 }, { "epoch": 0.4338483886866521, "grad_norm": 3.44754424420312, "learning_rate": 5.329909672076895e-06, "loss": 0.43711395263671876, "step": 50175 }, { "epoch": 0.4338916222081953, "grad_norm": 7.937290335106569, "learning_rate": 5.329781325211017e-06, "loss": 0.11527099609375, "step": 50180 }, { "epoch": 0.4339348557297386, "grad_norm": 23.43178273247363, "learning_rate": 5.329652967600393e-06, "loss": 0.20913848876953126, "step": 50185 }, { "epoch": 0.4339780892512819, "grad_norm": 20.02855587022471, "learning_rate": 5.329524599245615e-06, "loss": 0.2325439453125, "step": 50190 }, { "epoch": 0.4340213227728251, "grad_norm": 19.836423366465674, "learning_rate": 5.329396220147275e-06, "loss": 0.2693813323974609, "step": 50195 }, { "epoch": 0.4340645562943684, "grad_norm": 3.0070911069049466, "learning_rate": 5.3292678303059664e-06, "loss": 0.0754119873046875, "step": 50200 }, { "epoch": 0.43410778981591164, "grad_norm": 2.2958756161569482, "learning_rate": 5.3291394297222795e-06, "loss": 0.14578857421875, "step": 50205 }, { "epoch": 0.4341510233374549, "grad_norm": 29.94556953146304, "learning_rate": 5.329011018396808e-06, "loss": 0.300653076171875, "step": 50210 }, { "epoch": 0.4341942568589982, "grad_norm": 15.033742082476573, "learning_rate": 5.328882596330143e-06, "loss": 0.07087020874023438, "step": 50215 }, { "epoch": 0.43423749038054144, "grad_norm": 15.223412363922426, "learning_rate": 5.328754163522877e-06, "loss": 0.3202484130859375, "step": 50220 }, { "epoch": 0.4342807239020847, "grad_norm": 5.072867616977507, "learning_rate": 5.328625719975603e-06, "loss": 0.441851806640625, "step": 50225 }, { "epoch": 0.434323957423628, "grad_norm": 2.4866770581146285, "learning_rate": 5.3284972656889135e-06, "loss": 0.027677154541015624, "step": 50230 }, { "epoch": 0.43436719094517123, "grad_norm": 10.886797795166071, "learning_rate": 5.3283688006634e-06, "loss": 0.03934135437011719, "step": 50235 }, { "epoch": 0.4344104244667145, "grad_norm": 7.506482992709317, "learning_rate": 5.328240324899656e-06, "loss": 0.11087646484375, "step": 50240 }, { "epoch": 0.4344536579882578, "grad_norm": 21.09474197131581, "learning_rate": 5.328111838398272e-06, "loss": 0.2819000244140625, "step": 50245 }, { "epoch": 0.43449689150980103, "grad_norm": 5.507634794707332, "learning_rate": 5.327983341159843e-06, "loss": 0.226251220703125, "step": 50250 }, { "epoch": 0.4345401250313443, "grad_norm": 5.343140301137884, "learning_rate": 5.32785483318496e-06, "loss": 0.05497722625732422, "step": 50255 }, { "epoch": 0.43458335855288754, "grad_norm": 36.94493735499508, "learning_rate": 5.327726314474216e-06, "loss": 0.36237945556640627, "step": 50260 }, { "epoch": 0.43462659207443083, "grad_norm": 3.3171369498697625, "learning_rate": 5.327597785028204e-06, "loss": 0.12277984619140625, "step": 50265 }, { "epoch": 0.4346698255959741, "grad_norm": 12.39695031620033, "learning_rate": 5.327469244847517e-06, "loss": 0.3165679931640625, "step": 50270 }, { "epoch": 0.43471305911751734, "grad_norm": 2.91391015628754, "learning_rate": 5.3273406939327475e-06, "loss": 0.07400360107421874, "step": 50275 }, { "epoch": 0.4347562926390606, "grad_norm": 2.1600327164752775, "learning_rate": 5.327212132284488e-06, "loss": 0.1022003173828125, "step": 50280 }, { "epoch": 0.4347995261606039, "grad_norm": 53.186126893143985, "learning_rate": 5.327083559903332e-06, "loss": 0.32683258056640624, "step": 50285 }, { "epoch": 0.43484275968214714, "grad_norm": 2.737156549202941, "learning_rate": 5.326954976789872e-06, "loss": 0.2912384033203125, "step": 50290 }, { "epoch": 0.4348859932036904, "grad_norm": 4.627589296149999, "learning_rate": 5.3268263829447015e-06, "loss": 0.13720855712890626, "step": 50295 }, { "epoch": 0.43492922672523365, "grad_norm": 16.175386540349173, "learning_rate": 5.326697778368414e-06, "loss": 0.166705322265625, "step": 50300 }, { "epoch": 0.43497246024677694, "grad_norm": 12.200894401677887, "learning_rate": 5.326569163061601e-06, "loss": 0.2410736083984375, "step": 50305 }, { "epoch": 0.4350156937683202, "grad_norm": 1.7240100719655824, "learning_rate": 5.3264405370248565e-06, "loss": 0.1946807861328125, "step": 50310 }, { "epoch": 0.43505892728986345, "grad_norm": 2.4288424957523613, "learning_rate": 5.326311900258774e-06, "loss": 0.12052001953125, "step": 50315 }, { "epoch": 0.43510216081140674, "grad_norm": 8.594274181810174, "learning_rate": 5.326183252763947e-06, "loss": 0.17100830078125, "step": 50320 }, { "epoch": 0.43514539433295, "grad_norm": 10.372054285370034, "learning_rate": 5.326054594540968e-06, "loss": 0.19785919189453124, "step": 50325 }, { "epoch": 0.43518862785449325, "grad_norm": 7.897044878218246, "learning_rate": 5.325925925590431e-06, "loss": 0.0972564697265625, "step": 50330 }, { "epoch": 0.43523186137603653, "grad_norm": 0.22056323328120356, "learning_rate": 5.325797245912929e-06, "loss": 0.38272705078125, "step": 50335 }, { "epoch": 0.43527509489757976, "grad_norm": 1.970437813472822, "learning_rate": 5.325668555509055e-06, "loss": 0.0688690185546875, "step": 50340 }, { "epoch": 0.43531832841912305, "grad_norm": 19.667406890242663, "learning_rate": 5.325539854379404e-06, "loss": 0.15965576171875, "step": 50345 }, { "epoch": 0.43536156194066633, "grad_norm": 5.159209403455846, "learning_rate": 5.325411142524568e-06, "loss": 0.0572662353515625, "step": 50350 }, { "epoch": 0.43540479546220956, "grad_norm": 1.8068102821955185, "learning_rate": 5.325282419945142e-06, "loss": 0.3080137252807617, "step": 50355 }, { "epoch": 0.43544802898375284, "grad_norm": 3.6923183407496776, "learning_rate": 5.325153686641719e-06, "loss": 0.07841033935546875, "step": 50360 }, { "epoch": 0.43549126250529613, "grad_norm": 48.421765611801305, "learning_rate": 5.3250249426148925e-06, "loss": 0.4046875, "step": 50365 }, { "epoch": 0.43553449602683936, "grad_norm": 1.5388920786322438, "learning_rate": 5.324896187865256e-06, "loss": 0.154888916015625, "step": 50370 }, { "epoch": 0.43557772954838264, "grad_norm": 15.159268704168822, "learning_rate": 5.324767422393404e-06, "loss": 0.13545608520507812, "step": 50375 }, { "epoch": 0.43562096306992587, "grad_norm": 7.289755016961822, "learning_rate": 5.32463864619993e-06, "loss": 0.46646728515625, "step": 50380 }, { "epoch": 0.43566419659146915, "grad_norm": 3.5311870051961955, "learning_rate": 5.324509859285428e-06, "loss": 0.14344940185546876, "step": 50385 }, { "epoch": 0.43570743011301244, "grad_norm": 0.019054116692095162, "learning_rate": 5.324381061650492e-06, "loss": 0.2743377685546875, "step": 50390 }, { "epoch": 0.43575066363455567, "grad_norm": 7.731324854850651, "learning_rate": 5.324252253295716e-06, "loss": 0.09292678833007813, "step": 50395 }, { "epoch": 0.43579389715609895, "grad_norm": 4.448048873161584, "learning_rate": 5.324123434221693e-06, "loss": 0.1250579833984375, "step": 50400 }, { "epoch": 0.43583713067764224, "grad_norm": 9.910559359039793, "learning_rate": 5.323994604429019e-06, "loss": 0.1100341796875, "step": 50405 }, { "epoch": 0.43588036419918547, "grad_norm": 3.418788133429993, "learning_rate": 5.3238657639182874e-06, "loss": 0.2791015625, "step": 50410 }, { "epoch": 0.43592359772072875, "grad_norm": 4.245830828711862, "learning_rate": 5.323736912690092e-06, "loss": 0.33397216796875, "step": 50415 }, { "epoch": 0.43596683124227203, "grad_norm": 4.6348951221245605, "learning_rate": 5.323608050745027e-06, "loss": 0.111395263671875, "step": 50420 }, { "epoch": 0.43601006476381526, "grad_norm": 28.48509119832951, "learning_rate": 5.323479178083687e-06, "loss": 0.7175994873046875, "step": 50425 }, { "epoch": 0.43605329828535855, "grad_norm": 13.849760383422003, "learning_rate": 5.323350294706667e-06, "loss": 0.222821044921875, "step": 50430 }, { "epoch": 0.4360965318069018, "grad_norm": 16.173147790196367, "learning_rate": 5.32322140061456e-06, "loss": 0.1936492919921875, "step": 50435 }, { "epoch": 0.43613976532844506, "grad_norm": 1.692493672184526, "learning_rate": 5.323092495807961e-06, "loss": 0.0586151123046875, "step": 50440 }, { "epoch": 0.43618299884998835, "grad_norm": 9.505086192339979, "learning_rate": 5.322963580287466e-06, "loss": 0.112109375, "step": 50445 }, { "epoch": 0.4362262323715316, "grad_norm": 9.698585302145984, "learning_rate": 5.322834654053668e-06, "loss": 0.16510066986083985, "step": 50450 }, { "epoch": 0.43626946589307486, "grad_norm": 2.1640677441131793, "learning_rate": 5.322705717107161e-06, "loss": 0.1670013427734375, "step": 50455 }, { "epoch": 0.43631269941461814, "grad_norm": 7.481157165779212, "learning_rate": 5.32257676944854e-06, "loss": 0.29864501953125, "step": 50460 }, { "epoch": 0.4363559329361614, "grad_norm": 10.783487750959496, "learning_rate": 5.322447811078401e-06, "loss": 0.22774505615234375, "step": 50465 }, { "epoch": 0.43639916645770466, "grad_norm": 17.35662397515103, "learning_rate": 5.322318841997338e-06, "loss": 0.25924072265625, "step": 50470 }, { "epoch": 0.4364423999792479, "grad_norm": 28.552703804419725, "learning_rate": 5.322189862205945e-06, "loss": 0.2033294677734375, "step": 50475 }, { "epoch": 0.43648563350079117, "grad_norm": 8.616931870945786, "learning_rate": 5.322060871704818e-06, "loss": 0.188720703125, "step": 50480 }, { "epoch": 0.43652886702233445, "grad_norm": 2.7945997408976475, "learning_rate": 5.321931870494552e-06, "loss": 0.15059051513671876, "step": 50485 }, { "epoch": 0.4365721005438777, "grad_norm": 15.173977049183359, "learning_rate": 5.321802858575741e-06, "loss": 0.09093017578125, "step": 50490 }, { "epoch": 0.43661533406542097, "grad_norm": 6.9178772275111, "learning_rate": 5.32167383594898e-06, "loss": 0.4247894287109375, "step": 50495 }, { "epoch": 0.43665856758696425, "grad_norm": 11.53061654979869, "learning_rate": 5.321544802614864e-06, "loss": 0.2156951904296875, "step": 50500 }, { "epoch": 0.4367018011085075, "grad_norm": 14.434449264360522, "learning_rate": 5.32141575857399e-06, "loss": 0.24637451171875, "step": 50505 }, { "epoch": 0.43674503463005077, "grad_norm": 4.292825170116138, "learning_rate": 5.3212867038269506e-06, "loss": 0.157647705078125, "step": 50510 }, { "epoch": 0.436788268151594, "grad_norm": 4.325522912804539, "learning_rate": 5.321157638374343e-06, "loss": 0.32332763671875, "step": 50515 }, { "epoch": 0.4368315016731373, "grad_norm": 4.692700903645104, "learning_rate": 5.32102856221676e-06, "loss": 0.056298828125, "step": 50520 }, { "epoch": 0.43687473519468056, "grad_norm": 4.399495176345126, "learning_rate": 5.320899475354799e-06, "loss": 0.145440673828125, "step": 50525 }, { "epoch": 0.4369179687162238, "grad_norm": 1.5888412997892334, "learning_rate": 5.320770377789055e-06, "loss": 0.0970458984375, "step": 50530 }, { "epoch": 0.4369612022377671, "grad_norm": 3.9160761829536104, "learning_rate": 5.320641269520122e-06, "loss": 0.21341552734375, "step": 50535 }, { "epoch": 0.43700443575931036, "grad_norm": 23.39307523196371, "learning_rate": 5.320512150548597e-06, "loss": 0.12904815673828124, "step": 50540 }, { "epoch": 0.4370476692808536, "grad_norm": 45.881182708137, "learning_rate": 5.320383020875075e-06, "loss": 0.2846954345703125, "step": 50545 }, { "epoch": 0.4370909028023969, "grad_norm": 32.9581087004743, "learning_rate": 5.320253880500153e-06, "loss": 0.23775177001953124, "step": 50550 }, { "epoch": 0.4371341363239401, "grad_norm": 2.413736258157776, "learning_rate": 5.320124729424424e-06, "loss": 0.21507110595703124, "step": 50555 }, { "epoch": 0.4371773698454834, "grad_norm": 23.930486740446536, "learning_rate": 5.319995567648485e-06, "loss": 0.20511093139648437, "step": 50560 }, { "epoch": 0.4372206033670267, "grad_norm": 41.35073871095312, "learning_rate": 5.319866395172931e-06, "loss": 0.343408203125, "step": 50565 }, { "epoch": 0.4372638368885699, "grad_norm": 1.6358120979960489, "learning_rate": 5.319737211998358e-06, "loss": 0.04457244873046875, "step": 50570 }, { "epoch": 0.4373070704101132, "grad_norm": 6.335921735336692, "learning_rate": 5.3196080181253625e-06, "loss": 0.0735076904296875, "step": 50575 }, { "epoch": 0.43735030393165647, "grad_norm": 5.611028451939467, "learning_rate": 5.3194788135545395e-06, "loss": 0.07164649963378907, "step": 50580 }, { "epoch": 0.4373935374531997, "grad_norm": 0.7820586393226028, "learning_rate": 5.319349598286486e-06, "loss": 0.1562713623046875, "step": 50585 }, { "epoch": 0.437436770974743, "grad_norm": 5.860899633505042, "learning_rate": 5.319220372321795e-06, "loss": 0.1776031494140625, "step": 50590 }, { "epoch": 0.43748000449628627, "grad_norm": 12.959754427098188, "learning_rate": 5.319091135661066e-06, "loss": 0.158026123046875, "step": 50595 }, { "epoch": 0.4375232380178295, "grad_norm": 0.6389695034074444, "learning_rate": 5.318961888304894e-06, "loss": 0.11138801574707032, "step": 50600 }, { "epoch": 0.4375664715393728, "grad_norm": 13.40783253573087, "learning_rate": 5.318832630253874e-06, "loss": 0.2234405517578125, "step": 50605 }, { "epoch": 0.437609705060916, "grad_norm": 4.0670279170334975, "learning_rate": 5.318703361508604e-06, "loss": 0.292822265625, "step": 50610 }, { "epoch": 0.4376529385824593, "grad_norm": 2.704187035273917, "learning_rate": 5.318574082069677e-06, "loss": 0.04290351867675781, "step": 50615 }, { "epoch": 0.4376961721040026, "grad_norm": 8.063293593609096, "learning_rate": 5.318444791937693e-06, "loss": 0.0677642822265625, "step": 50620 }, { "epoch": 0.4377394056255458, "grad_norm": 74.90984610741535, "learning_rate": 5.318315491113246e-06, "loss": 0.561669921875, "step": 50625 }, { "epoch": 0.4377826391470891, "grad_norm": 3.519950121864673, "learning_rate": 5.318186179596932e-06, "loss": 0.5324066162109375, "step": 50630 }, { "epoch": 0.4378258726686324, "grad_norm": 25.361559320039504, "learning_rate": 5.3180568573893485e-06, "loss": 0.12073135375976562, "step": 50635 }, { "epoch": 0.4378691061901756, "grad_norm": 1.1584188024942412, "learning_rate": 5.317927524491092e-06, "loss": 0.13606719970703124, "step": 50640 }, { "epoch": 0.4379123397117189, "grad_norm": 12.13571267724085, "learning_rate": 5.317798180902758e-06, "loss": 0.12872390747070311, "step": 50645 }, { "epoch": 0.4379555732332621, "grad_norm": 2.033841060618434, "learning_rate": 5.3176688266249444e-06, "loss": 0.059993743896484375, "step": 50650 }, { "epoch": 0.4379988067548054, "grad_norm": 31.696245405482493, "learning_rate": 5.3175394616582465e-06, "loss": 0.199658203125, "step": 50655 }, { "epoch": 0.4380420402763487, "grad_norm": 15.843191429195734, "learning_rate": 5.3174100860032616e-06, "loss": 0.16993751525878906, "step": 50660 }, { "epoch": 0.4380852737978919, "grad_norm": 1.6567408998557343, "learning_rate": 5.317280699660585e-06, "loss": 0.1144866943359375, "step": 50665 }, { "epoch": 0.4381285073194352, "grad_norm": 0.06507300173604272, "learning_rate": 5.317151302630816e-06, "loss": 0.09998931884765624, "step": 50670 }, { "epoch": 0.4381717408409785, "grad_norm": 1.0802207300985562, "learning_rate": 5.317021894914549e-06, "loss": 0.04766845703125, "step": 50675 }, { "epoch": 0.4382149743625217, "grad_norm": 11.272189030815245, "learning_rate": 5.316892476512383e-06, "loss": 0.13753776550292968, "step": 50680 }, { "epoch": 0.438258207884065, "grad_norm": 5.365732463966222, "learning_rate": 5.316763047424913e-06, "loss": 0.463922119140625, "step": 50685 }, { "epoch": 0.4383014414056082, "grad_norm": 12.050068603893646, "learning_rate": 5.316633607652736e-06, "loss": 0.21111831665039063, "step": 50690 }, { "epoch": 0.4383446749271515, "grad_norm": 9.012260489305756, "learning_rate": 5.31650415719645e-06, "loss": 0.18983688354492187, "step": 50695 }, { "epoch": 0.4383879084486948, "grad_norm": 4.2126778307989206, "learning_rate": 5.316374696056651e-06, "loss": 0.722900390625, "step": 50700 }, { "epoch": 0.438431141970238, "grad_norm": 4.431763346086748, "learning_rate": 5.3162452242339375e-06, "loss": 0.0929931640625, "step": 50705 }, { "epoch": 0.4384743754917813, "grad_norm": 4.796084996160768, "learning_rate": 5.316115741728905e-06, "loss": 0.099700927734375, "step": 50710 }, { "epoch": 0.4385176090133246, "grad_norm": 2.4696412958972003, "learning_rate": 5.315986248542151e-06, "loss": 0.12231597900390626, "step": 50715 }, { "epoch": 0.4385608425348678, "grad_norm": 27.97594538041676, "learning_rate": 5.315856744674274e-06, "loss": 0.138494873046875, "step": 50720 }, { "epoch": 0.4386040760564111, "grad_norm": 0.9219352010264996, "learning_rate": 5.3157272301258705e-06, "loss": 0.10689315795898438, "step": 50725 }, { "epoch": 0.43864730957795434, "grad_norm": 13.302396119533777, "learning_rate": 5.315597704897537e-06, "loss": 0.4214984893798828, "step": 50730 }, { "epoch": 0.4386905430994976, "grad_norm": 22.508756510167984, "learning_rate": 5.315468168989871e-06, "loss": 0.2258575439453125, "step": 50735 }, { "epoch": 0.4387337766210409, "grad_norm": 0.43705479684622545, "learning_rate": 5.315338622403471e-06, "loss": 0.080499267578125, "step": 50740 }, { "epoch": 0.43877701014258413, "grad_norm": 7.571428876035522, "learning_rate": 5.3152090651389334e-06, "loss": 0.3026123046875, "step": 50745 }, { "epoch": 0.4388202436641274, "grad_norm": 0.7429247378494237, "learning_rate": 5.315079497196857e-06, "loss": 0.10843963623046875, "step": 50750 }, { "epoch": 0.4388634771856707, "grad_norm": 2.7184430621576916, "learning_rate": 5.314949918577838e-06, "loss": 0.079498291015625, "step": 50755 }, { "epoch": 0.43890671070721393, "grad_norm": 14.203314616757686, "learning_rate": 5.314820329282474e-06, "loss": 0.07061614990234374, "step": 50760 }, { "epoch": 0.4389499442287572, "grad_norm": 0.08793088339289515, "learning_rate": 5.314690729311365e-06, "loss": 0.32965888977050783, "step": 50765 }, { "epoch": 0.4389931777503005, "grad_norm": 2.7142341552908293, "learning_rate": 5.314561118665105e-06, "loss": 0.06386642456054688, "step": 50770 }, { "epoch": 0.43903641127184373, "grad_norm": 7.586248088140446, "learning_rate": 5.314431497344295e-06, "loss": 0.20377578735351562, "step": 50775 }, { "epoch": 0.439079644793387, "grad_norm": 3.616536663541019, "learning_rate": 5.31430186534953e-06, "loss": 0.053022003173828124, "step": 50780 }, { "epoch": 0.43912287831493024, "grad_norm": 18.088407799231486, "learning_rate": 5.31417222268141e-06, "loss": 0.46569976806640623, "step": 50785 }, { "epoch": 0.4391661118364735, "grad_norm": 19.818968119229176, "learning_rate": 5.314042569340533e-06, "loss": 0.16675949096679688, "step": 50790 }, { "epoch": 0.4392093453580168, "grad_norm": 1.7681924742742112, "learning_rate": 5.313912905327495e-06, "loss": 0.0396270751953125, "step": 50795 }, { "epoch": 0.43925257887956004, "grad_norm": 10.242890837967845, "learning_rate": 5.313783230642896e-06, "loss": 0.11188278198242188, "step": 50800 }, { "epoch": 0.4392958124011033, "grad_norm": 2.3529140204799153, "learning_rate": 5.313653545287332e-06, "loss": 0.42287750244140626, "step": 50805 }, { "epoch": 0.4393390459226466, "grad_norm": 1.7925272586833951, "learning_rate": 5.313523849261403e-06, "loss": 0.0968841552734375, "step": 50810 }, { "epoch": 0.43938227944418984, "grad_norm": 6.077870280736835, "learning_rate": 5.313394142565707e-06, "loss": 0.1153329849243164, "step": 50815 }, { "epoch": 0.4394255129657331, "grad_norm": 34.35869147891601, "learning_rate": 5.3132644252008404e-06, "loss": 0.15479793548583984, "step": 50820 }, { "epoch": 0.43946874648727635, "grad_norm": 0.18996973419629845, "learning_rate": 5.313134697167403e-06, "loss": 0.13140716552734374, "step": 50825 }, { "epoch": 0.43951198000881964, "grad_norm": 1.983443145317102, "learning_rate": 5.313004958465994e-06, "loss": 0.13344268798828124, "step": 50830 }, { "epoch": 0.4395552135303629, "grad_norm": 0.581197727122752, "learning_rate": 5.31287520909721e-06, "loss": 0.1320465087890625, "step": 50835 }, { "epoch": 0.43959844705190615, "grad_norm": 6.196289622550735, "learning_rate": 5.31274544906165e-06, "loss": 0.18962554931640624, "step": 50840 }, { "epoch": 0.43964168057344943, "grad_norm": 4.370497936847818, "learning_rate": 5.312615678359911e-06, "loss": 0.23608245849609374, "step": 50845 }, { "epoch": 0.4396849140949927, "grad_norm": 13.041780661600379, "learning_rate": 5.312485896992593e-06, "loss": 0.0866058349609375, "step": 50850 }, { "epoch": 0.43972814761653595, "grad_norm": 3.120766872277138, "learning_rate": 5.312356104960296e-06, "loss": 0.1169921875, "step": 50855 }, { "epoch": 0.43977138113807923, "grad_norm": 0.27130788787107885, "learning_rate": 5.312226302263616e-06, "loss": 0.03269500732421875, "step": 50860 }, { "epoch": 0.43981461465962246, "grad_norm": 2.267383509568558, "learning_rate": 5.312096488903153e-06, "loss": 0.08865814208984375, "step": 50865 }, { "epoch": 0.43985784818116574, "grad_norm": 7.124724971512196, "learning_rate": 5.311966664879504e-06, "loss": 0.2683601379394531, "step": 50870 }, { "epoch": 0.43990108170270903, "grad_norm": 1.870396600995566, "learning_rate": 5.3118368301932706e-06, "loss": 0.1708667755126953, "step": 50875 }, { "epoch": 0.43994431522425226, "grad_norm": 22.265273542597985, "learning_rate": 5.3117069848450494e-06, "loss": 0.24776458740234375, "step": 50880 }, { "epoch": 0.43998754874579554, "grad_norm": 27.27290737907795, "learning_rate": 5.31157712883544e-06, "loss": 0.42824592590332033, "step": 50885 }, { "epoch": 0.4400307822673388, "grad_norm": 16.105678796008977, "learning_rate": 5.31144726216504e-06, "loss": 0.0764068603515625, "step": 50890 }, { "epoch": 0.44007401578888206, "grad_norm": 19.437055476523657, "learning_rate": 5.311317384834452e-06, "loss": 0.28353271484375, "step": 50895 }, { "epoch": 0.44011724931042534, "grad_norm": 2.4206133748014627, "learning_rate": 5.31118749684427e-06, "loss": 0.05573616027832031, "step": 50900 }, { "epoch": 0.44016048283196857, "grad_norm": 0.5329130660418734, "learning_rate": 5.311057598195096e-06, "loss": 0.44871063232421876, "step": 50905 }, { "epoch": 0.44020371635351185, "grad_norm": 0.7872578563504905, "learning_rate": 5.3109276888875295e-06, "loss": 0.266864013671875, "step": 50910 }, { "epoch": 0.44024694987505514, "grad_norm": 12.427024382509602, "learning_rate": 5.3107977689221686e-06, "loss": 0.170361328125, "step": 50915 }, { "epoch": 0.44029018339659837, "grad_norm": 3.140538478278267, "learning_rate": 5.310667838299612e-06, "loss": 0.1795318603515625, "step": 50920 }, { "epoch": 0.44033341691814165, "grad_norm": 2.1395899038917268, "learning_rate": 5.31053789702046e-06, "loss": 0.17689895629882812, "step": 50925 }, { "epoch": 0.44037665043968494, "grad_norm": 6.538908862627098, "learning_rate": 5.31040794508531e-06, "loss": 0.2477203369140625, "step": 50930 }, { "epoch": 0.44041988396122816, "grad_norm": 6.449224920808832, "learning_rate": 5.310277982494764e-06, "loss": 0.48250732421875, "step": 50935 }, { "epoch": 0.44046311748277145, "grad_norm": 21.55867434247172, "learning_rate": 5.310148009249419e-06, "loss": 0.6079833984375, "step": 50940 }, { "epoch": 0.4405063510043147, "grad_norm": 15.916474061176146, "learning_rate": 5.310018025349877e-06, "loss": 0.23040390014648438, "step": 50945 }, { "epoch": 0.44054958452585796, "grad_norm": 73.15011204828177, "learning_rate": 5.309888030796736e-06, "loss": 0.632025146484375, "step": 50950 }, { "epoch": 0.44059281804740125, "grad_norm": 8.997796021078925, "learning_rate": 5.309758025590595e-06, "loss": 0.06751708984375, "step": 50955 }, { "epoch": 0.4406360515689445, "grad_norm": 29.871982403390632, "learning_rate": 5.3096280097320535e-06, "loss": 0.16984786987304687, "step": 50960 }, { "epoch": 0.44067928509048776, "grad_norm": 16.18630865641065, "learning_rate": 5.309497983221712e-06, "loss": 0.40474853515625, "step": 50965 }, { "epoch": 0.44072251861203104, "grad_norm": 9.70507931976622, "learning_rate": 5.30936794606017e-06, "loss": 0.3509796142578125, "step": 50970 }, { "epoch": 0.4407657521335743, "grad_norm": 8.024828888692673, "learning_rate": 5.3092378982480265e-06, "loss": 0.1780242919921875, "step": 50975 }, { "epoch": 0.44080898565511756, "grad_norm": 7.253293287072224, "learning_rate": 5.309107839785883e-06, "loss": 0.4049388885498047, "step": 50980 }, { "epoch": 0.44085221917666084, "grad_norm": 0.6686394408399562, "learning_rate": 5.308977770674337e-06, "loss": 0.31998138427734374, "step": 50985 }, { "epoch": 0.44089545269820407, "grad_norm": 5.059077145524216, "learning_rate": 5.30884769091399e-06, "loss": 0.42063217163085936, "step": 50990 }, { "epoch": 0.44093868621974736, "grad_norm": 7.134241392448302, "learning_rate": 5.308717600505442e-06, "loss": 0.14644737243652345, "step": 50995 }, { "epoch": 0.4409819197412906, "grad_norm": 37.615062746422495, "learning_rate": 5.3085874994492925e-06, "loss": 0.18441619873046874, "step": 51000 }, { "epoch": 0.44102515326283387, "grad_norm": 37.182447531513134, "learning_rate": 5.308457387746141e-06, "loss": 0.37697563171386717, "step": 51005 }, { "epoch": 0.44106838678437715, "grad_norm": 0.5020365417661562, "learning_rate": 5.308327265396587e-06, "loss": 0.083551025390625, "step": 51010 }, { "epoch": 0.4411116203059204, "grad_norm": 0.8969209448142316, "learning_rate": 5.308197132401233e-06, "loss": 0.07761516571044921, "step": 51015 }, { "epoch": 0.44115485382746367, "grad_norm": 3.280551626002569, "learning_rate": 5.308066988760677e-06, "loss": 0.061229705810546875, "step": 51020 }, { "epoch": 0.44119808734900695, "grad_norm": 0.5056841260103293, "learning_rate": 5.307936834475522e-06, "loss": 0.326617431640625, "step": 51025 }, { "epoch": 0.4412413208705502, "grad_norm": 16.63873637293999, "learning_rate": 5.307806669546364e-06, "loss": 0.30654296875, "step": 51030 }, { "epoch": 0.44128455439209346, "grad_norm": 8.283826542803855, "learning_rate": 5.307676493973806e-06, "loss": 0.09117202758789063, "step": 51035 }, { "epoch": 0.4413277879136367, "grad_norm": 2.636895080320321, "learning_rate": 5.307546307758448e-06, "loss": 0.19113426208496093, "step": 51040 }, { "epoch": 0.44137102143518, "grad_norm": 0.3361272473845844, "learning_rate": 5.30741611090089e-06, "loss": 0.1217041015625, "step": 51045 }, { "epoch": 0.44141425495672326, "grad_norm": 1.6890806273948054, "learning_rate": 5.3072859034017335e-06, "loss": 0.38533172607421873, "step": 51050 }, { "epoch": 0.4414574884782665, "grad_norm": 6.355632534245113, "learning_rate": 5.307155685261578e-06, "loss": 0.07388687133789062, "step": 51055 }, { "epoch": 0.4415007219998098, "grad_norm": 43.96704395001065, "learning_rate": 5.307025456481024e-06, "loss": 0.24201736450195313, "step": 51060 }, { "epoch": 0.44154395552135306, "grad_norm": 4.78206318096377, "learning_rate": 5.306895217060674e-06, "loss": 0.10308799743652344, "step": 51065 }, { "epoch": 0.4415871890428963, "grad_norm": 0.5062048166161857, "learning_rate": 5.306764967001125e-06, "loss": 0.016801834106445312, "step": 51070 }, { "epoch": 0.4416304225644396, "grad_norm": 11.031333980315736, "learning_rate": 5.306634706302981e-06, "loss": 0.05809783935546875, "step": 51075 }, { "epoch": 0.4416736560859828, "grad_norm": 13.166804776624549, "learning_rate": 5.306504434966841e-06, "loss": 0.12495269775390624, "step": 51080 }, { "epoch": 0.4417168896075261, "grad_norm": 17.251687590597523, "learning_rate": 5.306374152993307e-06, "loss": 0.2839202880859375, "step": 51085 }, { "epoch": 0.44176012312906937, "grad_norm": 0.6297572801095191, "learning_rate": 5.306243860382978e-06, "loss": 0.07535858154296875, "step": 51090 }, { "epoch": 0.4418033566506126, "grad_norm": 2.291916781758061, "learning_rate": 5.306113557136458e-06, "loss": 0.2573066711425781, "step": 51095 }, { "epoch": 0.4418465901721559, "grad_norm": 19.59763429491133, "learning_rate": 5.3059832432543445e-06, "loss": 0.1966278076171875, "step": 51100 }, { "epoch": 0.44188982369369917, "grad_norm": 27.30672773549893, "learning_rate": 5.3058529187372405e-06, "loss": 0.2265380859375, "step": 51105 }, { "epoch": 0.4419330572152424, "grad_norm": 2.2299847475606835, "learning_rate": 5.305722583585747e-06, "loss": 0.04725341796875, "step": 51110 }, { "epoch": 0.4419762907367857, "grad_norm": 2.398129129549824, "learning_rate": 5.305592237800463e-06, "loss": 0.25234832763671877, "step": 51115 }, { "epoch": 0.4420195242583289, "grad_norm": 2.1031302423077953, "learning_rate": 5.305461881381994e-06, "loss": 0.16078338623046876, "step": 51120 }, { "epoch": 0.4420627577798722, "grad_norm": 4.290460707983218, "learning_rate": 5.305331514330936e-06, "loss": 0.05900421142578125, "step": 51125 }, { "epoch": 0.4421059913014155, "grad_norm": 0.8849419753650044, "learning_rate": 5.305201136647894e-06, "loss": 0.04004974365234375, "step": 51130 }, { "epoch": 0.4421492248229587, "grad_norm": 6.051344038470659, "learning_rate": 5.305070748333468e-06, "loss": 0.0945709228515625, "step": 51135 }, { "epoch": 0.442192458344502, "grad_norm": 11.776377187895841, "learning_rate": 5.304940349388259e-06, "loss": 0.2165313720703125, "step": 51140 }, { "epoch": 0.4422356918660453, "grad_norm": 32.22475912732325, "learning_rate": 5.30480993981287e-06, "loss": 0.33037567138671875, "step": 51145 }, { "epoch": 0.4422789253875885, "grad_norm": 3.3845415164254744, "learning_rate": 5.3046795196079e-06, "loss": 0.10305023193359375, "step": 51150 }, { "epoch": 0.4423221589091318, "grad_norm": 0.702298120057108, "learning_rate": 5.304549088773951e-06, "loss": 0.18818435668945313, "step": 51155 }, { "epoch": 0.4423653924306751, "grad_norm": 44.1214164074273, "learning_rate": 5.304418647311627e-06, "loss": 0.286395263671875, "step": 51160 }, { "epoch": 0.4424086259522183, "grad_norm": 5.059534693963146, "learning_rate": 5.304288195221527e-06, "loss": 0.03648300170898437, "step": 51165 }, { "epoch": 0.4424518594737616, "grad_norm": 10.515432393397322, "learning_rate": 5.3041577325042535e-06, "loss": 0.28095703125, "step": 51170 }, { "epoch": 0.4424950929953048, "grad_norm": 0.4707047819747574, "learning_rate": 5.304027259160409e-06, "loss": 0.16674156188964845, "step": 51175 }, { "epoch": 0.4425383265168481, "grad_norm": 11.826687648220785, "learning_rate": 5.303896775190593e-06, "loss": 0.06574745178222656, "step": 51180 }, { "epoch": 0.4425815600383914, "grad_norm": 9.339281095176785, "learning_rate": 5.303766280595409e-06, "loss": 0.1713470458984375, "step": 51185 }, { "epoch": 0.4426247935599346, "grad_norm": 1.056078355971625, "learning_rate": 5.3036357753754586e-06, "loss": 0.038124370574951175, "step": 51190 }, { "epoch": 0.4426680270814779, "grad_norm": 0.4796617175573511, "learning_rate": 5.303505259531344e-06, "loss": 0.22856292724609376, "step": 51195 }, { "epoch": 0.4427112606030212, "grad_norm": 53.70384482182128, "learning_rate": 5.303374733063666e-06, "loss": 0.3397979736328125, "step": 51200 }, { "epoch": 0.4427544941245644, "grad_norm": 8.320791006872378, "learning_rate": 5.303244195973027e-06, "loss": 0.222113037109375, "step": 51205 }, { "epoch": 0.4427977276461077, "grad_norm": 19.28605313032439, "learning_rate": 5.3031136482600294e-06, "loss": 0.14373817443847656, "step": 51210 }, { "epoch": 0.4428409611676509, "grad_norm": 13.267362403088635, "learning_rate": 5.302983089925277e-06, "loss": 0.11071319580078125, "step": 51215 }, { "epoch": 0.4428841946891942, "grad_norm": 7.35107761950081, "learning_rate": 5.302852520969368e-06, "loss": 0.173028564453125, "step": 51220 }, { "epoch": 0.4429274282107375, "grad_norm": 14.974267865088962, "learning_rate": 5.302721941392907e-06, "loss": 0.13161163330078124, "step": 51225 }, { "epoch": 0.4429706617322807, "grad_norm": 27.036335120460667, "learning_rate": 5.302591351196496e-06, "loss": 0.11909866333007812, "step": 51230 }, { "epoch": 0.443013895253824, "grad_norm": 12.724402931670616, "learning_rate": 5.302460750380738e-06, "loss": 0.1982940673828125, "step": 51235 }, { "epoch": 0.4430571287753673, "grad_norm": 0.4905116467581636, "learning_rate": 5.302330138946233e-06, "loss": 0.31932220458984373, "step": 51240 }, { "epoch": 0.4431003622969105, "grad_norm": 61.82271555073896, "learning_rate": 5.302199516893586e-06, "loss": 0.4447021484375, "step": 51245 }, { "epoch": 0.4431435958184538, "grad_norm": 20.9034564671512, "learning_rate": 5.302068884223398e-06, "loss": 0.2771446228027344, "step": 51250 }, { "epoch": 0.44318682933999703, "grad_norm": 14.633956087065524, "learning_rate": 5.301938240936271e-06, "loss": 0.1411865234375, "step": 51255 }, { "epoch": 0.4432300628615403, "grad_norm": 7.152975923824578, "learning_rate": 5.30180758703281e-06, "loss": 0.09401779174804688, "step": 51260 }, { "epoch": 0.4432732963830836, "grad_norm": 2.022455925190351, "learning_rate": 5.301676922513614e-06, "loss": 0.06562652587890624, "step": 51265 }, { "epoch": 0.44331652990462683, "grad_norm": 0.39166424113440435, "learning_rate": 5.301546247379288e-06, "loss": 0.01771240234375, "step": 51270 }, { "epoch": 0.4433597634261701, "grad_norm": 1.1874487670148302, "learning_rate": 5.3014155616304345e-06, "loss": 0.27802581787109376, "step": 51275 }, { "epoch": 0.4434029969477134, "grad_norm": 14.472657849663017, "learning_rate": 5.301284865267655e-06, "loss": 0.14030532836914061, "step": 51280 }, { "epoch": 0.44344623046925663, "grad_norm": 4.882973733400918, "learning_rate": 5.301154158291553e-06, "loss": 0.0362091064453125, "step": 51285 }, { "epoch": 0.4434894639907999, "grad_norm": 3.6151905000646485, "learning_rate": 5.301023440702732e-06, "loss": 0.3492450714111328, "step": 51290 }, { "epoch": 0.44353269751234314, "grad_norm": 3.068768103414985, "learning_rate": 5.300892712501794e-06, "loss": 0.21491622924804688, "step": 51295 }, { "epoch": 0.44357593103388643, "grad_norm": 3.1688137757289194, "learning_rate": 5.300761973689342e-06, "loss": 0.082720947265625, "step": 51300 }, { "epoch": 0.4436191645554297, "grad_norm": 21.127463355667363, "learning_rate": 5.30063122426598e-06, "loss": 0.36368255615234374, "step": 51305 }, { "epoch": 0.44366239807697294, "grad_norm": 0.44263996848309034, "learning_rate": 5.300500464232309e-06, "loss": 0.1103912353515625, "step": 51310 }, { "epoch": 0.4437056315985162, "grad_norm": 7.489850092941041, "learning_rate": 5.300369693588934e-06, "loss": 0.077203369140625, "step": 51315 }, { "epoch": 0.4437488651200595, "grad_norm": 25.325269658404835, "learning_rate": 5.300238912336456e-06, "loss": 0.22301788330078126, "step": 51320 }, { "epoch": 0.44379209864160274, "grad_norm": 1.3462868086375015, "learning_rate": 5.30010812047548e-06, "loss": 0.17993392944335937, "step": 51325 }, { "epoch": 0.443835332163146, "grad_norm": 19.848336516823068, "learning_rate": 5.299977318006609e-06, "loss": 0.33089141845703124, "step": 51330 }, { "epoch": 0.4438785656846893, "grad_norm": 22.018854296149712, "learning_rate": 5.299846504930445e-06, "loss": 0.4886589050292969, "step": 51335 }, { "epoch": 0.44392179920623254, "grad_norm": 1.1932524042871684, "learning_rate": 5.299715681247593e-06, "loss": 0.32379302978515623, "step": 51340 }, { "epoch": 0.4439650327277758, "grad_norm": 10.04167639406654, "learning_rate": 5.299584846958655e-06, "loss": 0.1892578125, "step": 51345 }, { "epoch": 0.44400826624931905, "grad_norm": 8.903941896608766, "learning_rate": 5.299454002064235e-06, "loss": 0.12322235107421875, "step": 51350 }, { "epoch": 0.44405149977086233, "grad_norm": 2.5257755264581947, "learning_rate": 5.299323146564936e-06, "loss": 0.30629425048828124, "step": 51355 }, { "epoch": 0.4440947332924056, "grad_norm": 0.6825936185455448, "learning_rate": 5.299192280461363e-06, "loss": 0.03407745361328125, "step": 51360 }, { "epoch": 0.44413796681394885, "grad_norm": 25.949702960042597, "learning_rate": 5.299061403754117e-06, "loss": 0.22379188537597655, "step": 51365 }, { "epoch": 0.44418120033549213, "grad_norm": 10.217422635554385, "learning_rate": 5.298930516443803e-06, "loss": 0.11193714141845704, "step": 51370 }, { "epoch": 0.4442244338570354, "grad_norm": 45.42320317669624, "learning_rate": 5.298799618531025e-06, "loss": 0.21117181777954103, "step": 51375 }, { "epoch": 0.44426766737857865, "grad_norm": 6.298176454230298, "learning_rate": 5.2986687100163875e-06, "loss": 0.04349517822265625, "step": 51380 }, { "epoch": 0.44431090090012193, "grad_norm": 34.01685341362304, "learning_rate": 5.298537790900491e-06, "loss": 0.16528778076171874, "step": 51385 }, { "epoch": 0.44435413442166516, "grad_norm": 8.334510460988255, "learning_rate": 5.298406861183943e-06, "loss": 0.184393310546875, "step": 51390 }, { "epoch": 0.44439736794320844, "grad_norm": 19.625662551393656, "learning_rate": 5.2982759208673444e-06, "loss": 0.2308380126953125, "step": 51395 }, { "epoch": 0.44444060146475173, "grad_norm": 7.84507811628457, "learning_rate": 5.298144969951301e-06, "loss": 0.09400558471679688, "step": 51400 }, { "epoch": 0.44448383498629496, "grad_norm": 5.838289286283415, "learning_rate": 5.298014008436416e-06, "loss": 0.30460052490234374, "step": 51405 }, { "epoch": 0.44452706850783824, "grad_norm": 17.755209272729562, "learning_rate": 5.2978830363232926e-06, "loss": 0.43386383056640626, "step": 51410 }, { "epoch": 0.4445703020293815, "grad_norm": 6.1204994357932465, "learning_rate": 5.297752053612537e-06, "loss": 0.0580078125, "step": 51415 }, { "epoch": 0.44461353555092475, "grad_norm": 0.19203558639782176, "learning_rate": 5.297621060304751e-06, "loss": 0.13881607055664064, "step": 51420 }, { "epoch": 0.44465676907246804, "grad_norm": 5.959725991453586, "learning_rate": 5.29749005640054e-06, "loss": 0.27552146911621095, "step": 51425 }, { "epoch": 0.44470000259401127, "grad_norm": 1.8154718776290584, "learning_rate": 5.297359041900508e-06, "loss": 0.13143463134765626, "step": 51430 }, { "epoch": 0.44474323611555455, "grad_norm": 1.4797843444230048, "learning_rate": 5.297228016805259e-06, "loss": 0.12205810546875, "step": 51435 }, { "epoch": 0.44478646963709784, "grad_norm": 10.466761874752045, "learning_rate": 5.297096981115398e-06, "loss": 0.32126197814941404, "step": 51440 }, { "epoch": 0.44482970315864107, "grad_norm": 20.9093863095009, "learning_rate": 5.296965934831528e-06, "loss": 0.08914566040039062, "step": 51445 }, { "epoch": 0.44487293668018435, "grad_norm": 28.84059372334747, "learning_rate": 5.296834877954253e-06, "loss": 0.289447021484375, "step": 51450 }, { "epoch": 0.44491617020172763, "grad_norm": 51.73396766860297, "learning_rate": 5.29670381048418e-06, "loss": 0.3655670166015625, "step": 51455 }, { "epoch": 0.44495940372327086, "grad_norm": 21.89490390161203, "learning_rate": 5.296572732421911e-06, "loss": 0.23726806640625, "step": 51460 }, { "epoch": 0.44500263724481415, "grad_norm": 4.9796969925762085, "learning_rate": 5.296441643768053e-06, "loss": 0.11979217529296875, "step": 51465 }, { "epoch": 0.4450458707663574, "grad_norm": 7.633254724784863, "learning_rate": 5.296310544523208e-06, "loss": 0.0685455322265625, "step": 51470 }, { "epoch": 0.44508910428790066, "grad_norm": 4.533118260031235, "learning_rate": 5.296179434687982e-06, "loss": 0.23627548217773436, "step": 51475 }, { "epoch": 0.44513233780944395, "grad_norm": 6.38979819389903, "learning_rate": 5.296048314262979e-06, "loss": 0.12304840087890626, "step": 51480 }, { "epoch": 0.4451755713309872, "grad_norm": 12.885066302624708, "learning_rate": 5.295917183248804e-06, "loss": 0.071514892578125, "step": 51485 }, { "epoch": 0.44521880485253046, "grad_norm": 4.876629851755953, "learning_rate": 5.2957860416460615e-06, "loss": 0.143359375, "step": 51490 }, { "epoch": 0.44526203837407374, "grad_norm": 16.376385564894797, "learning_rate": 5.295654889455357e-06, "loss": 0.14546356201171876, "step": 51495 }, { "epoch": 0.44530527189561697, "grad_norm": 17.41524140827338, "learning_rate": 5.295523726677296e-06, "loss": 0.2609870910644531, "step": 51500 }, { "epoch": 0.44534850541716026, "grad_norm": 1.9707902127478518, "learning_rate": 5.295392553312481e-06, "loss": 0.2588615417480469, "step": 51505 }, { "epoch": 0.44539173893870354, "grad_norm": 3.3653144183036305, "learning_rate": 5.2952613693615195e-06, "loss": 0.3116935729980469, "step": 51510 }, { "epoch": 0.44543497246024677, "grad_norm": 2.026222628512586, "learning_rate": 5.295130174825014e-06, "loss": 0.18980865478515624, "step": 51515 }, { "epoch": 0.44547820598179005, "grad_norm": 0.9820564352841479, "learning_rate": 5.294998969703572e-06, "loss": 0.1171356201171875, "step": 51520 }, { "epoch": 0.4455214395033333, "grad_norm": 8.555430992066192, "learning_rate": 5.294867753997797e-06, "loss": 0.29292755126953124, "step": 51525 }, { "epoch": 0.44556467302487657, "grad_norm": 10.302286643871422, "learning_rate": 5.294736527708295e-06, "loss": 0.47065582275390627, "step": 51530 }, { "epoch": 0.44560790654641985, "grad_norm": 11.051632271752018, "learning_rate": 5.294605290835671e-06, "loss": 0.220880126953125, "step": 51535 }, { "epoch": 0.4456511400679631, "grad_norm": 1.0451071763061548, "learning_rate": 5.29447404338053e-06, "loss": 0.03289031982421875, "step": 51540 }, { "epoch": 0.44569437358950637, "grad_norm": 37.07533284730781, "learning_rate": 5.294342785343478e-06, "loss": 0.367535400390625, "step": 51545 }, { "epoch": 0.44573760711104965, "grad_norm": 2.925722033623206, "learning_rate": 5.2942115167251186e-06, "loss": 0.2630462646484375, "step": 51550 }, { "epoch": 0.4457808406325929, "grad_norm": 2.3644782201006973, "learning_rate": 5.294080237526059e-06, "loss": 0.06363677978515625, "step": 51555 }, { "epoch": 0.44582407415413616, "grad_norm": 3.200410127491267, "learning_rate": 5.293948947746905e-06, "loss": 0.070947265625, "step": 51560 }, { "epoch": 0.4458673076756794, "grad_norm": 22.560256938701098, "learning_rate": 5.29381764738826e-06, "loss": 0.160687255859375, "step": 51565 }, { "epoch": 0.4459105411972227, "grad_norm": 3.3440982188974835, "learning_rate": 5.293686336450731e-06, "loss": 0.207818603515625, "step": 51570 }, { "epoch": 0.44595377471876596, "grad_norm": 0.24969152057492938, "learning_rate": 5.293555014934923e-06, "loss": 0.30934715270996094, "step": 51575 }, { "epoch": 0.4459970082403092, "grad_norm": 27.917228844896414, "learning_rate": 5.293423682841442e-06, "loss": 0.1761035919189453, "step": 51580 }, { "epoch": 0.4460402417618525, "grad_norm": 0.954344902282546, "learning_rate": 5.293292340170894e-06, "loss": 0.058380126953125, "step": 51585 }, { "epoch": 0.44608347528339576, "grad_norm": 36.199629267441914, "learning_rate": 5.293160986923884e-06, "loss": 0.29836273193359375, "step": 51590 }, { "epoch": 0.446126708804939, "grad_norm": 11.055544248573343, "learning_rate": 5.293029623101018e-06, "loss": 0.119927978515625, "step": 51595 }, { "epoch": 0.44616994232648227, "grad_norm": 31.308363492046595, "learning_rate": 5.292898248702904e-06, "loss": 0.2210906982421875, "step": 51600 }, { "epoch": 0.4462131758480255, "grad_norm": 8.583608633157976, "learning_rate": 5.292766863730143e-06, "loss": 0.10447883605957031, "step": 51605 }, { "epoch": 0.4462564093695688, "grad_norm": 9.104062823329867, "learning_rate": 5.292635468183345e-06, "loss": 0.49171295166015627, "step": 51610 }, { "epoch": 0.44629964289111207, "grad_norm": 1.092670508551557, "learning_rate": 5.292504062063115e-06, "loss": 0.21410741806030273, "step": 51615 }, { "epoch": 0.4463428764126553, "grad_norm": 11.459699850513275, "learning_rate": 5.292372645370059e-06, "loss": 0.0687957763671875, "step": 51620 }, { "epoch": 0.4463861099341986, "grad_norm": 39.95440576501855, "learning_rate": 5.292241218104782e-06, "loss": 0.3255706787109375, "step": 51625 }, { "epoch": 0.44642934345574187, "grad_norm": 10.444616107997705, "learning_rate": 5.292109780267892e-06, "loss": 0.14816360473632811, "step": 51630 }, { "epoch": 0.4464725769772851, "grad_norm": 2.6325802581728164, "learning_rate": 5.291978331859994e-06, "loss": 0.5240997314453125, "step": 51635 }, { "epoch": 0.4465158104988284, "grad_norm": 4.657560918203397, "learning_rate": 5.291846872881694e-06, "loss": 0.181610107421875, "step": 51640 }, { "epoch": 0.4465590440203716, "grad_norm": 3.388697329556475, "learning_rate": 5.2917154033335994e-06, "loss": 0.03619537353515625, "step": 51645 }, { "epoch": 0.4466022775419149, "grad_norm": 31.02451135196828, "learning_rate": 5.291583923216315e-06, "loss": 0.17789306640625, "step": 51650 }, { "epoch": 0.4466455110634582, "grad_norm": 9.02099762598934, "learning_rate": 5.291452432530449e-06, "loss": 0.058953857421875, "step": 51655 }, { "epoch": 0.4466887445850014, "grad_norm": 3.4802851333877736, "learning_rate": 5.2913209312766065e-06, "loss": 0.30171890258789064, "step": 51660 }, { "epoch": 0.4467319781065447, "grad_norm": 8.833432138053476, "learning_rate": 5.291189419455394e-06, "loss": 0.24147777557373046, "step": 51665 }, { "epoch": 0.446775211628088, "grad_norm": 0.3674571137083815, "learning_rate": 5.291057897067419e-06, "loss": 0.059112548828125, "step": 51670 }, { "epoch": 0.4468184451496312, "grad_norm": 6.524686581006485, "learning_rate": 5.290926364113286e-06, "loss": 0.052593231201171875, "step": 51675 }, { "epoch": 0.4468616786711745, "grad_norm": 8.905330526737979, "learning_rate": 5.290794820593605e-06, "loss": 0.20643157958984376, "step": 51680 }, { "epoch": 0.4469049121927177, "grad_norm": 6.689786093760969, "learning_rate": 5.290663266508979e-06, "loss": 0.1128448486328125, "step": 51685 }, { "epoch": 0.446948145714261, "grad_norm": 0.33233324103965217, "learning_rate": 5.290531701860017e-06, "loss": 0.1892017364501953, "step": 51690 }, { "epoch": 0.4469913792358043, "grad_norm": 29.979401351808157, "learning_rate": 5.2904001266473255e-06, "loss": 0.343896484375, "step": 51695 }, { "epoch": 0.4470346127573475, "grad_norm": 11.547544496031518, "learning_rate": 5.290268540871511e-06, "loss": 0.13920440673828124, "step": 51700 }, { "epoch": 0.4470778462788908, "grad_norm": 10.683751170679626, "learning_rate": 5.290136944533181e-06, "loss": 0.16014556884765624, "step": 51705 }, { "epoch": 0.4471210798004341, "grad_norm": 9.529598538890982, "learning_rate": 5.290005337632941e-06, "loss": 0.1525665283203125, "step": 51710 }, { "epoch": 0.4471643133219773, "grad_norm": 0.4566580890472328, "learning_rate": 5.2898737201713975e-06, "loss": 0.18934860229492187, "step": 51715 }, { "epoch": 0.4472075468435206, "grad_norm": 5.807348749179954, "learning_rate": 5.289742092149161e-06, "loss": 0.18888931274414061, "step": 51720 }, { "epoch": 0.4472507803650639, "grad_norm": 4.320252569136774, "learning_rate": 5.289610453566834e-06, "loss": 0.13046722412109374, "step": 51725 }, { "epoch": 0.4472940138866071, "grad_norm": 3.4398990564343292, "learning_rate": 5.289478804425028e-06, "loss": 0.34952621459960936, "step": 51730 }, { "epoch": 0.4473372474081504, "grad_norm": 10.766005861462274, "learning_rate": 5.289347144724347e-06, "loss": 0.16774520874023438, "step": 51735 }, { "epoch": 0.4473804809296936, "grad_norm": 28.209586511819932, "learning_rate": 5.2892154744654e-06, "loss": 0.391607666015625, "step": 51740 }, { "epoch": 0.4474237144512369, "grad_norm": 33.50181146380638, "learning_rate": 5.289083793648792e-06, "loss": 0.45179290771484376, "step": 51745 }, { "epoch": 0.4474669479727802, "grad_norm": 13.563677339178536, "learning_rate": 5.288952102275134e-06, "loss": 0.0981109619140625, "step": 51750 }, { "epoch": 0.4475101814943234, "grad_norm": 8.299016924665587, "learning_rate": 5.288820400345029e-06, "loss": 0.12227630615234375, "step": 51755 }, { "epoch": 0.4475534150158667, "grad_norm": 2.2027342231269547, "learning_rate": 5.288688687859088e-06, "loss": 0.11338310241699219, "step": 51760 }, { "epoch": 0.44759664853741, "grad_norm": 2.3811438108606002, "learning_rate": 5.288556964817917e-06, "loss": 0.09878616333007813, "step": 51765 }, { "epoch": 0.4476398820589532, "grad_norm": 10.496975441731355, "learning_rate": 5.288425231222122e-06, "loss": 0.06291046142578124, "step": 51770 }, { "epoch": 0.4476831155804965, "grad_norm": 3.6795895896199804, "learning_rate": 5.288293487072313e-06, "loss": 0.03028564453125, "step": 51775 }, { "epoch": 0.44772634910203973, "grad_norm": 9.79109350207462, "learning_rate": 5.288161732369097e-06, "loss": 0.232611083984375, "step": 51780 }, { "epoch": 0.447769582623583, "grad_norm": 52.95416003431599, "learning_rate": 5.288029967113081e-06, "loss": 0.31653289794921874, "step": 51785 }, { "epoch": 0.4478128161451263, "grad_norm": 0.2637442643639685, "learning_rate": 5.287898191304873e-06, "loss": 0.07521400451660157, "step": 51790 }, { "epoch": 0.44785604966666953, "grad_norm": 3.2131903700803286, "learning_rate": 5.2877664049450805e-06, "loss": 0.140533447265625, "step": 51795 }, { "epoch": 0.4478992831882128, "grad_norm": 30.44777784485469, "learning_rate": 5.287634608034312e-06, "loss": 0.243896484375, "step": 51800 }, { "epoch": 0.4479425167097561, "grad_norm": 49.88362369926768, "learning_rate": 5.287502800573174e-06, "loss": 0.4409534454345703, "step": 51805 }, { "epoch": 0.44798575023129933, "grad_norm": 6.083336963417873, "learning_rate": 5.287370982562276e-06, "loss": 0.4169647216796875, "step": 51810 }, { "epoch": 0.4480289837528426, "grad_norm": 13.534570190579, "learning_rate": 5.287239154002225e-06, "loss": 0.085400390625, "step": 51815 }, { "epoch": 0.44807221727438584, "grad_norm": 18.156149476570725, "learning_rate": 5.287107314893629e-06, "loss": 0.18821640014648439, "step": 51820 }, { "epoch": 0.4481154507959291, "grad_norm": 1.1900066760306673, "learning_rate": 5.286975465237096e-06, "loss": 0.1536529541015625, "step": 51825 }, { "epoch": 0.4481586843174724, "grad_norm": 1.4563462603460073, "learning_rate": 5.286843605033234e-06, "loss": 0.25012054443359377, "step": 51830 }, { "epoch": 0.44820191783901564, "grad_norm": 2.3366796269185994, "learning_rate": 5.286711734282652e-06, "loss": 0.082684326171875, "step": 51835 }, { "epoch": 0.4482451513605589, "grad_norm": 1.5187879540694416, "learning_rate": 5.286579852985956e-06, "loss": 0.231524658203125, "step": 51840 }, { "epoch": 0.4482883848821022, "grad_norm": 14.41095618284111, "learning_rate": 5.286447961143758e-06, "loss": 0.1024993896484375, "step": 51845 }, { "epoch": 0.44833161840364544, "grad_norm": 2.9895315060862235, "learning_rate": 5.286316058756662e-06, "loss": 0.2674095153808594, "step": 51850 }, { "epoch": 0.4483748519251887, "grad_norm": 29.160104343408847, "learning_rate": 5.28618414582528e-06, "loss": 0.1644073486328125, "step": 51855 }, { "epoch": 0.44841808544673195, "grad_norm": 2.906195106759367, "learning_rate": 5.286052222350218e-06, "loss": 0.0252593994140625, "step": 51860 }, { "epoch": 0.44846131896827524, "grad_norm": 4.434327540100117, "learning_rate": 5.285920288332086e-06, "loss": 0.2224609375, "step": 51865 }, { "epoch": 0.4485045524898185, "grad_norm": 0.5550506179577476, "learning_rate": 5.28578834377149e-06, "loss": 0.23480491638183593, "step": 51870 }, { "epoch": 0.44854778601136175, "grad_norm": 6.697031659942536, "learning_rate": 5.285656388669041e-06, "loss": 0.218890380859375, "step": 51875 }, { "epoch": 0.44859101953290503, "grad_norm": 9.304560481644765, "learning_rate": 5.285524423025347e-06, "loss": 0.2789501190185547, "step": 51880 }, { "epoch": 0.4486342530544483, "grad_norm": 6.981814239271641, "learning_rate": 5.285392446841016e-06, "loss": 0.0569091796875, "step": 51885 }, { "epoch": 0.44867748657599155, "grad_norm": 3.189450146731803, "learning_rate": 5.285260460116658e-06, "loss": 0.085345458984375, "step": 51890 }, { "epoch": 0.44872072009753483, "grad_norm": 9.20776956977403, "learning_rate": 5.285128462852879e-06, "loss": 0.05399932861328125, "step": 51895 }, { "epoch": 0.4487639536190781, "grad_norm": 1.1509154250707248, "learning_rate": 5.284996455050289e-06, "loss": 0.0671142578125, "step": 51900 }, { "epoch": 0.44880718714062134, "grad_norm": 11.962775164515566, "learning_rate": 5.2848644367094995e-06, "loss": 0.2437713623046875, "step": 51905 }, { "epoch": 0.44885042066216463, "grad_norm": 1.5750209191888564, "learning_rate": 5.284732407831115e-06, "loss": 0.04903554916381836, "step": 51910 }, { "epoch": 0.44889365418370786, "grad_norm": 10.873476763006467, "learning_rate": 5.284600368415748e-06, "loss": 0.1598804473876953, "step": 51915 }, { "epoch": 0.44893688770525114, "grad_norm": 19.87290573833856, "learning_rate": 5.284468318464006e-06, "loss": 0.233148193359375, "step": 51920 }, { "epoch": 0.4489801212267944, "grad_norm": 1.620979808791717, "learning_rate": 5.284336257976497e-06, "loss": 0.0586212158203125, "step": 51925 }, { "epoch": 0.44902335474833766, "grad_norm": 1.7137965457173492, "learning_rate": 5.284204186953832e-06, "loss": 0.201690673828125, "step": 51930 }, { "epoch": 0.44906658826988094, "grad_norm": 15.088486007885608, "learning_rate": 5.284072105396618e-06, "loss": 0.23426513671875, "step": 51935 }, { "epoch": 0.4491098217914242, "grad_norm": 28.045672259348922, "learning_rate": 5.283940013305466e-06, "loss": 0.200018310546875, "step": 51940 }, { "epoch": 0.44915305531296745, "grad_norm": 0.8497710809067327, "learning_rate": 5.283807910680984e-06, "loss": 0.09764556884765625, "step": 51945 }, { "epoch": 0.44919628883451074, "grad_norm": 46.53292040491562, "learning_rate": 5.283675797523782e-06, "loss": 0.6413284301757812, "step": 51950 }, { "epoch": 0.44923952235605397, "grad_norm": 116.09687731819012, "learning_rate": 5.28354367383447e-06, "loss": 0.15789794921875, "step": 51955 }, { "epoch": 0.44928275587759725, "grad_norm": 59.42989889610305, "learning_rate": 5.283411539613655e-06, "loss": 0.28061904907226565, "step": 51960 }, { "epoch": 0.44932598939914054, "grad_norm": 5.614045971810457, "learning_rate": 5.283279394861948e-06, "loss": 0.173077392578125, "step": 51965 }, { "epoch": 0.44936922292068376, "grad_norm": 3.438332528076015, "learning_rate": 5.283147239579959e-06, "loss": 0.126495361328125, "step": 51970 }, { "epoch": 0.44941245644222705, "grad_norm": 1.453356112801442, "learning_rate": 5.283015073768295e-06, "loss": 0.14817047119140625, "step": 51975 }, { "epoch": 0.44945568996377033, "grad_norm": 14.092978847825089, "learning_rate": 5.282882897427569e-06, "loss": 0.13434906005859376, "step": 51980 }, { "epoch": 0.44949892348531356, "grad_norm": 0.9038457386643622, "learning_rate": 5.282750710558388e-06, "loss": 0.026319122314453124, "step": 51985 }, { "epoch": 0.44954215700685685, "grad_norm": 2.587193032176388, "learning_rate": 5.282618513161362e-06, "loss": 0.1160980224609375, "step": 51990 }, { "epoch": 0.4495853905284001, "grad_norm": 43.91224698339289, "learning_rate": 5.282486305237102e-06, "loss": 0.18560562133789063, "step": 51995 }, { "epoch": 0.44962862404994336, "grad_norm": 0.0814013631852053, "learning_rate": 5.282354086786216e-06, "loss": 0.23006210327148438, "step": 52000 }, { "epoch": 0.44967185757148664, "grad_norm": 0.174298220016862, "learning_rate": 5.282221857809315e-06, "loss": 0.036550140380859374, "step": 52005 }, { "epoch": 0.4497150910930299, "grad_norm": 62.66842251613572, "learning_rate": 5.282089618307009e-06, "loss": 0.6867385864257812, "step": 52010 }, { "epoch": 0.44975832461457316, "grad_norm": 1.8482774803667221, "learning_rate": 5.281957368279906e-06, "loss": 0.0420928955078125, "step": 52015 }, { "epoch": 0.44980155813611644, "grad_norm": 2.520928675854837, "learning_rate": 5.281825107728619e-06, "loss": 0.4444000244140625, "step": 52020 }, { "epoch": 0.44984479165765967, "grad_norm": 1.1544525811188238, "learning_rate": 5.281692836653755e-06, "loss": 0.037469482421875, "step": 52025 }, { "epoch": 0.44988802517920295, "grad_norm": 18.145778065956303, "learning_rate": 5.281560555055926e-06, "loss": 0.276434326171875, "step": 52030 }, { "epoch": 0.4499312587007462, "grad_norm": 3.681934669912415, "learning_rate": 5.28142826293574e-06, "loss": 0.1809326171875, "step": 52035 }, { "epoch": 0.44997449222228947, "grad_norm": 0.7123270241625956, "learning_rate": 5.28129596029381e-06, "loss": 0.15504150390625, "step": 52040 }, { "epoch": 0.45001772574383275, "grad_norm": 0.9678931701393981, "learning_rate": 5.281163647130743e-06, "loss": 0.3460090637207031, "step": 52045 }, { "epoch": 0.450060959265376, "grad_norm": 47.67296557090035, "learning_rate": 5.281031323447153e-06, "loss": 0.3962860107421875, "step": 52050 }, { "epoch": 0.45010419278691927, "grad_norm": 27.676202247215155, "learning_rate": 5.280898989243646e-06, "loss": 0.17516937255859374, "step": 52055 }, { "epoch": 0.45014742630846255, "grad_norm": 16.547378874815738, "learning_rate": 5.2807666445208355e-06, "loss": 0.3332633972167969, "step": 52060 }, { "epoch": 0.4501906598300058, "grad_norm": 1.53372449674899, "learning_rate": 5.28063428927933e-06, "loss": 0.19610595703125, "step": 52065 }, { "epoch": 0.45023389335154906, "grad_norm": 13.02027552057072, "learning_rate": 5.280501923519742e-06, "loss": 0.1571500778198242, "step": 52070 }, { "epoch": 0.45027712687309235, "grad_norm": 13.041876633773924, "learning_rate": 5.280369547242679e-06, "loss": 0.1268951416015625, "step": 52075 }, { "epoch": 0.4503203603946356, "grad_norm": 0.27280936231688063, "learning_rate": 5.280237160448755e-06, "loss": 0.3382415771484375, "step": 52080 }, { "epoch": 0.45036359391617886, "grad_norm": 13.256250334063878, "learning_rate": 5.280104763138577e-06, "loss": 0.22670326232910157, "step": 52085 }, { "epoch": 0.4504068274377221, "grad_norm": 18.642992261230997, "learning_rate": 5.2799723553127585e-06, "loss": 0.180133056640625, "step": 52090 }, { "epoch": 0.4504500609592654, "grad_norm": 0.313050689375663, "learning_rate": 5.279839936971908e-06, "loss": 0.04416351318359375, "step": 52095 }, { "epoch": 0.45049329448080866, "grad_norm": 20.48488330362285, "learning_rate": 5.279707508116638e-06, "loss": 0.23798370361328125, "step": 52100 }, { "epoch": 0.4505365280023519, "grad_norm": 1.8442225154333793, "learning_rate": 5.279575068747558e-06, "loss": 0.18292236328125, "step": 52105 }, { "epoch": 0.4505797615238952, "grad_norm": 0.18628880144491192, "learning_rate": 5.279442618865279e-06, "loss": 0.1426849365234375, "step": 52110 }, { "epoch": 0.45062299504543846, "grad_norm": 2.4893584066054077, "learning_rate": 5.279310158470412e-06, "loss": 0.1530292510986328, "step": 52115 }, { "epoch": 0.4506662285669817, "grad_norm": 10.747474239020203, "learning_rate": 5.279177687563569e-06, "loss": 0.40223846435546873, "step": 52120 }, { "epoch": 0.45070946208852497, "grad_norm": 24.033744196521425, "learning_rate": 5.279045206145359e-06, "loss": 0.11183662414550781, "step": 52125 }, { "epoch": 0.4507526956100682, "grad_norm": 0.3360982119787993, "learning_rate": 5.278912714216394e-06, "loss": 0.11035690307617188, "step": 52130 }, { "epoch": 0.4507959291316115, "grad_norm": 0.5464301781873095, "learning_rate": 5.2787802117772845e-06, "loss": 0.1569000244140625, "step": 52135 }, { "epoch": 0.45083916265315477, "grad_norm": 35.83988948347069, "learning_rate": 5.278647698828643e-06, "loss": 0.3151054382324219, "step": 52140 }, { "epoch": 0.450882396174698, "grad_norm": 2.117905643332883, "learning_rate": 5.278515175371078e-06, "loss": 0.20906982421875, "step": 52145 }, { "epoch": 0.4509256296962413, "grad_norm": 12.430562514642071, "learning_rate": 5.278382641405205e-06, "loss": 0.34287109375, "step": 52150 }, { "epoch": 0.45096886321778457, "grad_norm": 13.311993563997168, "learning_rate": 5.27825009693163e-06, "loss": 0.1608154296875, "step": 52155 }, { "epoch": 0.4510120967393278, "grad_norm": 0.5697330122506176, "learning_rate": 5.2781175419509685e-06, "loss": 0.0653564453125, "step": 52160 }, { "epoch": 0.4510553302608711, "grad_norm": 5.133824898052441, "learning_rate": 5.277984976463829e-06, "loss": 0.040586090087890624, "step": 52165 }, { "epoch": 0.4510985637824143, "grad_norm": 13.877377572414634, "learning_rate": 5.277852400470825e-06, "loss": 0.14807281494140626, "step": 52170 }, { "epoch": 0.4511417973039576, "grad_norm": 0.33124593614999187, "learning_rate": 5.277719813972567e-06, "loss": 0.190350341796875, "step": 52175 }, { "epoch": 0.4511850308255009, "grad_norm": 15.917106099497845, "learning_rate": 5.277587216969666e-06, "loss": 0.2957763671875, "step": 52180 }, { "epoch": 0.4512282643470441, "grad_norm": 11.409204500493237, "learning_rate": 5.277454609462734e-06, "loss": 0.054036712646484374, "step": 52185 }, { "epoch": 0.4512714978685874, "grad_norm": 2.2485205544673383, "learning_rate": 5.277321991452383e-06, "loss": 0.10841598510742187, "step": 52190 }, { "epoch": 0.4513147313901307, "grad_norm": 1.7202243686387984, "learning_rate": 5.277189362939223e-06, "loss": 0.12644462585449218, "step": 52195 }, { "epoch": 0.4513579649116739, "grad_norm": 12.605196958436863, "learning_rate": 5.277056723923869e-06, "loss": 0.1364950180053711, "step": 52200 }, { "epoch": 0.4514011984332172, "grad_norm": 0.31258004477529505, "learning_rate": 5.276924074406929e-06, "loss": 0.2743633270263672, "step": 52205 }, { "epoch": 0.4514444319547604, "grad_norm": 37.41504640118985, "learning_rate": 5.276791414389016e-06, "loss": 0.62872314453125, "step": 52210 }, { "epoch": 0.4514876654763037, "grad_norm": 3.1693258750198186, "learning_rate": 5.276658743870744e-06, "loss": 0.28739013671875, "step": 52215 }, { "epoch": 0.451530898997847, "grad_norm": 2.997160285935509, "learning_rate": 5.276526062852722e-06, "loss": 0.36666259765625, "step": 52220 }, { "epoch": 0.4515741325193902, "grad_norm": 1.7030514308432734, "learning_rate": 5.276393371335563e-06, "loss": 0.124395751953125, "step": 52225 }, { "epoch": 0.4516173660409335, "grad_norm": 31.773597376287558, "learning_rate": 5.276260669319879e-06, "loss": 0.2219757080078125, "step": 52230 }, { "epoch": 0.4516605995624768, "grad_norm": 42.20028277103073, "learning_rate": 5.276127956806281e-06, "loss": 0.2911712646484375, "step": 52235 }, { "epoch": 0.45170383308402, "grad_norm": 23.846543418351075, "learning_rate": 5.275995233795383e-06, "loss": 0.362689208984375, "step": 52240 }, { "epoch": 0.4517470666055633, "grad_norm": 2.0505973144404446, "learning_rate": 5.275862500287796e-06, "loss": 0.03620452880859375, "step": 52245 }, { "epoch": 0.4517903001271066, "grad_norm": 1.3014045094999902, "learning_rate": 5.275729756284132e-06, "loss": 0.15931472778320313, "step": 52250 }, { "epoch": 0.4518335336486498, "grad_norm": 0.8743797237044391, "learning_rate": 5.275597001785003e-06, "loss": 0.01842803955078125, "step": 52255 }, { "epoch": 0.4518767671701931, "grad_norm": 9.39315247432143, "learning_rate": 5.275464236791023e-06, "loss": 0.39247894287109375, "step": 52260 }, { "epoch": 0.4519200006917363, "grad_norm": 5.27453448773578, "learning_rate": 5.275331461302802e-06, "loss": 0.18377685546875, "step": 52265 }, { "epoch": 0.4519632342132796, "grad_norm": 29.96418654006927, "learning_rate": 5.275198675320954e-06, "loss": 0.1803924560546875, "step": 52270 }, { "epoch": 0.4520064677348229, "grad_norm": 12.681913532627066, "learning_rate": 5.27506587884609e-06, "loss": 0.14517059326171874, "step": 52275 }, { "epoch": 0.4520497012563661, "grad_norm": 1.2280927741621668, "learning_rate": 5.274933071878824e-06, "loss": 0.20180435180664064, "step": 52280 }, { "epoch": 0.4520929347779094, "grad_norm": 33.63115353823529, "learning_rate": 5.274800254419767e-06, "loss": 0.3639434814453125, "step": 52285 }, { "epoch": 0.4521361682994527, "grad_norm": 17.041368828367357, "learning_rate": 5.274667426469532e-06, "loss": 0.19546165466308593, "step": 52290 }, { "epoch": 0.4521794018209959, "grad_norm": 12.960549388828282, "learning_rate": 5.274534588028733e-06, "loss": 0.1468292236328125, "step": 52295 }, { "epoch": 0.4522226353425392, "grad_norm": 17.632242845148856, "learning_rate": 5.27440173909798e-06, "loss": 0.236224365234375, "step": 52300 }, { "epoch": 0.45226586886408243, "grad_norm": 16.761445333347048, "learning_rate": 5.274268879677889e-06, "loss": 0.11197662353515625, "step": 52305 }, { "epoch": 0.4523091023856257, "grad_norm": 21.259156737069784, "learning_rate": 5.274136009769069e-06, "loss": 0.10072746276855468, "step": 52310 }, { "epoch": 0.452352335907169, "grad_norm": 8.070154410896501, "learning_rate": 5.274003129372136e-06, "loss": 0.17117881774902344, "step": 52315 }, { "epoch": 0.45239556942871223, "grad_norm": 1.0066086250704285, "learning_rate": 5.273870238487701e-06, "loss": 0.08038406372070313, "step": 52320 }, { "epoch": 0.4524388029502555, "grad_norm": 79.53949093189577, "learning_rate": 5.273737337116377e-06, "loss": 0.8503128051757812, "step": 52325 }, { "epoch": 0.4524820364717988, "grad_norm": 0.2830825767771316, "learning_rate": 5.273604425258778e-06, "loss": 0.19214401245117188, "step": 52330 }, { "epoch": 0.452525269993342, "grad_norm": 0.3124473419506187, "learning_rate": 5.273471502915515e-06, "loss": 0.1194183349609375, "step": 52335 }, { "epoch": 0.4525685035148853, "grad_norm": 3.1647688379512156, "learning_rate": 5.273338570087204e-06, "loss": 0.18073348999023436, "step": 52340 }, { "epoch": 0.45261173703642854, "grad_norm": 8.0934328154736, "learning_rate": 5.273205626774456e-06, "loss": 0.469189453125, "step": 52345 }, { "epoch": 0.4526549705579718, "grad_norm": 8.834756638165015, "learning_rate": 5.273072672977884e-06, "loss": 0.09007797241210938, "step": 52350 }, { "epoch": 0.4526982040795151, "grad_norm": 1.3027845313790414, "learning_rate": 5.272939708698102e-06, "loss": 0.15366668701171876, "step": 52355 }, { "epoch": 0.45274143760105834, "grad_norm": 3.635581794066471, "learning_rate": 5.272806733935723e-06, "loss": 0.15182952880859374, "step": 52360 }, { "epoch": 0.4527846711226016, "grad_norm": 13.160218206496527, "learning_rate": 5.27267374869136e-06, "loss": 0.08268051147460938, "step": 52365 }, { "epoch": 0.4528279046441449, "grad_norm": 11.105820666198326, "learning_rate": 5.2725407529656255e-06, "loss": 0.202557373046875, "step": 52370 }, { "epoch": 0.45287113816568814, "grad_norm": 2.44206543927478, "learning_rate": 5.272407746759136e-06, "loss": 0.133123779296875, "step": 52375 }, { "epoch": 0.4529143716872314, "grad_norm": 7.888604590847069, "learning_rate": 5.272274730072501e-06, "loss": 0.468353271484375, "step": 52380 }, { "epoch": 0.45295760520877465, "grad_norm": 28.25842484697609, "learning_rate": 5.272141702906336e-06, "loss": 0.3313629150390625, "step": 52385 }, { "epoch": 0.45300083873031793, "grad_norm": 36.010171915320676, "learning_rate": 5.272008665261255e-06, "loss": 0.3293426513671875, "step": 52390 }, { "epoch": 0.4530440722518612, "grad_norm": 29.71357550402593, "learning_rate": 5.27187561713787e-06, "loss": 0.14788360595703126, "step": 52395 }, { "epoch": 0.45308730577340445, "grad_norm": 1.829237226190377, "learning_rate": 5.2717425585367956e-06, "loss": 0.0715799331665039, "step": 52400 }, { "epoch": 0.45313053929494773, "grad_norm": 0.3206072320387917, "learning_rate": 5.271609489458646e-06, "loss": 0.067041015625, "step": 52405 }, { "epoch": 0.453173772816491, "grad_norm": 16.40684588370265, "learning_rate": 5.271476409904032e-06, "loss": 0.2019927978515625, "step": 52410 }, { "epoch": 0.45321700633803425, "grad_norm": 18.061538824165957, "learning_rate": 5.271343319873572e-06, "loss": 0.06700973510742188, "step": 52415 }, { "epoch": 0.45326023985957753, "grad_norm": 44.98841698480656, "learning_rate": 5.271210219367876e-06, "loss": 0.47020263671875, "step": 52420 }, { "epoch": 0.45330347338112076, "grad_norm": 1.6491932442939266, "learning_rate": 5.27107710838756e-06, "loss": 0.08503875732421876, "step": 52425 }, { "epoch": 0.45334670690266404, "grad_norm": 0.6628311812169073, "learning_rate": 5.270943986933236e-06, "loss": 0.08234710693359375, "step": 52430 }, { "epoch": 0.4533899404242073, "grad_norm": 3.8403158387703242, "learning_rate": 5.27081085500552e-06, "loss": 0.06284379959106445, "step": 52435 }, { "epoch": 0.45343317394575056, "grad_norm": 1.2412415296804638, "learning_rate": 5.270677712605025e-06, "loss": 0.3441581726074219, "step": 52440 }, { "epoch": 0.45347640746729384, "grad_norm": 19.91476065593642, "learning_rate": 5.2705445597323635e-06, "loss": 0.06088104248046875, "step": 52445 }, { "epoch": 0.4535196409888371, "grad_norm": 1.5661257118470504, "learning_rate": 5.270411396388153e-06, "loss": 0.29962997436523436, "step": 52450 }, { "epoch": 0.45356287451038035, "grad_norm": 1.7662117929240826, "learning_rate": 5.270278222573005e-06, "loss": 0.06461029052734375, "step": 52455 }, { "epoch": 0.45360610803192364, "grad_norm": 0.5783090562464447, "learning_rate": 5.270145038287534e-06, "loss": 0.062252235412597653, "step": 52460 }, { "epoch": 0.4536493415534669, "grad_norm": 32.74376288389766, "learning_rate": 5.2700118435323545e-06, "loss": 0.259307861328125, "step": 52465 }, { "epoch": 0.45369257507501015, "grad_norm": 11.793839227422303, "learning_rate": 5.269878638308082e-06, "loss": 0.4215911865234375, "step": 52470 }, { "epoch": 0.45373580859655344, "grad_norm": 1.1006894324879821, "learning_rate": 5.269745422615329e-06, "loss": 0.1521484375, "step": 52475 }, { "epoch": 0.45377904211809666, "grad_norm": 27.12971230154841, "learning_rate": 5.269612196454711e-06, "loss": 0.1384674072265625, "step": 52480 }, { "epoch": 0.45382227563963995, "grad_norm": 9.1415351674009, "learning_rate": 5.269478959826842e-06, "loss": 0.07102241516113281, "step": 52485 }, { "epoch": 0.45386550916118323, "grad_norm": 24.76675337550535, "learning_rate": 5.2693457127323365e-06, "loss": 0.27721710205078126, "step": 52490 }, { "epoch": 0.45390874268272646, "grad_norm": 8.238313113987061, "learning_rate": 5.269212455171809e-06, "loss": 0.27103271484375, "step": 52495 }, { "epoch": 0.45395197620426975, "grad_norm": 5.63343042084283, "learning_rate": 5.269079187145875e-06, "loss": 0.18654937744140626, "step": 52500 }, { "epoch": 0.45399520972581303, "grad_norm": 0.38674076698870724, "learning_rate": 5.268945908655148e-06, "loss": 0.1546875, "step": 52505 }, { "epoch": 0.45403844324735626, "grad_norm": 22.489585000561032, "learning_rate": 5.2688126197002426e-06, "loss": 0.40334625244140626, "step": 52510 }, { "epoch": 0.45408167676889954, "grad_norm": 4.190306992164747, "learning_rate": 5.2686793202817736e-06, "loss": 0.0975250244140625, "step": 52515 }, { "epoch": 0.4541249102904428, "grad_norm": 18.594809643774656, "learning_rate": 5.268546010400356e-06, "loss": 0.28153076171875, "step": 52520 }, { "epoch": 0.45416814381198606, "grad_norm": 9.296692384573559, "learning_rate": 5.268412690056605e-06, "loss": 0.12519493103027343, "step": 52525 }, { "epoch": 0.45421137733352934, "grad_norm": 17.07717571932885, "learning_rate": 5.268279359251135e-06, "loss": 0.26297607421875, "step": 52530 }, { "epoch": 0.45425461085507257, "grad_norm": 2.472154474685067, "learning_rate": 5.268146017984561e-06, "loss": 0.4065895080566406, "step": 52535 }, { "epoch": 0.45429784437661586, "grad_norm": 34.94221186276995, "learning_rate": 5.268012666257499e-06, "loss": 0.2671966552734375, "step": 52540 }, { "epoch": 0.45434107789815914, "grad_norm": 0.35434385149656206, "learning_rate": 5.2678793040705615e-06, "loss": 0.17914466857910155, "step": 52545 }, { "epoch": 0.45438431141970237, "grad_norm": 2.1008110795632136, "learning_rate": 5.267745931424366e-06, "loss": 0.08944244384765625, "step": 52550 }, { "epoch": 0.45442754494124565, "grad_norm": 3.7895991310425874, "learning_rate": 5.267612548319527e-06, "loss": 0.099029541015625, "step": 52555 }, { "epoch": 0.4544707784627889, "grad_norm": 6.3286467714776, "learning_rate": 5.267479154756658e-06, "loss": 0.21631011962890626, "step": 52560 }, { "epoch": 0.45451401198433217, "grad_norm": 3.500508527326245, "learning_rate": 5.267345750736375e-06, "loss": 0.3109611511230469, "step": 52565 }, { "epoch": 0.45455724550587545, "grad_norm": 25.77266283329916, "learning_rate": 5.267212336259296e-06, "loss": 0.22406005859375, "step": 52570 }, { "epoch": 0.4546004790274187, "grad_norm": 0.7447632324296173, "learning_rate": 5.267078911326033e-06, "loss": 0.07877883911132813, "step": 52575 }, { "epoch": 0.45464371254896196, "grad_norm": 1.0352410021415106, "learning_rate": 5.2669454759372015e-06, "loss": 0.0484161376953125, "step": 52580 }, { "epoch": 0.45468694607050525, "grad_norm": 19.310573963571315, "learning_rate": 5.266812030093419e-06, "loss": 0.08804702758789062, "step": 52585 }, { "epoch": 0.4547301795920485, "grad_norm": 0.2121894010511059, "learning_rate": 5.2666785737952996e-06, "loss": 0.07958526611328125, "step": 52590 }, { "epoch": 0.45477341311359176, "grad_norm": 7.592607147116051, "learning_rate": 5.2665451070434584e-06, "loss": 0.0905303955078125, "step": 52595 }, { "epoch": 0.454816646635135, "grad_norm": 9.26400913712698, "learning_rate": 5.266411629838512e-06, "loss": 0.102349853515625, "step": 52600 }, { "epoch": 0.4548598801566783, "grad_norm": 32.678489537408545, "learning_rate": 5.266278142181075e-06, "loss": 0.25148773193359375, "step": 52605 }, { "epoch": 0.45490311367822156, "grad_norm": 30.48254885481344, "learning_rate": 5.266144644071762e-06, "loss": 0.18634910583496095, "step": 52610 }, { "epoch": 0.4549463471997648, "grad_norm": 1.1758592149328928, "learning_rate": 5.266011135511193e-06, "loss": 0.12392578125, "step": 52615 }, { "epoch": 0.4549895807213081, "grad_norm": 20.006173065270215, "learning_rate": 5.265877616499979e-06, "loss": 0.150054931640625, "step": 52620 }, { "epoch": 0.45503281424285136, "grad_norm": 2.1455671146080655, "learning_rate": 5.265744087038738e-06, "loss": 0.029901123046875, "step": 52625 }, { "epoch": 0.4550760477643946, "grad_norm": 7.531306331638058, "learning_rate": 5.265610547128086e-06, "loss": 0.29703636169433595, "step": 52630 }, { "epoch": 0.45511928128593787, "grad_norm": 2.024898637874384, "learning_rate": 5.265476996768638e-06, "loss": 0.091839599609375, "step": 52635 }, { "epoch": 0.45516251480748116, "grad_norm": 17.293207963741928, "learning_rate": 5.26534343596101e-06, "loss": 0.36468849182128904, "step": 52640 }, { "epoch": 0.4552057483290244, "grad_norm": 4.137292137050127, "learning_rate": 5.265209864705818e-06, "loss": 0.11787261962890624, "step": 52645 }, { "epoch": 0.45524898185056767, "grad_norm": 14.473256696704217, "learning_rate": 5.265076283003679e-06, "loss": 0.3420448303222656, "step": 52650 }, { "epoch": 0.4552922153721109, "grad_norm": 4.37675367731807, "learning_rate": 5.264942690855208e-06, "loss": 0.11860275268554688, "step": 52655 }, { "epoch": 0.4553354488936542, "grad_norm": 2.6862996035539304, "learning_rate": 5.264809088261022e-06, "loss": 0.24197998046875, "step": 52660 }, { "epoch": 0.45537868241519747, "grad_norm": 0.7563132211250306, "learning_rate": 5.264675475221736e-06, "loss": 0.12650680541992188, "step": 52665 }, { "epoch": 0.4554219159367407, "grad_norm": 3.3241744022970687, "learning_rate": 5.264541851737967e-06, "loss": 0.0452392578125, "step": 52670 }, { "epoch": 0.455465149458284, "grad_norm": 23.01234424785175, "learning_rate": 5.264408217810331e-06, "loss": 0.59814453125, "step": 52675 }, { "epoch": 0.45550838297982726, "grad_norm": 9.661030539777684, "learning_rate": 5.264274573439444e-06, "loss": 0.5052047729492187, "step": 52680 }, { "epoch": 0.4555516165013705, "grad_norm": 2.8718036397867936, "learning_rate": 5.264140918625923e-06, "loss": 0.115069580078125, "step": 52685 }, { "epoch": 0.4555948500229138, "grad_norm": 0.9479018136581496, "learning_rate": 5.2640072533703845e-06, "loss": 0.090570068359375, "step": 52690 }, { "epoch": 0.455638083544457, "grad_norm": 6.992509062131595, "learning_rate": 5.2638735776734444e-06, "loss": 0.0988037109375, "step": 52695 }, { "epoch": 0.4556813170660003, "grad_norm": 9.688807917077575, "learning_rate": 5.26373989153572e-06, "loss": 0.3822490692138672, "step": 52700 }, { "epoch": 0.4557245505875436, "grad_norm": 7.056160772752559, "learning_rate": 5.263606194957826e-06, "loss": 0.13660659790039062, "step": 52705 }, { "epoch": 0.4557677841090868, "grad_norm": 51.02909663105215, "learning_rate": 5.2634724879403806e-06, "loss": 0.265069580078125, "step": 52710 }, { "epoch": 0.4558110176306301, "grad_norm": 6.058383994435742, "learning_rate": 5.263338770484001e-06, "loss": 0.0817626953125, "step": 52715 }, { "epoch": 0.4558542511521734, "grad_norm": 8.375882978291395, "learning_rate": 5.263205042589302e-06, "loss": 0.09897003173828126, "step": 52720 }, { "epoch": 0.4558974846737166, "grad_norm": 15.756810597295356, "learning_rate": 5.2630713042569014e-06, "loss": 0.535516357421875, "step": 52725 }, { "epoch": 0.4559407181952599, "grad_norm": 37.96479581296046, "learning_rate": 5.262937555487417e-06, "loss": 0.241448974609375, "step": 52730 }, { "epoch": 0.4559839517168031, "grad_norm": 21.428216834185132, "learning_rate": 5.262803796281464e-06, "loss": 0.27870025634765627, "step": 52735 }, { "epoch": 0.4560271852383464, "grad_norm": 5.75474678245408, "learning_rate": 5.262670026639659e-06, "loss": 0.0898162841796875, "step": 52740 }, { "epoch": 0.4560704187598897, "grad_norm": 0.6961759895525983, "learning_rate": 5.26253624656262e-06, "loss": 0.16969146728515624, "step": 52745 }, { "epoch": 0.4561136522814329, "grad_norm": 2.4642620643885644, "learning_rate": 5.262402456050964e-06, "loss": 0.086810302734375, "step": 52750 }, { "epoch": 0.4561568858029762, "grad_norm": 12.785449361134285, "learning_rate": 5.262268655105308e-06, "loss": 0.0453277587890625, "step": 52755 }, { "epoch": 0.4562001193245195, "grad_norm": 16.309562604660044, "learning_rate": 5.262134843726269e-06, "loss": 0.09546966552734375, "step": 52760 }, { "epoch": 0.4562433528460627, "grad_norm": 1.9722047643855505, "learning_rate": 5.2620010219144635e-06, "loss": 0.22203636169433594, "step": 52765 }, { "epoch": 0.456286586367606, "grad_norm": 21.591828005227484, "learning_rate": 5.2618671896705094e-06, "loss": 0.153875732421875, "step": 52770 }, { "epoch": 0.4563298198891492, "grad_norm": 0.25152189983232504, "learning_rate": 5.261733346995024e-06, "loss": 0.21417770385742188, "step": 52775 }, { "epoch": 0.4563730534106925, "grad_norm": 1.2671731543734357, "learning_rate": 5.2615994938886234e-06, "loss": 0.07421112060546875, "step": 52780 }, { "epoch": 0.4564162869322358, "grad_norm": 2.063605744407149, "learning_rate": 5.2614656303519265e-06, "loss": 0.22644882202148436, "step": 52785 }, { "epoch": 0.456459520453779, "grad_norm": 42.16801790739766, "learning_rate": 5.26133175638555e-06, "loss": 0.464385986328125, "step": 52790 }, { "epoch": 0.4565027539753223, "grad_norm": 20.96134447033408, "learning_rate": 5.2611978719901105e-06, "loss": 0.076043701171875, "step": 52795 }, { "epoch": 0.4565459874968656, "grad_norm": 2.77146789563965, "learning_rate": 5.261063977166227e-06, "loss": 0.11514453887939453, "step": 52800 }, { "epoch": 0.4565892210184088, "grad_norm": 4.799803522386148, "learning_rate": 5.260930071914516e-06, "loss": 0.3005706787109375, "step": 52805 }, { "epoch": 0.4566324545399521, "grad_norm": 25.85917810696473, "learning_rate": 5.260796156235595e-06, "loss": 0.6109466552734375, "step": 52810 }, { "epoch": 0.4566756880614954, "grad_norm": 4.282563233188572, "learning_rate": 5.260662230130083e-06, "loss": 0.076336669921875, "step": 52815 }, { "epoch": 0.4567189215830386, "grad_norm": 0.5393646601824634, "learning_rate": 5.260528293598595e-06, "loss": 0.1941680908203125, "step": 52820 }, { "epoch": 0.4567621551045819, "grad_norm": 4.829315191599655, "learning_rate": 5.26039434664175e-06, "loss": 0.18703765869140626, "step": 52825 }, { "epoch": 0.45680538862612513, "grad_norm": 0.9121842934502588, "learning_rate": 5.2602603892601675e-06, "loss": 0.02948760986328125, "step": 52830 }, { "epoch": 0.4568486221476684, "grad_norm": 2.248435853072685, "learning_rate": 5.260126421454464e-06, "loss": 0.2880828857421875, "step": 52835 }, { "epoch": 0.4568918556692117, "grad_norm": 23.71551825352607, "learning_rate": 5.259992443225256e-06, "loss": 0.22174072265625, "step": 52840 }, { "epoch": 0.45693508919075493, "grad_norm": 21.10913125498691, "learning_rate": 5.259858454573163e-06, "loss": 0.23839111328125, "step": 52845 }, { "epoch": 0.4569783227122982, "grad_norm": 4.342348804767783, "learning_rate": 5.259724455498802e-06, "loss": 0.1331268310546875, "step": 52850 }, { "epoch": 0.4570215562338415, "grad_norm": 1.0917342654137263, "learning_rate": 5.2595904460027925e-06, "loss": 0.13935317993164062, "step": 52855 }, { "epoch": 0.4570647897553847, "grad_norm": 4.769837667661137, "learning_rate": 5.2594564260857506e-06, "loss": 0.05006103515625, "step": 52860 }, { "epoch": 0.457108023276928, "grad_norm": 0.2654216029531642, "learning_rate": 5.259322395748296e-06, "loss": 0.3565044403076172, "step": 52865 }, { "epoch": 0.45715125679847124, "grad_norm": 6.253187324585854, "learning_rate": 5.259188354991045e-06, "loss": 0.08596954345703126, "step": 52870 }, { "epoch": 0.4571944903200145, "grad_norm": 2.3798103157567327, "learning_rate": 5.259054303814618e-06, "loss": 0.1256561279296875, "step": 52875 }, { "epoch": 0.4572377238415578, "grad_norm": 11.12193064122295, "learning_rate": 5.258920242219632e-06, "loss": 0.412591552734375, "step": 52880 }, { "epoch": 0.45728095736310104, "grad_norm": 52.59510535905525, "learning_rate": 5.258786170206706e-06, "loss": 0.40526885986328126, "step": 52885 }, { "epoch": 0.4573241908846443, "grad_norm": 26.6420732900989, "learning_rate": 5.258652087776457e-06, "loss": 0.25135650634765627, "step": 52890 }, { "epoch": 0.4573674244061876, "grad_norm": 3.2651671982196855, "learning_rate": 5.258517994929504e-06, "loss": 0.185980224609375, "step": 52895 }, { "epoch": 0.45741065792773083, "grad_norm": 7.122760009919293, "learning_rate": 5.2583838916664655e-06, "loss": 0.037335205078125, "step": 52900 }, { "epoch": 0.4574538914492741, "grad_norm": 14.2237491688562, "learning_rate": 5.25824977798796e-06, "loss": 0.20105056762695311, "step": 52905 }, { "epoch": 0.45749712497081735, "grad_norm": 4.604618196151857, "learning_rate": 5.258115653894607e-06, "loss": 0.24130287170410156, "step": 52910 }, { "epoch": 0.45754035849236063, "grad_norm": 10.492176517607723, "learning_rate": 5.257981519387024e-06, "loss": 0.10464248657226563, "step": 52915 }, { "epoch": 0.4575835920139039, "grad_norm": 11.172965971706617, "learning_rate": 5.257847374465828e-06, "loss": 0.1302001953125, "step": 52920 }, { "epoch": 0.45762682553544715, "grad_norm": 2.9231470692893575, "learning_rate": 5.257713219131641e-06, "loss": 0.08710594177246093, "step": 52925 }, { "epoch": 0.45767005905699043, "grad_norm": 15.173535268541624, "learning_rate": 5.25757905338508e-06, "loss": 0.44625701904296877, "step": 52930 }, { "epoch": 0.4577132925785337, "grad_norm": 0.18746520004736686, "learning_rate": 5.257444877226763e-06, "loss": 0.052850341796875, "step": 52935 }, { "epoch": 0.45775652610007694, "grad_norm": 3.8213771589713494, "learning_rate": 5.257310690657311e-06, "loss": 0.1453857421875, "step": 52940 }, { "epoch": 0.45779975962162023, "grad_norm": 1.9278367726631767, "learning_rate": 5.257176493677341e-06, "loss": 0.11253433227539063, "step": 52945 }, { "epoch": 0.45784299314316346, "grad_norm": 3.324339055624797, "learning_rate": 5.257042286287473e-06, "loss": 0.13884429931640624, "step": 52950 }, { "epoch": 0.45788622666470674, "grad_norm": 28.133293139690572, "learning_rate": 5.256908068488324e-06, "loss": 0.2477783203125, "step": 52955 }, { "epoch": 0.45792946018625, "grad_norm": 3.765191656750019, "learning_rate": 5.2567738402805155e-06, "loss": 0.10144081115722656, "step": 52960 }, { "epoch": 0.45797269370779325, "grad_norm": 18.444632682556648, "learning_rate": 5.256639601664666e-06, "loss": 0.09005966186523437, "step": 52965 }, { "epoch": 0.45801592722933654, "grad_norm": 0.8152519269568812, "learning_rate": 5.256505352641393e-06, "loss": 0.18925876617431642, "step": 52970 }, { "epoch": 0.4580591607508798, "grad_norm": 0.5602462310180392, "learning_rate": 5.256371093211316e-06, "loss": 0.041057586669921875, "step": 52975 }, { "epoch": 0.45810239427242305, "grad_norm": 14.713574321387279, "learning_rate": 5.256236823375057e-06, "loss": 0.3322998046875, "step": 52980 }, { "epoch": 0.45814562779396634, "grad_norm": 13.032824363025512, "learning_rate": 5.256102543133232e-06, "loss": 0.10627670288085937, "step": 52985 }, { "epoch": 0.4581888613155096, "grad_norm": 0.2289069556065604, "learning_rate": 5.255968252486462e-06, "loss": 0.07387237548828125, "step": 52990 }, { "epoch": 0.45823209483705285, "grad_norm": 13.971647102837283, "learning_rate": 5.2558339514353645e-06, "loss": 0.16626548767089844, "step": 52995 }, { "epoch": 0.45827532835859613, "grad_norm": 27.429321983244222, "learning_rate": 5.2556996399805605e-06, "loss": 0.3166259765625, "step": 53000 }, { "epoch": 0.45831856188013936, "grad_norm": 3.8617868648314904, "learning_rate": 5.25556531812267e-06, "loss": 0.01616973876953125, "step": 53005 }, { "epoch": 0.45836179540168265, "grad_norm": 0.5368208443386508, "learning_rate": 5.255430985862311e-06, "loss": 0.09862060546875, "step": 53010 }, { "epoch": 0.45840502892322593, "grad_norm": 2.3455980108165497, "learning_rate": 5.255296643200103e-06, "loss": 0.0146881103515625, "step": 53015 }, { "epoch": 0.45844826244476916, "grad_norm": 0.4537829888776514, "learning_rate": 5.255162290136667e-06, "loss": 0.08002166748046875, "step": 53020 }, { "epoch": 0.45849149596631245, "grad_norm": 4.763688082387246, "learning_rate": 5.255027926672622e-06, "loss": 0.2590057373046875, "step": 53025 }, { "epoch": 0.45853472948785573, "grad_norm": 1.4543087126669427, "learning_rate": 5.254893552808586e-06, "loss": 0.23210906982421875, "step": 53030 }, { "epoch": 0.45857796300939896, "grad_norm": 30.441637595527506, "learning_rate": 5.254759168545182e-06, "loss": 0.20701026916503906, "step": 53035 }, { "epoch": 0.45862119653094224, "grad_norm": 5.5096167148461, "learning_rate": 5.254624773883027e-06, "loss": 0.146026611328125, "step": 53040 }, { "epoch": 0.4586644300524855, "grad_norm": 5.101490342888864, "learning_rate": 5.254490368822742e-06, "loss": 0.24098625183105468, "step": 53045 }, { "epoch": 0.45870766357402876, "grad_norm": 18.443921058476644, "learning_rate": 5.2543559533649465e-06, "loss": 0.220794677734375, "step": 53050 }, { "epoch": 0.45875089709557204, "grad_norm": 3.147852042484174, "learning_rate": 5.25422152751026e-06, "loss": 0.1110107421875, "step": 53055 }, { "epoch": 0.45879413061711527, "grad_norm": 1.7894306910395963, "learning_rate": 5.254087091259304e-06, "loss": 0.23988037109375, "step": 53060 }, { "epoch": 0.45883736413865855, "grad_norm": 17.277524676207257, "learning_rate": 5.253952644612697e-06, "loss": 0.2127532958984375, "step": 53065 }, { "epoch": 0.45888059766020184, "grad_norm": 10.835520324855288, "learning_rate": 5.25381818757106e-06, "loss": 0.3294708251953125, "step": 53070 }, { "epoch": 0.45892383118174507, "grad_norm": 7.0979304414717586, "learning_rate": 5.2536837201350116e-06, "loss": 0.21049346923828124, "step": 53075 }, { "epoch": 0.45896706470328835, "grad_norm": 38.660260443055975, "learning_rate": 5.2535492423051736e-06, "loss": 0.32159271240234377, "step": 53080 }, { "epoch": 0.4590102982248316, "grad_norm": 8.608565756166646, "learning_rate": 5.253414754082167e-06, "loss": 0.1234527587890625, "step": 53085 }, { "epoch": 0.45905353174637487, "grad_norm": 13.692448693683463, "learning_rate": 5.253280255466608e-06, "loss": 0.08283309936523438, "step": 53090 }, { "epoch": 0.45909676526791815, "grad_norm": 5.631299961092897, "learning_rate": 5.253145746459121e-06, "loss": 0.04577560424804687, "step": 53095 }, { "epoch": 0.4591399987894614, "grad_norm": 1.8027149987085769, "learning_rate": 5.253011227060325e-06, "loss": 0.13577418327331542, "step": 53100 }, { "epoch": 0.45918323231100466, "grad_norm": 2.9782441059052593, "learning_rate": 5.252876697270841e-06, "loss": 0.28601913452148436, "step": 53105 }, { "epoch": 0.45922646583254795, "grad_norm": 0.5527416477230643, "learning_rate": 5.252742157091287e-06, "loss": 0.044708251953125, "step": 53110 }, { "epoch": 0.4592696993540912, "grad_norm": 6.995876630709148, "learning_rate": 5.252607606522287e-06, "loss": 0.11136627197265625, "step": 53115 }, { "epoch": 0.45931293287563446, "grad_norm": 7.962720951261976, "learning_rate": 5.252473045564458e-06, "loss": 0.4496978759765625, "step": 53120 }, { "epoch": 0.4593561663971777, "grad_norm": 16.90933082210724, "learning_rate": 5.252338474218424e-06, "loss": 0.3511039733886719, "step": 53125 }, { "epoch": 0.459399399918721, "grad_norm": 14.234771144381359, "learning_rate": 5.252203892484802e-06, "loss": 0.14024009704589843, "step": 53130 }, { "epoch": 0.45944263344026426, "grad_norm": 36.743181443544394, "learning_rate": 5.252069300364217e-06, "loss": 0.370074462890625, "step": 53135 }, { "epoch": 0.4594858669618075, "grad_norm": 53.399366396786895, "learning_rate": 5.251934697857286e-06, "loss": 0.2128173828125, "step": 53140 }, { "epoch": 0.45952910048335077, "grad_norm": 6.11488667470479, "learning_rate": 5.251800084964631e-06, "loss": 0.07979564666748047, "step": 53145 }, { "epoch": 0.45957233400489406, "grad_norm": 26.722578831563776, "learning_rate": 5.2516654616868734e-06, "loss": 0.3233154296875, "step": 53150 }, { "epoch": 0.4596155675264373, "grad_norm": 3.7652565425217395, "learning_rate": 5.2515308280246335e-06, "loss": 0.08329010009765625, "step": 53155 }, { "epoch": 0.45965880104798057, "grad_norm": 7.520629907346344, "learning_rate": 5.251396183978533e-06, "loss": 0.37559814453125, "step": 53160 }, { "epoch": 0.4597020345695238, "grad_norm": 0.9859145889458545, "learning_rate": 5.251261529549191e-06, "loss": 0.1990142822265625, "step": 53165 }, { "epoch": 0.4597452680910671, "grad_norm": 14.024991246545937, "learning_rate": 5.251126864737232e-06, "loss": 0.16224517822265624, "step": 53170 }, { "epoch": 0.45978850161261037, "grad_norm": 6.0586244989567, "learning_rate": 5.250992189543272e-06, "loss": 0.303338623046875, "step": 53175 }, { "epoch": 0.4598317351341536, "grad_norm": 0.2996787474961059, "learning_rate": 5.250857503967936e-06, "loss": 0.13653945922851562, "step": 53180 }, { "epoch": 0.4598749686556969, "grad_norm": 0.2931543163890525, "learning_rate": 5.250722808011845e-06, "loss": 0.30996246337890626, "step": 53185 }, { "epoch": 0.45991820217724017, "grad_norm": 7.786236501332197, "learning_rate": 5.250588101675618e-06, "loss": 0.14552021026611328, "step": 53190 }, { "epoch": 0.4599614356987834, "grad_norm": 1.9542057727759172, "learning_rate": 5.250453384959879e-06, "loss": 0.025995635986328126, "step": 53195 }, { "epoch": 0.4600046692203267, "grad_norm": 0.8676502895022601, "learning_rate": 5.250318657865247e-06, "loss": 0.15231781005859374, "step": 53200 }, { "epoch": 0.46004790274186996, "grad_norm": 55.107861578187816, "learning_rate": 5.250183920392344e-06, "loss": 0.662750244140625, "step": 53205 }, { "epoch": 0.4600911362634132, "grad_norm": 2.651652600791953, "learning_rate": 5.250049172541792e-06, "loss": 0.08308868408203125, "step": 53210 }, { "epoch": 0.4601343697849565, "grad_norm": 0.4874506091846176, "learning_rate": 5.2499144143142114e-06, "loss": 0.18854026794433593, "step": 53215 }, { "epoch": 0.4601776033064997, "grad_norm": 3.0728692467536263, "learning_rate": 5.249779645710225e-06, "loss": 0.2186737060546875, "step": 53220 }, { "epoch": 0.460220836828043, "grad_norm": 7.700480859964304, "learning_rate": 5.249644866730454e-06, "loss": 0.25439453125, "step": 53225 }, { "epoch": 0.4602640703495863, "grad_norm": 18.789530596051463, "learning_rate": 5.249510077375519e-06, "loss": 0.0811004638671875, "step": 53230 }, { "epoch": 0.4603073038711295, "grad_norm": 15.766068311094802, "learning_rate": 5.2493752776460414e-06, "loss": 0.08191566467285157, "step": 53235 }, { "epoch": 0.4603505373926728, "grad_norm": 2.2618933168146635, "learning_rate": 5.249240467542645e-06, "loss": 0.16413421630859376, "step": 53240 }, { "epoch": 0.46039377091421607, "grad_norm": 1.9587089506378053, "learning_rate": 5.24910564706595e-06, "loss": 0.03710861206054687, "step": 53245 }, { "epoch": 0.4604370044357593, "grad_norm": 67.43384217410193, "learning_rate": 5.248970816216579e-06, "loss": 0.533984375, "step": 53250 }, { "epoch": 0.4604802379573026, "grad_norm": 1.1414853262304996, "learning_rate": 5.248835974995153e-06, "loss": 0.3383941650390625, "step": 53255 }, { "epoch": 0.4605234714788458, "grad_norm": 10.08273797867039, "learning_rate": 5.248701123402293e-06, "loss": 0.190924072265625, "step": 53260 }, { "epoch": 0.4605667050003891, "grad_norm": 9.121703019297861, "learning_rate": 5.248566261438624e-06, "loss": 0.1893798828125, "step": 53265 }, { "epoch": 0.4606099385219324, "grad_norm": 33.539502404346656, "learning_rate": 5.248431389104765e-06, "loss": 0.2158447265625, "step": 53270 }, { "epoch": 0.4606531720434756, "grad_norm": 6.591418743683457, "learning_rate": 5.24829650640134e-06, "loss": 0.2324676513671875, "step": 53275 }, { "epoch": 0.4606964055650189, "grad_norm": 57.25532645712394, "learning_rate": 5.248161613328969e-06, "loss": 0.322357177734375, "step": 53280 }, { "epoch": 0.4607396390865622, "grad_norm": 40.246211689889016, "learning_rate": 5.248026709888276e-06, "loss": 0.33010215759277345, "step": 53285 }, { "epoch": 0.4607828726081054, "grad_norm": 7.436749265369944, "learning_rate": 5.247891796079882e-06, "loss": 0.12262077331542968, "step": 53290 }, { "epoch": 0.4608261061296487, "grad_norm": 9.145563068368867, "learning_rate": 5.247756871904409e-06, "loss": 0.50048828125, "step": 53295 }, { "epoch": 0.4608693396511919, "grad_norm": 9.566180854590723, "learning_rate": 5.247621937362482e-06, "loss": 0.10711040496826171, "step": 53300 }, { "epoch": 0.4609125731727352, "grad_norm": 4.002937057012922, "learning_rate": 5.24748699245472e-06, "loss": 0.09601325988769531, "step": 53305 }, { "epoch": 0.4609558066942785, "grad_norm": 19.447507588720587, "learning_rate": 5.247352037181746e-06, "loss": 0.16734619140625, "step": 53310 }, { "epoch": 0.4609990402158217, "grad_norm": 11.155928721841054, "learning_rate": 5.247217071544184e-06, "loss": 0.19208755493164062, "step": 53315 }, { "epoch": 0.461042273737365, "grad_norm": 1.619431629062086, "learning_rate": 5.247082095542655e-06, "loss": 0.12023658752441406, "step": 53320 }, { "epoch": 0.4610855072589083, "grad_norm": 2.9847251832813524, "learning_rate": 5.246947109177782e-06, "loss": 0.32143325805664064, "step": 53325 }, { "epoch": 0.4611287407804515, "grad_norm": 1.71682872721188, "learning_rate": 5.246812112450187e-06, "loss": 0.1411590576171875, "step": 53330 }, { "epoch": 0.4611719743019948, "grad_norm": 10.693626028733817, "learning_rate": 5.246677105360494e-06, "loss": 0.09267425537109375, "step": 53335 }, { "epoch": 0.46121520782353803, "grad_norm": 16.181990007334424, "learning_rate": 5.246542087909324e-06, "loss": 0.34690399169921876, "step": 53340 }, { "epoch": 0.4612584413450813, "grad_norm": 12.93280432445726, "learning_rate": 5.246407060097301e-06, "loss": 0.21936187744140626, "step": 53345 }, { "epoch": 0.4613016748666246, "grad_norm": 1.5113634120306088, "learning_rate": 5.246272021925047e-06, "loss": 0.062482452392578124, "step": 53350 }, { "epoch": 0.46134490838816783, "grad_norm": 4.614869200247083, "learning_rate": 5.246136973393184e-06, "loss": 0.21139907836914062, "step": 53355 }, { "epoch": 0.4613881419097111, "grad_norm": 6.569564073260533, "learning_rate": 5.246001914502337e-06, "loss": 0.19033660888671874, "step": 53360 }, { "epoch": 0.4614313754312544, "grad_norm": 1.3055111104309856, "learning_rate": 5.245866845253126e-06, "loss": 0.02552947998046875, "step": 53365 }, { "epoch": 0.4614746089527976, "grad_norm": 37.75894965886728, "learning_rate": 5.245731765646178e-06, "loss": 0.6375, "step": 53370 }, { "epoch": 0.4615178424743409, "grad_norm": 0.6909882578166023, "learning_rate": 5.245596675682112e-06, "loss": 0.10046768188476562, "step": 53375 }, { "epoch": 0.4615610759958842, "grad_norm": 31.593941959256465, "learning_rate": 5.245461575361553e-06, "loss": 0.20330581665039063, "step": 53380 }, { "epoch": 0.4616043095174274, "grad_norm": 3.868770750694305, "learning_rate": 5.245326464685123e-06, "loss": 0.21771240234375, "step": 53385 }, { "epoch": 0.4616475430389707, "grad_norm": 2.540161550719531, "learning_rate": 5.245191343653447e-06, "loss": 0.2414989471435547, "step": 53390 }, { "epoch": 0.46169077656051394, "grad_norm": 0.6486959326279635, "learning_rate": 5.245056212267146e-06, "loss": 0.2266510009765625, "step": 53395 }, { "epoch": 0.4617340100820572, "grad_norm": 5.425997789614013, "learning_rate": 5.2449210705268434e-06, "loss": 0.060491943359375, "step": 53400 }, { "epoch": 0.4617772436036005, "grad_norm": 4.534417402901393, "learning_rate": 5.244785918433164e-06, "loss": 0.206304931640625, "step": 53405 }, { "epoch": 0.46182047712514374, "grad_norm": 32.568701527508146, "learning_rate": 5.24465075598673e-06, "loss": 0.3898963928222656, "step": 53410 }, { "epoch": 0.461863710646687, "grad_norm": 14.993979288804018, "learning_rate": 5.244515583188166e-06, "loss": 0.1115386962890625, "step": 53415 }, { "epoch": 0.4619069441682303, "grad_norm": 3.581032928986328, "learning_rate": 5.244380400038093e-06, "loss": 0.5192020416259766, "step": 53420 }, { "epoch": 0.46195017768977353, "grad_norm": 13.107536262139346, "learning_rate": 5.244245206537137e-06, "loss": 0.1477203369140625, "step": 53425 }, { "epoch": 0.4619934112113168, "grad_norm": 28.312613558264793, "learning_rate": 5.24411000268592e-06, "loss": 0.136077880859375, "step": 53430 }, { "epoch": 0.46203664473286005, "grad_norm": 8.348682431325583, "learning_rate": 5.243974788485067e-06, "loss": 0.182379150390625, "step": 53435 }, { "epoch": 0.46207987825440333, "grad_norm": 33.53007003061809, "learning_rate": 5.243839563935199e-06, "loss": 0.17811355590820313, "step": 53440 }, { "epoch": 0.4621231117759466, "grad_norm": 13.879143779836669, "learning_rate": 5.243704329036941e-06, "loss": 0.24383392333984374, "step": 53445 }, { "epoch": 0.46216634529748984, "grad_norm": 4.336992865604491, "learning_rate": 5.243569083790918e-06, "loss": 0.05511970520019531, "step": 53450 }, { "epoch": 0.46220957881903313, "grad_norm": 1.4872826035512445, "learning_rate": 5.2434338281977525e-06, "loss": 0.23068723678588868, "step": 53455 }, { "epoch": 0.4622528123405764, "grad_norm": 1.2234501742891317, "learning_rate": 5.243298562258068e-06, "loss": 0.0544830322265625, "step": 53460 }, { "epoch": 0.46229604586211964, "grad_norm": 0.20316492061788194, "learning_rate": 5.243163285972489e-06, "loss": 0.21521377563476562, "step": 53465 }, { "epoch": 0.4623392793836629, "grad_norm": 12.722478787307647, "learning_rate": 5.24302799934164e-06, "loss": 0.21300201416015624, "step": 53470 }, { "epoch": 0.46238251290520616, "grad_norm": 7.331010052525075, "learning_rate": 5.242892702366143e-06, "loss": 0.0798248291015625, "step": 53475 }, { "epoch": 0.46242574642674944, "grad_norm": 20.583772494092518, "learning_rate": 5.242757395046622e-06, "loss": 0.09219741821289062, "step": 53480 }, { "epoch": 0.4624689799482927, "grad_norm": 31.637674835109213, "learning_rate": 5.242622077383703e-06, "loss": 0.2111175537109375, "step": 53485 }, { "epoch": 0.46251221346983595, "grad_norm": 26.163510438421124, "learning_rate": 5.24248674937801e-06, "loss": 0.32833251953125, "step": 53490 }, { "epoch": 0.46255544699137924, "grad_norm": 8.038158939278393, "learning_rate": 5.242351411030166e-06, "loss": 0.0509002685546875, "step": 53495 }, { "epoch": 0.4625986805129225, "grad_norm": 7.699906909233647, "learning_rate": 5.242216062340795e-06, "loss": 0.2133941650390625, "step": 53500 }, { "epoch": 0.46264191403446575, "grad_norm": 4.273669398004509, "learning_rate": 5.2420807033105224e-06, "loss": 0.32807579040527346, "step": 53505 }, { "epoch": 0.46268514755600904, "grad_norm": 3.279938106448311, "learning_rate": 5.24194533393997e-06, "loss": 0.10926265716552734, "step": 53510 }, { "epoch": 0.46272838107755226, "grad_norm": 0.6909699121579876, "learning_rate": 5.241809954229765e-06, "loss": 0.11248016357421875, "step": 53515 }, { "epoch": 0.46277161459909555, "grad_norm": 1.6467248436137438, "learning_rate": 5.241674564180531e-06, "loss": 0.46463699340820314, "step": 53520 }, { "epoch": 0.46281484812063883, "grad_norm": 23.18887113539039, "learning_rate": 5.241539163792892e-06, "loss": 0.3097393035888672, "step": 53525 }, { "epoch": 0.46285808164218206, "grad_norm": 19.131787799431407, "learning_rate": 5.241403753067471e-06, "loss": 0.48744049072265627, "step": 53530 }, { "epoch": 0.46290131516372535, "grad_norm": 16.141568201950506, "learning_rate": 5.241268332004896e-06, "loss": 0.3947021484375, "step": 53535 }, { "epoch": 0.46294454868526863, "grad_norm": 0.7560628333197709, "learning_rate": 5.241132900605788e-06, "loss": 0.07792739868164063, "step": 53540 }, { "epoch": 0.46298778220681186, "grad_norm": 5.736817184610858, "learning_rate": 5.240997458870773e-06, "loss": 0.0958953857421875, "step": 53545 }, { "epoch": 0.46303101572835514, "grad_norm": 26.76894412830002, "learning_rate": 5.240862006800476e-06, "loss": 0.4024139404296875, "step": 53550 }, { "epoch": 0.46307424924989843, "grad_norm": 32.11197158039299, "learning_rate": 5.240726544395522e-06, "loss": 0.232037353515625, "step": 53555 }, { "epoch": 0.46311748277144166, "grad_norm": 0.24185032694288355, "learning_rate": 5.240591071656535e-06, "loss": 0.05707006454467774, "step": 53560 }, { "epoch": 0.46316071629298494, "grad_norm": 16.05040634507801, "learning_rate": 5.24045558858414e-06, "loss": 0.37490692138671877, "step": 53565 }, { "epoch": 0.46320394981452817, "grad_norm": 9.161074626718516, "learning_rate": 5.240320095178961e-06, "loss": 0.226788330078125, "step": 53570 }, { "epoch": 0.46324718333607146, "grad_norm": 11.686170985066246, "learning_rate": 5.240184591441624e-06, "loss": 0.20245132446289063, "step": 53575 }, { "epoch": 0.46329041685761474, "grad_norm": 0.18882606220733586, "learning_rate": 5.240049077372754e-06, "loss": 0.12882232666015625, "step": 53580 }, { "epoch": 0.46333365037915797, "grad_norm": 11.499348372038094, "learning_rate": 5.239913552972975e-06, "loss": 0.30801849365234374, "step": 53585 }, { "epoch": 0.46337688390070125, "grad_norm": 2.6717192383707515, "learning_rate": 5.239778018242914e-06, "loss": 0.05263595581054688, "step": 53590 }, { "epoch": 0.46342011742224454, "grad_norm": 13.143549390116021, "learning_rate": 5.239642473183193e-06, "loss": 0.1110748291015625, "step": 53595 }, { "epoch": 0.46346335094378777, "grad_norm": 19.482541360599186, "learning_rate": 5.239506917794439e-06, "loss": 0.17566490173339844, "step": 53600 }, { "epoch": 0.46350658446533105, "grad_norm": 1.5320074241406423, "learning_rate": 5.239371352077278e-06, "loss": 0.2570167541503906, "step": 53605 }, { "epoch": 0.4635498179868743, "grad_norm": 7.983179903960531, "learning_rate": 5.239235776032334e-06, "loss": 0.214654541015625, "step": 53610 }, { "epoch": 0.46359305150841756, "grad_norm": 7.383288807381018, "learning_rate": 5.239100189660232e-06, "loss": 0.149102783203125, "step": 53615 }, { "epoch": 0.46363628502996085, "grad_norm": 38.215279606971144, "learning_rate": 5.238964592961597e-06, "loss": 0.49298858642578125, "step": 53620 }, { "epoch": 0.4636795185515041, "grad_norm": 2.903513597093627, "learning_rate": 5.2388289859370574e-06, "loss": 0.09618091583251953, "step": 53625 }, { "epoch": 0.46372275207304736, "grad_norm": 21.582268883750217, "learning_rate": 5.238693368587234e-06, "loss": 0.12201995849609375, "step": 53630 }, { "epoch": 0.46376598559459065, "grad_norm": 7.654424978638285, "learning_rate": 5.238557740912756e-06, "loss": 0.0559661865234375, "step": 53635 }, { "epoch": 0.4638092191161339, "grad_norm": 0.6954474189437122, "learning_rate": 5.238422102914247e-06, "loss": 0.04877166748046875, "step": 53640 }, { "epoch": 0.46385245263767716, "grad_norm": 27.874287206601633, "learning_rate": 5.238286454592333e-06, "loss": 0.2025726318359375, "step": 53645 }, { "epoch": 0.4638956861592204, "grad_norm": 34.10373254105453, "learning_rate": 5.2381507959476406e-06, "loss": 0.37633056640625, "step": 53650 }, { "epoch": 0.4639389196807637, "grad_norm": 8.058159444894514, "learning_rate": 5.2380151269807935e-06, "loss": 0.073431396484375, "step": 53655 }, { "epoch": 0.46398215320230696, "grad_norm": 33.10994514828901, "learning_rate": 5.237879447692418e-06, "loss": 0.2860905170440674, "step": 53660 }, { "epoch": 0.4640253867238502, "grad_norm": 11.15626025714809, "learning_rate": 5.237743758083142e-06, "loss": 0.105096435546875, "step": 53665 }, { "epoch": 0.46406862024539347, "grad_norm": 156.11202356385442, "learning_rate": 5.237608058153587e-06, "loss": 0.2889495849609375, "step": 53670 }, { "epoch": 0.46411185376693675, "grad_norm": 2.5420135654720406, "learning_rate": 5.237472347904384e-06, "loss": 0.1537445068359375, "step": 53675 }, { "epoch": 0.46415508728848, "grad_norm": 29.277001634399166, "learning_rate": 5.237336627336154e-06, "loss": 0.10216827392578125, "step": 53680 }, { "epoch": 0.46419832081002327, "grad_norm": 34.402717169516364, "learning_rate": 5.237200896449527e-06, "loss": 0.41478424072265624, "step": 53685 }, { "epoch": 0.4642415543315665, "grad_norm": 12.045631955650261, "learning_rate": 5.237065155245126e-06, "loss": 0.11534423828125, "step": 53690 }, { "epoch": 0.4642847878531098, "grad_norm": 6.334071316039667, "learning_rate": 5.2369294037235786e-06, "loss": 0.0447906494140625, "step": 53695 }, { "epoch": 0.46432802137465307, "grad_norm": 20.48429208996695, "learning_rate": 5.236793641885509e-06, "loss": 0.16540908813476562, "step": 53700 }, { "epoch": 0.4643712548961963, "grad_norm": 22.373054162829256, "learning_rate": 5.236657869731547e-06, "loss": 0.37611083984375, "step": 53705 }, { "epoch": 0.4644144884177396, "grad_norm": 4.432941616971352, "learning_rate": 5.236522087262315e-06, "loss": 0.092156982421875, "step": 53710 }, { "epoch": 0.46445772193928286, "grad_norm": 7.584471584256683, "learning_rate": 5.2363862944784415e-06, "loss": 0.0471221923828125, "step": 53715 }, { "epoch": 0.4645009554608261, "grad_norm": 7.233356902079127, "learning_rate": 5.236250491380552e-06, "loss": 0.06080169677734375, "step": 53720 }, { "epoch": 0.4645441889823694, "grad_norm": 7.533985215827705, "learning_rate": 5.236114677969273e-06, "loss": 0.215460205078125, "step": 53725 }, { "epoch": 0.4645874225039126, "grad_norm": 5.18867316790253, "learning_rate": 5.2359788542452305e-06, "loss": 0.22772178649902344, "step": 53730 }, { "epoch": 0.4646306560254559, "grad_norm": 1.6663022092592557, "learning_rate": 5.23584302020905e-06, "loss": 0.1325286865234375, "step": 53735 }, { "epoch": 0.4646738895469992, "grad_norm": 9.561600217921388, "learning_rate": 5.2357071758613605e-06, "loss": 0.09808921813964844, "step": 53740 }, { "epoch": 0.4647171230685424, "grad_norm": 8.950324742168608, "learning_rate": 5.235571321202786e-06, "loss": 0.3599029541015625, "step": 53745 }, { "epoch": 0.4647603565900857, "grad_norm": 29.018071163915806, "learning_rate": 5.235435456233955e-06, "loss": 0.2917236328125, "step": 53750 }, { "epoch": 0.464803590111629, "grad_norm": 12.590084040370018, "learning_rate": 5.235299580955493e-06, "loss": 0.10230865478515624, "step": 53755 }, { "epoch": 0.4648468236331722, "grad_norm": 1.080151537043692, "learning_rate": 5.235163695368027e-06, "loss": 0.39306793212890623, "step": 53760 }, { "epoch": 0.4648900571547155, "grad_norm": 0.7507147528947086, "learning_rate": 5.235027799472183e-06, "loss": 0.1071624755859375, "step": 53765 }, { "epoch": 0.46493329067625877, "grad_norm": 7.503105308601275, "learning_rate": 5.234891893268589e-06, "loss": 0.262103271484375, "step": 53770 }, { "epoch": 0.464976524197802, "grad_norm": 15.780533861950703, "learning_rate": 5.23475597675787e-06, "loss": 0.23124847412109376, "step": 53775 }, { "epoch": 0.4650197577193453, "grad_norm": 14.704702276307886, "learning_rate": 5.234620049940655e-06, "loss": 0.0599212646484375, "step": 53780 }, { "epoch": 0.4650629912408885, "grad_norm": 1.0041691790259732, "learning_rate": 5.234484112817569e-06, "loss": 0.1431488037109375, "step": 53785 }, { "epoch": 0.4651062247624318, "grad_norm": 9.819568374140754, "learning_rate": 5.2343481653892406e-06, "loss": 0.11380043029785156, "step": 53790 }, { "epoch": 0.4651494582839751, "grad_norm": 0.7910620506255637, "learning_rate": 5.234212207656295e-06, "loss": 0.14437332153320312, "step": 53795 }, { "epoch": 0.4651926918055183, "grad_norm": 3.3629251733672185, "learning_rate": 5.234076239619361e-06, "loss": 0.18051834106445314, "step": 53800 }, { "epoch": 0.4652359253270616, "grad_norm": 8.94550846225434, "learning_rate": 5.233940261279064e-06, "loss": 0.34639892578125, "step": 53805 }, { "epoch": 0.4652791588486049, "grad_norm": 44.92961445043032, "learning_rate": 5.233804272636032e-06, "loss": 0.47334747314453124, "step": 53810 }, { "epoch": 0.4653223923701481, "grad_norm": 7.393441828162928, "learning_rate": 5.233668273690892e-06, "loss": 0.22359390258789064, "step": 53815 }, { "epoch": 0.4653656258916914, "grad_norm": 24.082497766910887, "learning_rate": 5.233532264444271e-06, "loss": 0.20709228515625, "step": 53820 }, { "epoch": 0.4654088594132346, "grad_norm": 35.25405634259536, "learning_rate": 5.233396244896798e-06, "loss": 0.376043701171875, "step": 53825 }, { "epoch": 0.4654520929347779, "grad_norm": 0.8510702258189041, "learning_rate": 5.233260215049098e-06, "loss": 0.34589691162109376, "step": 53830 }, { "epoch": 0.4654953264563212, "grad_norm": 0.06520656116450017, "learning_rate": 5.233124174901799e-06, "loss": 0.21961898803710939, "step": 53835 }, { "epoch": 0.4655385599778644, "grad_norm": 19.118523373365893, "learning_rate": 5.232988124455529e-06, "loss": 0.0727325439453125, "step": 53840 }, { "epoch": 0.4655817934994077, "grad_norm": 9.121853483245873, "learning_rate": 5.232852063710914e-06, "loss": 0.06180267333984375, "step": 53845 }, { "epoch": 0.465625027020951, "grad_norm": 2.964661529234741, "learning_rate": 5.232715992668584e-06, "loss": 0.12635269165039062, "step": 53850 }, { "epoch": 0.4656682605424942, "grad_norm": 25.059428020537705, "learning_rate": 5.232579911329165e-06, "loss": 0.19915275573730468, "step": 53855 }, { "epoch": 0.4657114940640375, "grad_norm": 28.74225487313068, "learning_rate": 5.232443819693284e-06, "loss": 0.31638641357421876, "step": 53860 }, { "epoch": 0.46575472758558073, "grad_norm": 36.222154292771776, "learning_rate": 5.23230771776157e-06, "loss": 0.14734649658203125, "step": 53865 }, { "epoch": 0.465797961107124, "grad_norm": 46.03684901637095, "learning_rate": 5.232171605534649e-06, "loss": 0.1087127685546875, "step": 53870 }, { "epoch": 0.4658411946286673, "grad_norm": 4.455776911612412, "learning_rate": 5.23203548301315e-06, "loss": 0.04075126647949219, "step": 53875 }, { "epoch": 0.46588442815021053, "grad_norm": 8.101163465840745, "learning_rate": 5.231899350197701e-06, "loss": 0.44351348876953123, "step": 53880 }, { "epoch": 0.4659276616717538, "grad_norm": 1.959605503354787, "learning_rate": 5.2317632070889305e-06, "loss": 0.2350433349609375, "step": 53885 }, { "epoch": 0.4659708951932971, "grad_norm": 13.878505487873491, "learning_rate": 5.231627053687464e-06, "loss": 0.10246543884277344, "step": 53890 }, { "epoch": 0.4660141287148403, "grad_norm": 14.289601884522604, "learning_rate": 5.231490889993931e-06, "loss": 0.09921646118164062, "step": 53895 }, { "epoch": 0.4660573622363836, "grad_norm": 1.4545171393019647, "learning_rate": 5.231354716008958e-06, "loss": 0.11439971923828125, "step": 53900 }, { "epoch": 0.46610059575792684, "grad_norm": 6.31228148546009, "learning_rate": 5.231218531733177e-06, "loss": 0.23050994873046876, "step": 53905 }, { "epoch": 0.4661438292794701, "grad_norm": 2.470077374759564, "learning_rate": 5.2310823371672115e-06, "loss": 0.1503936767578125, "step": 53910 }, { "epoch": 0.4661870628010134, "grad_norm": 4.849465561905961, "learning_rate": 5.2309461323116906e-06, "loss": 0.3072685241699219, "step": 53915 }, { "epoch": 0.46623029632255664, "grad_norm": 7.799458416562272, "learning_rate": 5.230809917167245e-06, "loss": 0.13865966796875, "step": 53920 }, { "epoch": 0.4662735298440999, "grad_norm": 0.5519665334512408, "learning_rate": 5.2306736917345005e-06, "loss": 0.08980636596679688, "step": 53925 }, { "epoch": 0.4663167633656432, "grad_norm": 23.342859670363133, "learning_rate": 5.230537456014085e-06, "loss": 0.170404052734375, "step": 53930 }, { "epoch": 0.46635999688718643, "grad_norm": 23.009458465435788, "learning_rate": 5.23040121000663e-06, "loss": 0.18080902099609375, "step": 53935 }, { "epoch": 0.4664032304087297, "grad_norm": 12.884736699610404, "learning_rate": 5.23026495371276e-06, "loss": 0.0714935302734375, "step": 53940 }, { "epoch": 0.466446463930273, "grad_norm": 11.384851936120729, "learning_rate": 5.230128687133105e-06, "loss": 0.05354156494140625, "step": 53945 }, { "epoch": 0.46648969745181623, "grad_norm": 18.832472093861046, "learning_rate": 5.229992410268295e-06, "loss": 0.23473892211914063, "step": 53950 }, { "epoch": 0.4665329309733595, "grad_norm": 0.8807769775144513, "learning_rate": 5.2298561231189555e-06, "loss": 0.096575927734375, "step": 53955 }, { "epoch": 0.46657616449490275, "grad_norm": 4.077322515509897, "learning_rate": 5.229719825685718e-06, "loss": 0.05079498291015625, "step": 53960 }, { "epoch": 0.46661939801644603, "grad_norm": 1.204539261695771, "learning_rate": 5.229583517969209e-06, "loss": 0.2104511260986328, "step": 53965 }, { "epoch": 0.4666626315379893, "grad_norm": 23.31722334694456, "learning_rate": 5.229447199970058e-06, "loss": 0.167425537109375, "step": 53970 }, { "epoch": 0.46670586505953254, "grad_norm": 5.191373023544115, "learning_rate": 5.229310871688894e-06, "loss": 0.16988525390625, "step": 53975 }, { "epoch": 0.4667490985810758, "grad_norm": 9.447179989770682, "learning_rate": 5.229174533126344e-06, "loss": 0.1552947998046875, "step": 53980 }, { "epoch": 0.4667923321026191, "grad_norm": 49.56007452622469, "learning_rate": 5.2290381842830384e-06, "loss": 0.14847831726074218, "step": 53985 }, { "epoch": 0.46683556562416234, "grad_norm": 1.888565188784062, "learning_rate": 5.228901825159606e-06, "loss": 0.10466880798339843, "step": 53990 }, { "epoch": 0.4668787991457056, "grad_norm": 4.257882772737681, "learning_rate": 5.228765455756675e-06, "loss": 0.19278030395507811, "step": 53995 }, { "epoch": 0.46692203266724885, "grad_norm": 45.32131463031867, "learning_rate": 5.2286290760748746e-06, "loss": 0.23804397583007814, "step": 54000 }, { "epoch": 0.46696526618879214, "grad_norm": 2.900614915863492, "learning_rate": 5.228492686114835e-06, "loss": 0.06533889770507813, "step": 54005 }, { "epoch": 0.4670084997103354, "grad_norm": 0.8488217819612184, "learning_rate": 5.228356285877183e-06, "loss": 0.20366668701171875, "step": 54010 }, { "epoch": 0.46705173323187865, "grad_norm": 9.420317870243313, "learning_rate": 5.228219875362548e-06, "loss": 0.23575439453125, "step": 54015 }, { "epoch": 0.46709496675342194, "grad_norm": 0.9270796293052375, "learning_rate": 5.228083454571561e-06, "loss": 0.1869354248046875, "step": 54020 }, { "epoch": 0.4671382002749652, "grad_norm": 0.2039730941553693, "learning_rate": 5.227947023504849e-06, "loss": 0.13194656372070312, "step": 54025 }, { "epoch": 0.46718143379650845, "grad_norm": 2.3422527339323342, "learning_rate": 5.2278105821630435e-06, "loss": 0.04014854431152344, "step": 54030 }, { "epoch": 0.46722466731805173, "grad_norm": 5.667875951735816, "learning_rate": 5.2276741305467715e-06, "loss": 0.1322509765625, "step": 54035 }, { "epoch": 0.46726790083959496, "grad_norm": 12.567068952879795, "learning_rate": 5.227537668656664e-06, "loss": 0.1536895751953125, "step": 54040 }, { "epoch": 0.46731113436113825, "grad_norm": 8.700422582416115, "learning_rate": 5.227401196493349e-06, "loss": 0.0395233154296875, "step": 54045 }, { "epoch": 0.46735436788268153, "grad_norm": 0.6344259313326663, "learning_rate": 5.227264714057456e-06, "loss": 0.012941741943359375, "step": 54050 }, { "epoch": 0.46739760140422476, "grad_norm": 34.059388258311316, "learning_rate": 5.227128221349616e-06, "loss": 0.2089141845703125, "step": 54055 }, { "epoch": 0.46744083492576805, "grad_norm": 2.6007022926757095, "learning_rate": 5.226991718370457e-06, "loss": 0.06075439453125, "step": 54060 }, { "epoch": 0.46748406844731133, "grad_norm": 4.83296170525919, "learning_rate": 5.22685520512061e-06, "loss": 0.09933013916015625, "step": 54065 }, { "epoch": 0.46752730196885456, "grad_norm": 19.26618823596071, "learning_rate": 5.226718681600702e-06, "loss": 0.13660736083984376, "step": 54070 }, { "epoch": 0.46757053549039784, "grad_norm": 5.372147202515213, "learning_rate": 5.226582147811365e-06, "loss": 0.24031219482421876, "step": 54075 }, { "epoch": 0.46761376901194107, "grad_norm": 35.71528246525784, "learning_rate": 5.2264456037532274e-06, "loss": 0.20716171264648436, "step": 54080 }, { "epoch": 0.46765700253348436, "grad_norm": 13.921905718773008, "learning_rate": 5.226309049426919e-06, "loss": 0.16783714294433594, "step": 54085 }, { "epoch": 0.46770023605502764, "grad_norm": 7.1069902737054, "learning_rate": 5.226172484833071e-06, "loss": 0.3747894287109375, "step": 54090 }, { "epoch": 0.46774346957657087, "grad_norm": 1.4028847168773466, "learning_rate": 5.226035909972312e-06, "loss": 0.1318981170654297, "step": 54095 }, { "epoch": 0.46778670309811415, "grad_norm": 9.003811797027495, "learning_rate": 5.225899324845271e-06, "loss": 0.1173828125, "step": 54100 }, { "epoch": 0.46782993661965744, "grad_norm": 11.255078689729878, "learning_rate": 5.2257627294525805e-06, "loss": 0.1044097900390625, "step": 54105 }, { "epoch": 0.46787317014120067, "grad_norm": 23.405507343775398, "learning_rate": 5.225626123794869e-06, "loss": 0.28922500610351565, "step": 54110 }, { "epoch": 0.46791640366274395, "grad_norm": 2.03640286382012, "learning_rate": 5.225489507872765e-06, "loss": 0.48909759521484375, "step": 54115 }, { "epoch": 0.46795963718428724, "grad_norm": 0.5500247444603028, "learning_rate": 5.225352881686901e-06, "loss": 0.0471435546875, "step": 54120 }, { "epoch": 0.46800287070583046, "grad_norm": 5.358533481227161, "learning_rate": 5.225216245237906e-06, "loss": 0.1737396240234375, "step": 54125 }, { "epoch": 0.46804610422737375, "grad_norm": 21.28952858454706, "learning_rate": 5.225079598526411e-06, "loss": 0.15412750244140624, "step": 54130 }, { "epoch": 0.468089337748917, "grad_norm": 5.049477278261306, "learning_rate": 5.224942941553044e-06, "loss": 0.1720428466796875, "step": 54135 }, { "epoch": 0.46813257127046026, "grad_norm": 26.21583950955644, "learning_rate": 5.224806274318438e-06, "loss": 0.8486736297607422, "step": 54140 }, { "epoch": 0.46817580479200355, "grad_norm": 0.6916273513734793, "learning_rate": 5.2246695968232215e-06, "loss": 0.1567657470703125, "step": 54145 }, { "epoch": 0.4682190383135468, "grad_norm": 18.73664502207775, "learning_rate": 5.224532909068026e-06, "loss": 0.39931640625, "step": 54150 }, { "epoch": 0.46826227183509006, "grad_norm": 0.2544502880490479, "learning_rate": 5.224396211053479e-06, "loss": 0.10921440124511719, "step": 54155 }, { "epoch": 0.46830550535663334, "grad_norm": 1.3177567923977713, "learning_rate": 5.224259502780216e-06, "loss": 0.14791946411132811, "step": 54160 }, { "epoch": 0.4683487388781766, "grad_norm": 6.442087030364317, "learning_rate": 5.224122784248864e-06, "loss": 0.2991455078125, "step": 54165 }, { "epoch": 0.46839197239971986, "grad_norm": 18.24085067599001, "learning_rate": 5.2239860554600535e-06, "loss": 0.14456253051757811, "step": 54170 }, { "epoch": 0.4684352059212631, "grad_norm": 21.44935086994579, "learning_rate": 5.223849316414416e-06, "loss": 0.14177703857421875, "step": 54175 }, { "epoch": 0.46847843944280637, "grad_norm": 3.469613590322474, "learning_rate": 5.223712567112583e-06, "loss": 0.07231292724609376, "step": 54180 }, { "epoch": 0.46852167296434966, "grad_norm": 0.6323275559701357, "learning_rate": 5.223575807555184e-06, "loss": 0.09552001953125, "step": 54185 }, { "epoch": 0.4685649064858929, "grad_norm": 25.87332886986931, "learning_rate": 5.223439037742849e-06, "loss": 0.24393310546875, "step": 54190 }, { "epoch": 0.46860814000743617, "grad_norm": 5.818892827068028, "learning_rate": 5.22330225767621e-06, "loss": 0.23844032287597655, "step": 54195 }, { "epoch": 0.46865137352897945, "grad_norm": 2.9383304343162813, "learning_rate": 5.2231654673558966e-06, "loss": 0.16606369018554687, "step": 54200 }, { "epoch": 0.4686946070505227, "grad_norm": 1.2397111331871333, "learning_rate": 5.223028666782542e-06, "loss": 0.10997314453125, "step": 54205 }, { "epoch": 0.46873784057206597, "grad_norm": 0.5425765100915423, "learning_rate": 5.222891855956775e-06, "loss": 0.06396484375, "step": 54210 }, { "epoch": 0.4687810740936092, "grad_norm": 0.8828448410173891, "learning_rate": 5.2227550348792265e-06, "loss": 0.120965576171875, "step": 54215 }, { "epoch": 0.4688243076151525, "grad_norm": 7.9950663915733, "learning_rate": 5.222618203550529e-06, "loss": 0.061153411865234375, "step": 54220 }, { "epoch": 0.46886754113669576, "grad_norm": 2.285589690217619, "learning_rate": 5.222481361971311e-06, "loss": 0.09974231719970703, "step": 54225 }, { "epoch": 0.468910774658239, "grad_norm": 4.696312826877375, "learning_rate": 5.222344510142208e-06, "loss": 0.18407859802246093, "step": 54230 }, { "epoch": 0.4689540081797823, "grad_norm": 3.8375359714233803, "learning_rate": 5.222207648063847e-06, "loss": 0.12265472412109375, "step": 54235 }, { "epoch": 0.46899724170132556, "grad_norm": 2.830130191306775, "learning_rate": 5.222070775736861e-06, "loss": 0.1988056182861328, "step": 54240 }, { "epoch": 0.4690404752228688, "grad_norm": 0.4808350388245072, "learning_rate": 5.221933893161881e-06, "loss": 0.10199394226074218, "step": 54245 }, { "epoch": 0.4690837087444121, "grad_norm": 3.8158848876370057, "learning_rate": 5.221797000339538e-06, "loss": 0.22203216552734376, "step": 54250 }, { "epoch": 0.4691269422659553, "grad_norm": 1.1192912181535004, "learning_rate": 5.2216600972704635e-06, "loss": 0.0651641845703125, "step": 54255 }, { "epoch": 0.4691701757874986, "grad_norm": 3.609513433862667, "learning_rate": 5.221523183955289e-06, "loss": 0.179827880859375, "step": 54260 }, { "epoch": 0.4692134093090419, "grad_norm": 37.71566349938839, "learning_rate": 5.221386260394646e-06, "loss": 0.2905731201171875, "step": 54265 }, { "epoch": 0.4692566428305851, "grad_norm": 0.8472980156478767, "learning_rate": 5.221249326589166e-06, "loss": 0.12646026611328126, "step": 54270 }, { "epoch": 0.4692998763521284, "grad_norm": 1.9691041500943987, "learning_rate": 5.2211123825394805e-06, "loss": 0.21142578125, "step": 54275 }, { "epoch": 0.46934310987367167, "grad_norm": 21.83671568372901, "learning_rate": 5.2209754282462205e-06, "loss": 0.07260017395019532, "step": 54280 }, { "epoch": 0.4693863433952149, "grad_norm": 7.240554347427703, "learning_rate": 5.220838463710018e-06, "loss": 0.25511932373046875, "step": 54285 }, { "epoch": 0.4694295769167582, "grad_norm": 26.85577342157639, "learning_rate": 5.220701488931505e-06, "loss": 0.26706886291503906, "step": 54290 }, { "epoch": 0.46947281043830147, "grad_norm": 0.09882788035467235, "learning_rate": 5.220564503911313e-06, "loss": 0.17859268188476562, "step": 54295 }, { "epoch": 0.4695160439598447, "grad_norm": 1.070395325354451, "learning_rate": 5.2204275086500735e-06, "loss": 0.24735641479492188, "step": 54300 }, { "epoch": 0.469559277481388, "grad_norm": 7.576371506453631, "learning_rate": 5.2202905031484186e-06, "loss": 0.128558349609375, "step": 54305 }, { "epoch": 0.4696025110029312, "grad_norm": 22.89146784712096, "learning_rate": 5.220153487406981e-06, "loss": 0.38497772216796877, "step": 54310 }, { "epoch": 0.4696457445244745, "grad_norm": 22.572053707577922, "learning_rate": 5.22001646142639e-06, "loss": 0.18703193664550782, "step": 54315 }, { "epoch": 0.4696889780460178, "grad_norm": 16.655437707490204, "learning_rate": 5.21987942520728e-06, "loss": 0.1137420654296875, "step": 54320 }, { "epoch": 0.469732211567561, "grad_norm": 7.613553269401451, "learning_rate": 5.219742378750283e-06, "loss": 0.20185546875, "step": 54325 }, { "epoch": 0.4697754450891043, "grad_norm": 3.7034289744998854, "learning_rate": 5.219605322056029e-06, "loss": 0.16717529296875, "step": 54330 }, { "epoch": 0.4698186786106476, "grad_norm": 0.2401653932103256, "learning_rate": 5.219468255125153e-06, "loss": 0.20364990234375, "step": 54335 }, { "epoch": 0.4698619121321908, "grad_norm": 9.279317263263117, "learning_rate": 5.219331177958284e-06, "loss": 0.0420989990234375, "step": 54340 }, { "epoch": 0.4699051456537341, "grad_norm": 17.972076478187258, "learning_rate": 5.219194090556056e-06, "loss": 0.201580810546875, "step": 54345 }, { "epoch": 0.4699483791752773, "grad_norm": 8.776584475734065, "learning_rate": 5.219056992919101e-06, "loss": 0.14196128845214845, "step": 54350 }, { "epoch": 0.4699916126968206, "grad_norm": 30.30140112572815, "learning_rate": 5.2189198850480515e-06, "loss": 0.233514404296875, "step": 54355 }, { "epoch": 0.4700348462183639, "grad_norm": 1.868252183492629, "learning_rate": 5.21878276694354e-06, "loss": 0.2839263916015625, "step": 54360 }, { "epoch": 0.4700780797399071, "grad_norm": 9.818683613517353, "learning_rate": 5.218645638606198e-06, "loss": 0.186468505859375, "step": 54365 }, { "epoch": 0.4701213132614504, "grad_norm": 38.88318458398492, "learning_rate": 5.218508500036657e-06, "loss": 0.328082275390625, "step": 54370 }, { "epoch": 0.4701645467829937, "grad_norm": 9.492098806973196, "learning_rate": 5.218371351235553e-06, "loss": 0.18897743225097657, "step": 54375 }, { "epoch": 0.4702077803045369, "grad_norm": 6.238886991595005, "learning_rate": 5.2182341922035156e-06, "loss": 0.206231689453125, "step": 54380 }, { "epoch": 0.4702510138260802, "grad_norm": 10.656305402737704, "learning_rate": 5.218097022941179e-06, "loss": 0.04517059326171875, "step": 54385 }, { "epoch": 0.47029424734762343, "grad_norm": 5.2170291786672935, "learning_rate": 5.217959843449172e-06, "loss": 0.07300262451171875, "step": 54390 }, { "epoch": 0.4703374808691667, "grad_norm": 6.483169588353465, "learning_rate": 5.217822653728133e-06, "loss": 0.165814208984375, "step": 54395 }, { "epoch": 0.47038071439071, "grad_norm": 11.878092965184315, "learning_rate": 5.217685453778691e-06, "loss": 0.5700668334960938, "step": 54400 }, { "epoch": 0.4704239479122532, "grad_norm": 1.4017821011755192, "learning_rate": 5.217548243601479e-06, "loss": 0.090777587890625, "step": 54405 }, { "epoch": 0.4704671814337965, "grad_norm": 0.044101916081702636, "learning_rate": 5.217411023197131e-06, "loss": 0.10714797973632813, "step": 54410 }, { "epoch": 0.4705104149553398, "grad_norm": 2.5615112646070246, "learning_rate": 5.21727379256628e-06, "loss": 0.029929351806640626, "step": 54415 }, { "epoch": 0.470553648476883, "grad_norm": 8.030125182681159, "learning_rate": 5.217136551709557e-06, "loss": 0.04923553466796875, "step": 54420 }, { "epoch": 0.4705968819984263, "grad_norm": 0.8786222317825861, "learning_rate": 5.216999300627597e-06, "loss": 0.05918731689453125, "step": 54425 }, { "epoch": 0.47064011551996954, "grad_norm": 30.467768892560944, "learning_rate": 5.216862039321032e-06, "loss": 0.3644233703613281, "step": 54430 }, { "epoch": 0.4706833490415128, "grad_norm": 2.3888870417325956, "learning_rate": 5.2167247677904945e-06, "loss": 0.62049560546875, "step": 54435 }, { "epoch": 0.4707265825630561, "grad_norm": 10.890078532601207, "learning_rate": 5.216587486036618e-06, "loss": 0.095068359375, "step": 54440 }, { "epoch": 0.47076981608459934, "grad_norm": 24.346628813098206, "learning_rate": 5.216450194060037e-06, "loss": 0.13940343856811524, "step": 54445 }, { "epoch": 0.4708130496061426, "grad_norm": 3.8707685305719104, "learning_rate": 5.216312891861383e-06, "loss": 0.1062042236328125, "step": 54450 }, { "epoch": 0.4708562831276859, "grad_norm": 6.635229302945502, "learning_rate": 5.21617557944129e-06, "loss": 0.05343704223632813, "step": 54455 }, { "epoch": 0.47089951664922913, "grad_norm": 3.6624167424788485, "learning_rate": 5.2160382568003915e-06, "loss": 0.12098407745361328, "step": 54460 }, { "epoch": 0.4709427501707724, "grad_norm": 6.160970715277446, "learning_rate": 5.215900923939319e-06, "loss": 0.1812713623046875, "step": 54465 }, { "epoch": 0.47098598369231565, "grad_norm": 7.595118005214445, "learning_rate": 5.215763580858709e-06, "loss": 0.15461502075195313, "step": 54470 }, { "epoch": 0.47102921721385893, "grad_norm": 3.1801200996111145, "learning_rate": 5.2156262275591926e-06, "loss": 0.05171051025390625, "step": 54475 }, { "epoch": 0.4710724507354022, "grad_norm": 15.271295827239156, "learning_rate": 5.215488864041403e-06, "loss": 0.0999481201171875, "step": 54480 }, { "epoch": 0.47111568425694544, "grad_norm": 6.209010902456406, "learning_rate": 5.215351490305975e-06, "loss": 0.112872314453125, "step": 54485 }, { "epoch": 0.47115891777848873, "grad_norm": 2.137851231861489, "learning_rate": 5.215214106353541e-06, "loss": 0.045809173583984376, "step": 54490 }, { "epoch": 0.471202151300032, "grad_norm": 26.268864983896826, "learning_rate": 5.215076712184737e-06, "loss": 0.30650482177734373, "step": 54495 }, { "epoch": 0.47124538482157524, "grad_norm": 5.5170691918632695, "learning_rate": 5.214939307800193e-06, "loss": 0.0882537841796875, "step": 54500 }, { "epoch": 0.4712886183431185, "grad_norm": 0.39791606282423364, "learning_rate": 5.214801893200546e-06, "loss": 0.13221817016601561, "step": 54505 }, { "epoch": 0.4713318518646618, "grad_norm": 11.7311361441409, "learning_rate": 5.214664468386426e-06, "loss": 0.121783447265625, "step": 54510 }, { "epoch": 0.47137508538620504, "grad_norm": 1.5491841552756667, "learning_rate": 5.214527033358473e-06, "loss": 0.062160491943359375, "step": 54515 }, { "epoch": 0.4714183189077483, "grad_norm": 3.979566362476111, "learning_rate": 5.214389588117314e-06, "loss": 0.1407012939453125, "step": 54520 }, { "epoch": 0.47146155242929155, "grad_norm": 10.95774690030013, "learning_rate": 5.214252132663587e-06, "loss": 0.2350341796875, "step": 54525 }, { "epoch": 0.47150478595083484, "grad_norm": 2.851144293146635, "learning_rate": 5.214114666997925e-06, "loss": 0.03719329833984375, "step": 54530 }, { "epoch": 0.4715480194723781, "grad_norm": 4.917368048553287, "learning_rate": 5.213977191120961e-06, "loss": 0.1154327392578125, "step": 54535 }, { "epoch": 0.47159125299392135, "grad_norm": 11.76798444056278, "learning_rate": 5.21383970503333e-06, "loss": 0.254510498046875, "step": 54540 }, { "epoch": 0.47163448651546463, "grad_norm": 3.5700599654607537, "learning_rate": 5.213702208735667e-06, "loss": 0.08874626159667968, "step": 54545 }, { "epoch": 0.4716777200370079, "grad_norm": 8.617170955727627, "learning_rate": 5.213564702228604e-06, "loss": 0.106512451171875, "step": 54550 }, { "epoch": 0.47172095355855115, "grad_norm": 25.854815072214507, "learning_rate": 5.213427185512777e-06, "loss": 0.2822662353515625, "step": 54555 }, { "epoch": 0.47176418708009443, "grad_norm": 1.29648547402808, "learning_rate": 5.2132896585888184e-06, "loss": 0.0436798095703125, "step": 54560 }, { "epoch": 0.47180742060163766, "grad_norm": 23.617228706981564, "learning_rate": 5.2131521214573645e-06, "loss": 0.17462615966796874, "step": 54565 }, { "epoch": 0.47185065412318095, "grad_norm": 0.717023731466904, "learning_rate": 5.2130145741190475e-06, "loss": 0.10099067687988281, "step": 54570 }, { "epoch": 0.47189388764472423, "grad_norm": 4.577175694734675, "learning_rate": 5.212877016574504e-06, "loss": 0.065728759765625, "step": 54575 }, { "epoch": 0.47193712116626746, "grad_norm": 30.742592623661, "learning_rate": 5.2127394488243655e-06, "loss": 0.28267669677734375, "step": 54580 }, { "epoch": 0.47198035468781074, "grad_norm": 0.9380511909041629, "learning_rate": 5.212601870869269e-06, "loss": 0.6706829071044922, "step": 54585 }, { "epoch": 0.47202358820935403, "grad_norm": 0.1420826890355089, "learning_rate": 5.212464282709849e-06, "loss": 0.1866128921508789, "step": 54590 }, { "epoch": 0.47206682173089726, "grad_norm": 0.37734500112220126, "learning_rate": 5.212326684346739e-06, "loss": 0.3335784912109375, "step": 54595 }, { "epoch": 0.47211005525244054, "grad_norm": 3.219666578030081, "learning_rate": 5.212189075780572e-06, "loss": 0.26724853515625, "step": 54600 }, { "epoch": 0.47215328877398377, "grad_norm": 23.490353905041772, "learning_rate": 5.212051457011986e-06, "loss": 0.5014083862304688, "step": 54605 }, { "epoch": 0.47219652229552705, "grad_norm": 0.847623187816254, "learning_rate": 5.2119138280416145e-06, "loss": 0.545626449584961, "step": 54610 }, { "epoch": 0.47223975581707034, "grad_norm": 7.611738408077201, "learning_rate": 5.211776188870091e-06, "loss": 0.042417144775390624, "step": 54615 }, { "epoch": 0.47228298933861357, "grad_norm": 7.562912122204761, "learning_rate": 5.2116385394980515e-06, "loss": 0.11398391723632813, "step": 54620 }, { "epoch": 0.47232622286015685, "grad_norm": 14.528805453101928, "learning_rate": 5.211500879926132e-06, "loss": 0.1780029296875, "step": 54625 }, { "epoch": 0.47236945638170014, "grad_norm": 1.1044976644245346, "learning_rate": 5.211363210154963e-06, "loss": 0.1037384033203125, "step": 54630 }, { "epoch": 0.47241268990324337, "grad_norm": 20.37571208955411, "learning_rate": 5.2112255301851845e-06, "loss": 0.23611373901367189, "step": 54635 }, { "epoch": 0.47245592342478665, "grad_norm": 0.26865501127189356, "learning_rate": 5.2110878400174284e-06, "loss": 0.0862091064453125, "step": 54640 }, { "epoch": 0.4724991569463299, "grad_norm": 2.0470588005720733, "learning_rate": 5.210950139652331e-06, "loss": 0.2551727294921875, "step": 54645 }, { "epoch": 0.47254239046787316, "grad_norm": 7.9018186350303194, "learning_rate": 5.210812429090526e-06, "loss": 0.05784273147583008, "step": 54650 }, { "epoch": 0.47258562398941645, "grad_norm": 5.108928554871379, "learning_rate": 5.21067470833265e-06, "loss": 0.257958984375, "step": 54655 }, { "epoch": 0.4726288575109597, "grad_norm": 0.7612820841245982, "learning_rate": 5.210536977379337e-06, "loss": 0.1671905517578125, "step": 54660 }, { "epoch": 0.47267209103250296, "grad_norm": 0.8413348009439593, "learning_rate": 5.210399236231223e-06, "loss": 0.1787689208984375, "step": 54665 }, { "epoch": 0.47271532455404625, "grad_norm": 0.5054349656995898, "learning_rate": 5.210261484888944e-06, "loss": 0.3558540344238281, "step": 54670 }, { "epoch": 0.4727585580755895, "grad_norm": 10.09941178745989, "learning_rate": 5.210123723353134e-06, "loss": 0.12506103515625, "step": 54675 }, { "epoch": 0.47280179159713276, "grad_norm": 13.821247633662137, "learning_rate": 5.209985951624428e-06, "loss": 0.21620330810546876, "step": 54680 }, { "epoch": 0.47284502511867604, "grad_norm": 6.256439470085384, "learning_rate": 5.209848169703463e-06, "loss": 0.10887603759765625, "step": 54685 }, { "epoch": 0.4728882586402193, "grad_norm": 28.439387445422984, "learning_rate": 5.209710377590872e-06, "loss": 0.23781471252441405, "step": 54690 }, { "epoch": 0.47293149216176256, "grad_norm": 0.20315219490686937, "learning_rate": 5.2095725752872935e-06, "loss": 0.04085273742675781, "step": 54695 }, { "epoch": 0.4729747256833058, "grad_norm": 15.232611420476237, "learning_rate": 5.209434762793361e-06, "loss": 0.07846527099609375, "step": 54700 }, { "epoch": 0.47301795920484907, "grad_norm": 15.17585122679387, "learning_rate": 5.209296940109709e-06, "loss": 0.10500030517578125, "step": 54705 }, { "epoch": 0.47306119272639235, "grad_norm": 12.802744768030939, "learning_rate": 5.209159107236977e-06, "loss": 0.12600173950195312, "step": 54710 }, { "epoch": 0.4731044262479356, "grad_norm": 0.7221078653814014, "learning_rate": 5.209021264175798e-06, "loss": 0.07945556640625, "step": 54715 }, { "epoch": 0.47314765976947887, "grad_norm": 0.35292567563161203, "learning_rate": 5.208883410926807e-06, "loss": 0.1072998046875, "step": 54720 }, { "epoch": 0.47319089329102215, "grad_norm": 4.413424187757666, "learning_rate": 5.208745547490642e-06, "loss": 0.41105804443359373, "step": 54725 }, { "epoch": 0.4732341268125654, "grad_norm": 0.5414106666424027, "learning_rate": 5.2086076738679375e-06, "loss": 0.26307373046875, "step": 54730 }, { "epoch": 0.47327736033410867, "grad_norm": 16.130724367151714, "learning_rate": 5.20846979005933e-06, "loss": 0.2119457244873047, "step": 54735 }, { "epoch": 0.4733205938556519, "grad_norm": 22.12255487868381, "learning_rate": 5.208331896065454e-06, "loss": 0.23148193359375, "step": 54740 }, { "epoch": 0.4733638273771952, "grad_norm": 3.8468979430891426, "learning_rate": 5.208193991886947e-06, "loss": 0.0801849365234375, "step": 54745 }, { "epoch": 0.47340706089873846, "grad_norm": 29.01374294558721, "learning_rate": 5.208056077524444e-06, "loss": 0.21926422119140626, "step": 54750 }, { "epoch": 0.4734502944202817, "grad_norm": 14.858328703794227, "learning_rate": 5.207918152978582e-06, "loss": 0.5307159423828125, "step": 54755 }, { "epoch": 0.473493527941825, "grad_norm": 17.270839477638248, "learning_rate": 5.207780218249996e-06, "loss": 0.255474853515625, "step": 54760 }, { "epoch": 0.47353676146336826, "grad_norm": 7.016484206245722, "learning_rate": 5.207642273339323e-06, "loss": 0.614016342163086, "step": 54765 }, { "epoch": 0.4735799949849115, "grad_norm": 1.7327012034069627, "learning_rate": 5.207504318247198e-06, "loss": 0.24102859497070311, "step": 54770 }, { "epoch": 0.4736232285064548, "grad_norm": 27.701921939613293, "learning_rate": 5.20736635297426e-06, "loss": 0.505523681640625, "step": 54775 }, { "epoch": 0.473666462027998, "grad_norm": 12.34106449320718, "learning_rate": 5.207228377521143e-06, "loss": 0.2740386962890625, "step": 54780 }, { "epoch": 0.4737096955495413, "grad_norm": 6.856837441185299, "learning_rate": 5.207090391888483e-06, "loss": 0.0661041259765625, "step": 54785 }, { "epoch": 0.47375292907108457, "grad_norm": 28.454345892230936, "learning_rate": 5.206952396076917e-06, "loss": 0.349267578125, "step": 54790 }, { "epoch": 0.4737961625926278, "grad_norm": 4.455927554256163, "learning_rate": 5.206814390087082e-06, "loss": 0.121710205078125, "step": 54795 }, { "epoch": 0.4738393961141711, "grad_norm": 1.6349929249973016, "learning_rate": 5.2066763739196146e-06, "loss": 0.19115676879882812, "step": 54800 }, { "epoch": 0.47388262963571437, "grad_norm": 15.478833803160784, "learning_rate": 5.206538347575149e-06, "loss": 0.1403289794921875, "step": 54805 }, { "epoch": 0.4739258631572576, "grad_norm": 0.27849206728234355, "learning_rate": 5.206400311054325e-06, "loss": 0.10364723205566406, "step": 54810 }, { "epoch": 0.4739690966788009, "grad_norm": 2.1041578056824077, "learning_rate": 5.206262264357777e-06, "loss": 0.30409698486328124, "step": 54815 }, { "epoch": 0.4740123302003441, "grad_norm": 1.8707500795087615, "learning_rate": 5.206124207486143e-06, "loss": 0.04868621826171875, "step": 54820 }, { "epoch": 0.4740555637218874, "grad_norm": 1.9467301963411558, "learning_rate": 5.205986140440059e-06, "loss": 0.2128469467163086, "step": 54825 }, { "epoch": 0.4740987972434307, "grad_norm": 0.7456480430284391, "learning_rate": 5.205848063220162e-06, "loss": 0.33518218994140625, "step": 54830 }, { "epoch": 0.4741420307649739, "grad_norm": 2.005102745107461, "learning_rate": 5.205709975827088e-06, "loss": 0.43274917602539065, "step": 54835 }, { "epoch": 0.4741852642865172, "grad_norm": 11.608044532779834, "learning_rate": 5.205571878261475e-06, "loss": 0.4431755065917969, "step": 54840 }, { "epoch": 0.4742284978080605, "grad_norm": 2.8608152478810087, "learning_rate": 5.20543377052396e-06, "loss": 0.10206222534179688, "step": 54845 }, { "epoch": 0.4742717313296037, "grad_norm": 25.403490150568125, "learning_rate": 5.205295652615178e-06, "loss": 0.17978363037109374, "step": 54850 }, { "epoch": 0.474314964851147, "grad_norm": 4.774038276863444, "learning_rate": 5.205157524535767e-06, "loss": 0.02127685546875, "step": 54855 }, { "epoch": 0.4743581983726903, "grad_norm": 2.178108811333432, "learning_rate": 5.205019386286366e-06, "loss": 0.107257080078125, "step": 54860 }, { "epoch": 0.4744014318942335, "grad_norm": 3.045062937392494, "learning_rate": 5.2048812378676094e-06, "loss": 0.0874856948852539, "step": 54865 }, { "epoch": 0.4744446654157768, "grad_norm": 1.083839829769938, "learning_rate": 5.204743079280135e-06, "loss": 0.5148567199707031, "step": 54870 }, { "epoch": 0.47448789893732, "grad_norm": 20.28849107592872, "learning_rate": 5.204604910524581e-06, "loss": 0.09999885559082031, "step": 54875 }, { "epoch": 0.4745311324588633, "grad_norm": 2.2557176788636624, "learning_rate": 5.204466731601584e-06, "loss": 0.060699462890625, "step": 54880 }, { "epoch": 0.4745743659804066, "grad_norm": 5.603685505100074, "learning_rate": 5.204328542511781e-06, "loss": 0.12602386474609376, "step": 54885 }, { "epoch": 0.4746175995019498, "grad_norm": 10.068594087138107, "learning_rate": 5.20419034325581e-06, "loss": 0.159576416015625, "step": 54890 }, { "epoch": 0.4746608330234931, "grad_norm": 3.3374375800954827, "learning_rate": 5.204052133834308e-06, "loss": 0.0704864501953125, "step": 54895 }, { "epoch": 0.4747040665450364, "grad_norm": 9.129153382831806, "learning_rate": 5.203913914247912e-06, "loss": 0.654400634765625, "step": 54900 }, { "epoch": 0.4747473000665796, "grad_norm": 3.801172329590553, "learning_rate": 5.2037756844972595e-06, "loss": 0.041933441162109376, "step": 54905 }, { "epoch": 0.4747905335881229, "grad_norm": 1.0098191203100797, "learning_rate": 5.203637444582988e-06, "loss": 0.2758209228515625, "step": 54910 }, { "epoch": 0.4748337671096661, "grad_norm": 1.1487231149023047, "learning_rate": 5.203499194505736e-06, "loss": 0.2357421875, "step": 54915 }, { "epoch": 0.4748770006312094, "grad_norm": 0.009177256994800995, "learning_rate": 5.20336093426614e-06, "loss": 0.12438697814941406, "step": 54920 }, { "epoch": 0.4749202341527527, "grad_norm": 8.553033588653213, "learning_rate": 5.203222663864838e-06, "loss": 0.27060394287109374, "step": 54925 }, { "epoch": 0.4749634676742959, "grad_norm": 5.781402943868396, "learning_rate": 5.203084383302468e-06, "loss": 0.13507537841796874, "step": 54930 }, { "epoch": 0.4750067011958392, "grad_norm": 2.322226983718601, "learning_rate": 5.202946092579667e-06, "loss": 0.059637451171875, "step": 54935 }, { "epoch": 0.4750499347173825, "grad_norm": 9.34575358601457, "learning_rate": 5.202807791697075e-06, "loss": 0.08948974609375, "step": 54940 }, { "epoch": 0.4750931682389257, "grad_norm": 9.858904550579547, "learning_rate": 5.202669480655325e-06, "loss": 0.05271148681640625, "step": 54945 }, { "epoch": 0.475136401760469, "grad_norm": 3.6327136557029984, "learning_rate": 5.202531159455061e-06, "loss": 0.15299072265625, "step": 54950 }, { "epoch": 0.47517963528201224, "grad_norm": 0.5191602354292225, "learning_rate": 5.202392828096917e-06, "loss": 0.3298637390136719, "step": 54955 }, { "epoch": 0.4752228688035555, "grad_norm": 4.0428658180661845, "learning_rate": 5.202254486581531e-06, "loss": 0.1686187744140625, "step": 54960 }, { "epoch": 0.4752661023250988, "grad_norm": 42.38650074676914, "learning_rate": 5.202116134909542e-06, "loss": 0.26139144897460936, "step": 54965 }, { "epoch": 0.47530933584664203, "grad_norm": 4.1733374395986695, "learning_rate": 5.201977773081589e-06, "loss": 0.26473846435546877, "step": 54970 }, { "epoch": 0.4753525693681853, "grad_norm": 4.398438546170717, "learning_rate": 5.201839401098308e-06, "loss": 0.0907470703125, "step": 54975 }, { "epoch": 0.4753958028897286, "grad_norm": 0.7959792812641038, "learning_rate": 5.201701018960339e-06, "loss": 0.6866607666015625, "step": 54980 }, { "epoch": 0.47543903641127183, "grad_norm": 0.42017127574168806, "learning_rate": 5.201562626668319e-06, "loss": 0.08617935180664063, "step": 54985 }, { "epoch": 0.4754822699328151, "grad_norm": 19.19685378936307, "learning_rate": 5.201424224222887e-06, "loss": 0.111456298828125, "step": 54990 }, { "epoch": 0.47552550345435834, "grad_norm": 19.840322922722017, "learning_rate": 5.201285811624681e-06, "loss": 0.147186279296875, "step": 54995 }, { "epoch": 0.47556873697590163, "grad_norm": 8.382096914780965, "learning_rate": 5.201147388874339e-06, "loss": 0.22015838623046874, "step": 55000 }, { "epoch": 0.4756119704974449, "grad_norm": 51.53332311539002, "learning_rate": 5.201008955972501e-06, "loss": 0.5092575073242187, "step": 55005 }, { "epoch": 0.47565520401898814, "grad_norm": 0.7924880474989994, "learning_rate": 5.200870512919803e-06, "loss": 0.28613967895507814, "step": 55010 }, { "epoch": 0.4756984375405314, "grad_norm": 2.596188232640346, "learning_rate": 5.200732059716885e-06, "loss": 0.1477203369140625, "step": 55015 }, { "epoch": 0.4757416710620747, "grad_norm": 0.6849248867124644, "learning_rate": 5.200593596364384e-06, "loss": 0.05752105712890625, "step": 55020 }, { "epoch": 0.47578490458361794, "grad_norm": 1.4295940693726075, "learning_rate": 5.200455122862941e-06, "loss": 0.059454345703125, "step": 55025 }, { "epoch": 0.4758281381051612, "grad_norm": 6.922204958033318, "learning_rate": 5.200316639213194e-06, "loss": 0.18430938720703124, "step": 55030 }, { "epoch": 0.4758713716267045, "grad_norm": 1.6465113860849632, "learning_rate": 5.20017814541578e-06, "loss": 0.13086700439453125, "step": 55035 }, { "epoch": 0.47591460514824774, "grad_norm": 44.27794534917041, "learning_rate": 5.200039641471339e-06, "loss": 0.14779720306396485, "step": 55040 }, { "epoch": 0.475957838669791, "grad_norm": 19.008130906566734, "learning_rate": 5.1999011273805096e-06, "loss": 0.2566619873046875, "step": 55045 }, { "epoch": 0.47600107219133425, "grad_norm": 5.58458679719837, "learning_rate": 5.199762603143931e-06, "loss": 0.14023361206054688, "step": 55050 }, { "epoch": 0.47604430571287754, "grad_norm": 19.90350522000485, "learning_rate": 5.199624068762241e-06, "loss": 0.230859375, "step": 55055 }, { "epoch": 0.4760875392344208, "grad_norm": 13.848305538102787, "learning_rate": 5.1994855242360795e-06, "loss": 0.11077919006347656, "step": 55060 }, { "epoch": 0.47613077275596405, "grad_norm": 7.22753292703026, "learning_rate": 5.199346969566086e-06, "loss": 0.0453521728515625, "step": 55065 }, { "epoch": 0.47617400627750733, "grad_norm": 17.534955806038273, "learning_rate": 5.199208404752897e-06, "loss": 0.3528106689453125, "step": 55070 }, { "epoch": 0.4762172397990506, "grad_norm": 21.520474643910315, "learning_rate": 5.199069829797153e-06, "loss": 0.4915863037109375, "step": 55075 }, { "epoch": 0.47626047332059385, "grad_norm": 6.720041949215632, "learning_rate": 5.198931244699496e-06, "loss": 0.42994537353515627, "step": 55080 }, { "epoch": 0.47630370684213713, "grad_norm": 5.999779456476179, "learning_rate": 5.198792649460559e-06, "loss": 0.25429229736328124, "step": 55085 }, { "epoch": 0.47634694036368036, "grad_norm": 9.379142659809753, "learning_rate": 5.198654044080987e-06, "loss": 0.0461761474609375, "step": 55090 }, { "epoch": 0.47639017388522364, "grad_norm": 1.204177287000002, "learning_rate": 5.198515428561416e-06, "loss": 0.1824920654296875, "step": 55095 }, { "epoch": 0.47643340740676693, "grad_norm": 7.055517528960598, "learning_rate": 5.198376802902485e-06, "loss": 0.22248249053955077, "step": 55100 }, { "epoch": 0.47647664092831016, "grad_norm": 29.58063213838333, "learning_rate": 5.198238167104836e-06, "loss": 0.2294586181640625, "step": 55105 }, { "epoch": 0.47651987444985344, "grad_norm": 22.527015992450032, "learning_rate": 5.1980995211691064e-06, "loss": 0.208209228515625, "step": 55110 }, { "epoch": 0.4765631079713967, "grad_norm": 3.5865391452522686, "learning_rate": 5.197960865095936e-06, "loss": 0.100311279296875, "step": 55115 }, { "epoch": 0.47660634149293996, "grad_norm": 9.835313571253021, "learning_rate": 5.197822198885965e-06, "loss": 0.17143030166625978, "step": 55120 }, { "epoch": 0.47664957501448324, "grad_norm": 0.09077894702837339, "learning_rate": 5.1976835225398306e-06, "loss": 0.1510852813720703, "step": 55125 }, { "epoch": 0.47669280853602647, "grad_norm": 0.20529960301558708, "learning_rate": 5.197544836058175e-06, "loss": 0.04691619873046875, "step": 55130 }, { "epoch": 0.47673604205756975, "grad_norm": 6.907199008613505, "learning_rate": 5.197406139441637e-06, "loss": 0.0394500732421875, "step": 55135 }, { "epoch": 0.47677927557911304, "grad_norm": 40.99530390664415, "learning_rate": 5.197267432690856e-06, "loss": 0.2925628662109375, "step": 55140 }, { "epoch": 0.47682250910065627, "grad_norm": 6.8383417430160165, "learning_rate": 5.197128715806471e-06, "loss": 0.156781005859375, "step": 55145 }, { "epoch": 0.47686574262219955, "grad_norm": 0.766598520702361, "learning_rate": 5.196989988789123e-06, "loss": 0.21975479125976563, "step": 55150 }, { "epoch": 0.47690897614374284, "grad_norm": 17.17344693898155, "learning_rate": 5.196851251639451e-06, "loss": 0.09734039306640625, "step": 55155 }, { "epoch": 0.47695220966528606, "grad_norm": 47.44029047444439, "learning_rate": 5.196712504358095e-06, "loss": 0.40573463439941404, "step": 55160 }, { "epoch": 0.47699544318682935, "grad_norm": 2.3320705622818454, "learning_rate": 5.196573746945696e-06, "loss": 0.7500930786132812, "step": 55165 }, { "epoch": 0.4770386767083726, "grad_norm": 5.346760240823603, "learning_rate": 5.196434979402892e-06, "loss": 0.1351348876953125, "step": 55170 }, { "epoch": 0.47708191022991586, "grad_norm": 15.94890220322219, "learning_rate": 5.196296201730325e-06, "loss": 0.11497650146484376, "step": 55175 }, { "epoch": 0.47712514375145915, "grad_norm": 2.2622756387525906, "learning_rate": 5.196157413928633e-06, "loss": 0.18864212036132813, "step": 55180 }, { "epoch": 0.4771683772730024, "grad_norm": 2.5820506544266832, "learning_rate": 5.196018615998457e-06, "loss": 0.2339080810546875, "step": 55185 }, { "epoch": 0.47721161079454566, "grad_norm": 1.2710998720189561, "learning_rate": 5.1958798079404385e-06, "loss": 0.1859039306640625, "step": 55190 }, { "epoch": 0.47725484431608894, "grad_norm": 2.8235733536744694, "learning_rate": 5.195740989755216e-06, "loss": 0.15524368286132811, "step": 55195 }, { "epoch": 0.4772980778376322, "grad_norm": 8.719504635478062, "learning_rate": 5.195602161443429e-06, "loss": 0.12230453491210938, "step": 55200 }, { "epoch": 0.47734131135917546, "grad_norm": 4.974453703180536, "learning_rate": 5.195463323005719e-06, "loss": 0.4694374084472656, "step": 55205 }, { "epoch": 0.4773845448807187, "grad_norm": 37.658777366167776, "learning_rate": 5.195324474442727e-06, "loss": 0.2092926025390625, "step": 55210 }, { "epoch": 0.47742777840226197, "grad_norm": 3.710419796789032, "learning_rate": 5.195185615755092e-06, "loss": 0.075921630859375, "step": 55215 }, { "epoch": 0.47747101192380526, "grad_norm": 20.263411656232506, "learning_rate": 5.195046746943454e-06, "loss": 0.122796630859375, "step": 55220 }, { "epoch": 0.4775142454453485, "grad_norm": 21.59442638668985, "learning_rate": 5.194907868008455e-06, "loss": 0.2107940673828125, "step": 55225 }, { "epoch": 0.47755747896689177, "grad_norm": 5.1428410442199795, "learning_rate": 5.194768978950736e-06, "loss": 0.04544219970703125, "step": 55230 }, { "epoch": 0.47760071248843505, "grad_norm": 5.881168796207397, "learning_rate": 5.194630079770935e-06, "loss": 0.125238037109375, "step": 55235 }, { "epoch": 0.4776439460099783, "grad_norm": 9.238198644619539, "learning_rate": 5.194491170469695e-06, "loss": 0.15278778076171876, "step": 55240 }, { "epoch": 0.47768717953152157, "grad_norm": 0.24522666748280844, "learning_rate": 5.194352251047655e-06, "loss": 0.15642242431640624, "step": 55245 }, { "epoch": 0.47773041305306485, "grad_norm": 30.239839345947285, "learning_rate": 5.194213321505457e-06, "loss": 0.31133270263671875, "step": 55250 }, { "epoch": 0.4777736465746081, "grad_norm": 0.3058056678436305, "learning_rate": 5.19407438184374e-06, "loss": 0.2655548095703125, "step": 55255 }, { "epoch": 0.47781688009615136, "grad_norm": 0.8644115393722386, "learning_rate": 5.1939354320631465e-06, "loss": 0.39952850341796875, "step": 55260 }, { "epoch": 0.4778601136176946, "grad_norm": 15.8206864729461, "learning_rate": 5.193796472164317e-06, "loss": 0.1880706787109375, "step": 55265 }, { "epoch": 0.4779033471392379, "grad_norm": 2.7860006441252843, "learning_rate": 5.193657502147891e-06, "loss": 0.17047653198242188, "step": 55270 }, { "epoch": 0.47794658066078116, "grad_norm": 19.68453487285245, "learning_rate": 5.193518522014511e-06, "loss": 0.380792236328125, "step": 55275 }, { "epoch": 0.4779898141823244, "grad_norm": 0.022292399759477253, "learning_rate": 5.193379531764818e-06, "loss": 0.08718757629394532, "step": 55280 }, { "epoch": 0.4780330477038677, "grad_norm": 40.75963098003976, "learning_rate": 5.193240531399451e-06, "loss": 0.3442390441894531, "step": 55285 }, { "epoch": 0.47807628122541096, "grad_norm": 15.431597467014532, "learning_rate": 5.1931015209190535e-06, "loss": 0.2558277130126953, "step": 55290 }, { "epoch": 0.4781195147469542, "grad_norm": 8.017702390855712, "learning_rate": 5.192962500324265e-06, "loss": 0.06364784240722657, "step": 55295 }, { "epoch": 0.4781627482684975, "grad_norm": 17.356134661859244, "learning_rate": 5.192823469615728e-06, "loss": 0.15259552001953125, "step": 55300 }, { "epoch": 0.4782059817900407, "grad_norm": 24.54666859063393, "learning_rate": 5.1926844287940825e-06, "loss": 0.1609344482421875, "step": 55305 }, { "epoch": 0.478249215311584, "grad_norm": 2.1045082306699605, "learning_rate": 5.19254537785997e-06, "loss": 0.18837814331054686, "step": 55310 }, { "epoch": 0.47829244883312727, "grad_norm": 3.4843689398294706, "learning_rate": 5.192406316814032e-06, "loss": 0.2666107177734375, "step": 55315 }, { "epoch": 0.4783356823546705, "grad_norm": 13.243076115991045, "learning_rate": 5.19226724565691e-06, "loss": 0.07855377197265626, "step": 55320 }, { "epoch": 0.4783789158762138, "grad_norm": 7.399384748325295, "learning_rate": 5.192128164389245e-06, "loss": 0.02898826599121094, "step": 55325 }, { "epoch": 0.47842214939775707, "grad_norm": 52.064961793931424, "learning_rate": 5.191989073011678e-06, "loss": 0.23653106689453124, "step": 55330 }, { "epoch": 0.4784653829193003, "grad_norm": 22.87585638892171, "learning_rate": 5.191849971524852e-06, "loss": 0.32025890350341796, "step": 55335 }, { "epoch": 0.4785086164408436, "grad_norm": 2.0045604220951017, "learning_rate": 5.191710859929407e-06, "loss": 0.02518768310546875, "step": 55340 }, { "epoch": 0.4785518499623868, "grad_norm": 3.5480460768334585, "learning_rate": 5.191571738225986e-06, "loss": 0.26664199829101565, "step": 55345 }, { "epoch": 0.4785950834839301, "grad_norm": 5.502133156606713, "learning_rate": 5.191432606415229e-06, "loss": 0.09314422607421875, "step": 55350 }, { "epoch": 0.4786383170054734, "grad_norm": 39.40310571573184, "learning_rate": 5.191293464497778e-06, "loss": 0.300225830078125, "step": 55355 }, { "epoch": 0.4786815505270166, "grad_norm": 0.20338176479805314, "learning_rate": 5.191154312474277e-06, "loss": 0.41373519897460936, "step": 55360 }, { "epoch": 0.4787247840485599, "grad_norm": 28.42086290994849, "learning_rate": 5.191015150345365e-06, "loss": 0.2893074035644531, "step": 55365 }, { "epoch": 0.4787680175701032, "grad_norm": 6.680585501291874, "learning_rate": 5.190875978111684e-06, "loss": 0.23260498046875, "step": 55370 }, { "epoch": 0.4788112510916464, "grad_norm": 40.19907461686883, "learning_rate": 5.190736795773878e-06, "loss": 0.28732452392578123, "step": 55375 }, { "epoch": 0.4788544846131897, "grad_norm": 1.428463532175745, "learning_rate": 5.1905976033325864e-06, "loss": 0.15601043701171874, "step": 55380 }, { "epoch": 0.4788977181347329, "grad_norm": 10.156381122447964, "learning_rate": 5.190458400788452e-06, "loss": 0.11127223968505859, "step": 55385 }, { "epoch": 0.4789409516562762, "grad_norm": 0.14683812159544873, "learning_rate": 5.1903191881421175e-06, "loss": 0.04395370483398438, "step": 55390 }, { "epoch": 0.4789841851778195, "grad_norm": 6.744067351477487, "learning_rate": 5.190179965394224e-06, "loss": 0.370135498046875, "step": 55395 }, { "epoch": 0.4790274186993627, "grad_norm": 13.778310497772162, "learning_rate": 5.190040732545416e-06, "loss": 0.1138031005859375, "step": 55400 }, { "epoch": 0.479070652220906, "grad_norm": 27.72214068353694, "learning_rate": 5.189901489596331e-06, "loss": 0.48510894775390623, "step": 55405 }, { "epoch": 0.4791138857424493, "grad_norm": 7.696937849803471, "learning_rate": 5.189762236547615e-06, "loss": 0.0634918212890625, "step": 55410 }, { "epoch": 0.4791571192639925, "grad_norm": 2.4754738023621807, "learning_rate": 5.189622973399909e-06, "loss": 0.2696052551269531, "step": 55415 }, { "epoch": 0.4792003527855358, "grad_norm": 53.87474720477299, "learning_rate": 5.189483700153856e-06, "loss": 0.3600128173828125, "step": 55420 }, { "epoch": 0.4792435863070791, "grad_norm": 0.23510767219408546, "learning_rate": 5.189344416810096e-06, "loss": 0.129620361328125, "step": 55425 }, { "epoch": 0.4792868198286223, "grad_norm": 5.380329730259943, "learning_rate": 5.1892051233692745e-06, "loss": 0.228082275390625, "step": 55430 }, { "epoch": 0.4793300533501656, "grad_norm": 8.138615015379164, "learning_rate": 5.189065819832032e-06, "loss": 0.04323272705078125, "step": 55435 }, { "epoch": 0.4793732868717088, "grad_norm": 8.411741932572738, "learning_rate": 5.18892650619901e-06, "loss": 0.2018157958984375, "step": 55440 }, { "epoch": 0.4794165203932521, "grad_norm": 14.098297779894398, "learning_rate": 5.188787182470854e-06, "loss": 0.3283428192138672, "step": 55445 }, { "epoch": 0.4794597539147954, "grad_norm": 55.21649287890213, "learning_rate": 5.188647848648204e-06, "loss": 0.25366363525390623, "step": 55450 }, { "epoch": 0.4795029874363386, "grad_norm": 0.558524079151177, "learning_rate": 5.188508504731705e-06, "loss": 0.036547088623046876, "step": 55455 }, { "epoch": 0.4795462209578819, "grad_norm": 13.377291653619052, "learning_rate": 5.188369150721997e-06, "loss": 0.0566864013671875, "step": 55460 }, { "epoch": 0.4795894544794252, "grad_norm": 9.27823809315112, "learning_rate": 5.188229786619724e-06, "loss": 0.233056640625, "step": 55465 }, { "epoch": 0.4796326880009684, "grad_norm": 2.4224058409011073, "learning_rate": 5.188090412425528e-06, "loss": 0.09912910461425781, "step": 55470 }, { "epoch": 0.4796759215225117, "grad_norm": 0.9757646895723459, "learning_rate": 5.1879510281400535e-06, "loss": 0.0726470947265625, "step": 55475 }, { "epoch": 0.47971915504405493, "grad_norm": 27.76037836908487, "learning_rate": 5.187811633763942e-06, "loss": 0.29234619140625, "step": 55480 }, { "epoch": 0.4797623885655982, "grad_norm": 1.2559745505110724, "learning_rate": 5.187672229297836e-06, "loss": 0.1718048095703125, "step": 55485 }, { "epoch": 0.4798056220871415, "grad_norm": 17.02664963088991, "learning_rate": 5.187532814742379e-06, "loss": 0.17124862670898439, "step": 55490 }, { "epoch": 0.47984885560868473, "grad_norm": 5.170519840696632, "learning_rate": 5.187393390098215e-06, "loss": 0.16211700439453125, "step": 55495 }, { "epoch": 0.479892089130228, "grad_norm": 2.9774876538308908, "learning_rate": 5.1872539553659845e-06, "loss": 0.174456787109375, "step": 55500 }, { "epoch": 0.4799353226517713, "grad_norm": 8.14504485825263, "learning_rate": 5.187114510546332e-06, "loss": 0.32061519622802737, "step": 55505 }, { "epoch": 0.47997855617331453, "grad_norm": 1.0498920063181942, "learning_rate": 5.1869750556399015e-06, "loss": 0.242230224609375, "step": 55510 }, { "epoch": 0.4800217896948578, "grad_norm": 2.57238144629386, "learning_rate": 5.186835590647335e-06, "loss": 0.04133415222167969, "step": 55515 }, { "epoch": 0.48006502321640104, "grad_norm": 3.3406851298612934, "learning_rate": 5.186696115569276e-06, "loss": 0.08154640197753907, "step": 55520 }, { "epoch": 0.48010825673794433, "grad_norm": 14.077696724426872, "learning_rate": 5.186556630406368e-06, "loss": 0.2603141784667969, "step": 55525 }, { "epoch": 0.4801514902594876, "grad_norm": 22.514578828545986, "learning_rate": 5.186417135159253e-06, "loss": 0.22874832153320312, "step": 55530 }, { "epoch": 0.48019472378103084, "grad_norm": 16.743556662916774, "learning_rate": 5.186277629828577e-06, "loss": 0.4261138916015625, "step": 55535 }, { "epoch": 0.4802379573025741, "grad_norm": 19.22889274554012, "learning_rate": 5.18613811441498e-06, "loss": 0.26673583984375, "step": 55540 }, { "epoch": 0.4802811908241174, "grad_norm": 1.2584619085537962, "learning_rate": 5.185998588919108e-06, "loss": 0.3995147705078125, "step": 55545 }, { "epoch": 0.48032442434566064, "grad_norm": 7.2962290729078125, "learning_rate": 5.185859053341604e-06, "loss": 0.05885467529296875, "step": 55550 }, { "epoch": 0.4803676578672039, "grad_norm": 39.36819186864807, "learning_rate": 5.185719507683112e-06, "loss": 0.2759735107421875, "step": 55555 }, { "epoch": 0.48041089138874715, "grad_norm": 18.582744959183007, "learning_rate": 5.185579951944274e-06, "loss": 0.07141876220703125, "step": 55560 }, { "epoch": 0.48045412491029044, "grad_norm": 16.761380267311395, "learning_rate": 5.185440386125734e-06, "loss": 0.26339263916015626, "step": 55565 }, { "epoch": 0.4804973584318337, "grad_norm": 31.696534243635952, "learning_rate": 5.185300810228135e-06, "loss": 0.095025634765625, "step": 55570 }, { "epoch": 0.48054059195337695, "grad_norm": 5.779544537274712, "learning_rate": 5.185161224252124e-06, "loss": 0.09030990600585938, "step": 55575 }, { "epoch": 0.48058382547492023, "grad_norm": 29.10313645431587, "learning_rate": 5.185021628198341e-06, "loss": 0.298663330078125, "step": 55580 }, { "epoch": 0.4806270589964635, "grad_norm": 42.8856498343719, "learning_rate": 5.184882022067432e-06, "loss": 0.4150146484375, "step": 55585 }, { "epoch": 0.48067029251800675, "grad_norm": 6.66788905407223, "learning_rate": 5.184742405860039e-06, "loss": 0.256036376953125, "step": 55590 }, { "epoch": 0.48071352603955003, "grad_norm": 18.907633698145553, "learning_rate": 5.184602779576809e-06, "loss": 0.49672393798828124, "step": 55595 }, { "epoch": 0.4807567595610933, "grad_norm": 1.2401191734843282, "learning_rate": 5.184463143218383e-06, "loss": 0.08011245727539062, "step": 55600 }, { "epoch": 0.48079999308263655, "grad_norm": 2.5948638043008856, "learning_rate": 5.1843234967854055e-06, "loss": 0.0853515625, "step": 55605 }, { "epoch": 0.48084322660417983, "grad_norm": 11.442647066968837, "learning_rate": 5.1841838402785215e-06, "loss": 0.2602783203125, "step": 55610 }, { "epoch": 0.48088646012572306, "grad_norm": 2.0237973729249457, "learning_rate": 5.184044173698375e-06, "loss": 0.4216331481933594, "step": 55615 }, { "epoch": 0.48092969364726634, "grad_norm": 4.927596975651443, "learning_rate": 5.1839044970456095e-06, "loss": 0.060232925415039065, "step": 55620 }, { "epoch": 0.4809729271688096, "grad_norm": 0.26271757082136094, "learning_rate": 5.183764810320869e-06, "loss": 0.439715576171875, "step": 55625 }, { "epoch": 0.48101616069035286, "grad_norm": 0.8352404703002763, "learning_rate": 5.183625113524799e-06, "loss": 0.23300094604492189, "step": 55630 }, { "epoch": 0.48105939421189614, "grad_norm": 5.520327312114517, "learning_rate": 5.183485406658042e-06, "loss": 0.09412565231323242, "step": 55635 }, { "epoch": 0.4811026277334394, "grad_norm": 6.502057612553929, "learning_rate": 5.1833456897212435e-06, "loss": 0.08272285461425781, "step": 55640 }, { "epoch": 0.48114586125498265, "grad_norm": 2.976117706333362, "learning_rate": 5.183205962715048e-06, "loss": 0.423077392578125, "step": 55645 }, { "epoch": 0.48118909477652594, "grad_norm": 15.695785951469691, "learning_rate": 5.1830662256401e-06, "loss": 0.15608367919921876, "step": 55650 }, { "epoch": 0.48123232829806917, "grad_norm": 28.107131258264598, "learning_rate": 5.182926478497043e-06, "loss": 0.2850677490234375, "step": 55655 }, { "epoch": 0.48127556181961245, "grad_norm": 7.68785017121419, "learning_rate": 5.182786721286522e-06, "loss": 0.5809783935546875, "step": 55660 }, { "epoch": 0.48131879534115574, "grad_norm": 17.410197387621047, "learning_rate": 5.18264695400918e-06, "loss": 0.08368682861328125, "step": 55665 }, { "epoch": 0.48136202886269897, "grad_norm": 68.6887868713102, "learning_rate": 5.182507176665664e-06, "loss": 0.17236328125, "step": 55670 }, { "epoch": 0.48140526238424225, "grad_norm": 3.8807068881919986, "learning_rate": 5.1823673892566195e-06, "loss": 0.04619159698486328, "step": 55675 }, { "epoch": 0.48144849590578553, "grad_norm": 7.011319092434789, "learning_rate": 5.182227591782687e-06, "loss": 0.2328369140625, "step": 55680 }, { "epoch": 0.48149172942732876, "grad_norm": 2.5281547019516197, "learning_rate": 5.182087784244516e-06, "loss": 0.107391357421875, "step": 55685 }, { "epoch": 0.48153496294887205, "grad_norm": 11.460327175984474, "learning_rate": 5.181947966642747e-06, "loss": 0.09232177734375, "step": 55690 }, { "epoch": 0.4815781964704153, "grad_norm": 4.709244230949936, "learning_rate": 5.181808138978027e-06, "loss": 0.12809295654296876, "step": 55695 }, { "epoch": 0.48162142999195856, "grad_norm": 3.0195591190743056, "learning_rate": 5.181668301251001e-06, "loss": 0.07872333526611328, "step": 55700 }, { "epoch": 0.48166466351350185, "grad_norm": 2.0443098498215457, "learning_rate": 5.181528453462315e-06, "loss": 0.0811269760131836, "step": 55705 }, { "epoch": 0.4817078970350451, "grad_norm": 2.7338118508145297, "learning_rate": 5.181388595612611e-06, "loss": 0.157073974609375, "step": 55710 }, { "epoch": 0.48175113055658836, "grad_norm": 8.015023027121362, "learning_rate": 5.181248727702535e-06, "loss": 0.232281494140625, "step": 55715 }, { "epoch": 0.48179436407813164, "grad_norm": 6.860040923901108, "learning_rate": 5.181108849732734e-06, "loss": 0.07379150390625, "step": 55720 }, { "epoch": 0.48183759759967487, "grad_norm": 1.2117289399328106, "learning_rate": 5.18096896170385e-06, "loss": 0.035430908203125, "step": 55725 }, { "epoch": 0.48188083112121816, "grad_norm": 1.3132518073799058, "learning_rate": 5.180829063616532e-06, "loss": 0.08892822265625, "step": 55730 }, { "epoch": 0.4819240646427614, "grad_norm": 20.06506139714173, "learning_rate": 5.180689155471421e-06, "loss": 0.0421783447265625, "step": 55735 }, { "epoch": 0.48196729816430467, "grad_norm": 20.32557135066423, "learning_rate": 5.180549237269166e-06, "loss": 0.27823333740234374, "step": 55740 }, { "epoch": 0.48201053168584795, "grad_norm": 41.03339815546922, "learning_rate": 5.18040930901041e-06, "loss": 0.635455322265625, "step": 55745 }, { "epoch": 0.4820537652073912, "grad_norm": 9.361833343306444, "learning_rate": 5.180269370695799e-06, "loss": 0.29647216796875, "step": 55750 }, { "epoch": 0.48209699872893447, "grad_norm": 4.647969941380001, "learning_rate": 5.180129422325978e-06, "loss": 0.03816642761230469, "step": 55755 }, { "epoch": 0.48214023225047775, "grad_norm": 1.231808137758927, "learning_rate": 5.179989463901593e-06, "loss": 0.07912712097167969, "step": 55760 }, { "epoch": 0.482183465772021, "grad_norm": 4.338472469803928, "learning_rate": 5.179849495423289e-06, "loss": 0.07733001708984374, "step": 55765 }, { "epoch": 0.48222669929356426, "grad_norm": 48.29307482114393, "learning_rate": 5.179709516891712e-06, "loss": 0.29933319091796873, "step": 55770 }, { "epoch": 0.48226993281510755, "grad_norm": 8.521473199418397, "learning_rate": 5.179569528307507e-06, "loss": 0.1831298828125, "step": 55775 }, { "epoch": 0.4823131663366508, "grad_norm": 9.239278562355599, "learning_rate": 5.17942952967132e-06, "loss": 0.2890350341796875, "step": 55780 }, { "epoch": 0.48235639985819406, "grad_norm": 29.907283914718874, "learning_rate": 5.179289520983796e-06, "loss": 0.1961833953857422, "step": 55785 }, { "epoch": 0.4823996333797373, "grad_norm": 30.215239934213514, "learning_rate": 5.179149502245582e-06, "loss": 0.4522195816040039, "step": 55790 }, { "epoch": 0.4824428669012806, "grad_norm": 3.5914309008779033, "learning_rate": 5.179009473457324e-06, "loss": 0.25756988525390623, "step": 55795 }, { "epoch": 0.48248610042282386, "grad_norm": 24.017766654785916, "learning_rate": 5.178869434619666e-06, "loss": 0.2488372802734375, "step": 55800 }, { "epoch": 0.4825293339443671, "grad_norm": 0.18959612808267404, "learning_rate": 5.178729385733254e-06, "loss": 0.07229461669921874, "step": 55805 }, { "epoch": 0.4825725674659104, "grad_norm": 0.3446637416519122, "learning_rate": 5.1785893267987354e-06, "loss": 0.0538848876953125, "step": 55810 }, { "epoch": 0.48261580098745366, "grad_norm": 18.996026411510364, "learning_rate": 5.178449257816755e-06, "loss": 0.18275680541992187, "step": 55815 }, { "epoch": 0.4826590345089969, "grad_norm": 0.7548560260311499, "learning_rate": 5.178309178787959e-06, "loss": 0.036663150787353514, "step": 55820 }, { "epoch": 0.48270226803054017, "grad_norm": 21.497311828560505, "learning_rate": 5.178169089712994e-06, "loss": 0.4245208740234375, "step": 55825 }, { "epoch": 0.4827455015520834, "grad_norm": 5.56594595764626, "learning_rate": 5.178028990592505e-06, "loss": 0.08128013610839843, "step": 55830 }, { "epoch": 0.4827887350736267, "grad_norm": 5.615571718363559, "learning_rate": 5.17788888142714e-06, "loss": 0.12260894775390625, "step": 55835 }, { "epoch": 0.48283196859516997, "grad_norm": 4.87044367771341, "learning_rate": 5.177748762217543e-06, "loss": 0.09200820922851563, "step": 55840 }, { "epoch": 0.4828752021167132, "grad_norm": 1.9001998896327645, "learning_rate": 5.177608632964361e-06, "loss": 0.13180007934570312, "step": 55845 }, { "epoch": 0.4829184356382565, "grad_norm": 9.68318129414547, "learning_rate": 5.17746849366824e-06, "loss": 0.1164459228515625, "step": 55850 }, { "epoch": 0.48296166915979977, "grad_norm": 4.118804617199151, "learning_rate": 5.177328344329828e-06, "loss": 0.2529605865478516, "step": 55855 }, { "epoch": 0.483004902681343, "grad_norm": 0.6209653268236769, "learning_rate": 5.177188184949768e-06, "loss": 0.29884185791015627, "step": 55860 }, { "epoch": 0.4830481362028863, "grad_norm": 2.7872959817712344, "learning_rate": 5.17704801552871e-06, "loss": 0.1597137451171875, "step": 55865 }, { "epoch": 0.4830913697244295, "grad_norm": 1.051654825585242, "learning_rate": 5.1769078360673e-06, "loss": 0.18420562744140626, "step": 55870 }, { "epoch": 0.4831346032459728, "grad_norm": 1.5652617575921277, "learning_rate": 5.176767646566181e-06, "loss": 0.09072723388671874, "step": 55875 }, { "epoch": 0.4831778367675161, "grad_norm": 1.2127478251322452, "learning_rate": 5.176627447026004e-06, "loss": 0.4214202880859375, "step": 55880 }, { "epoch": 0.4832210702890593, "grad_norm": 3.174866884620236, "learning_rate": 5.176487237447413e-06, "loss": 0.08237838745117188, "step": 55885 }, { "epoch": 0.4832643038106026, "grad_norm": 0.641198768236389, "learning_rate": 5.176347017831054e-06, "loss": 0.3151885986328125, "step": 55890 }, { "epoch": 0.4833075373321459, "grad_norm": 1.1635838119105528, "learning_rate": 5.1762067881775755e-06, "loss": 0.087103271484375, "step": 55895 }, { "epoch": 0.4833507708536891, "grad_norm": 1.6696164599261663, "learning_rate": 5.176066548487624e-06, "loss": 0.04519805908203125, "step": 55900 }, { "epoch": 0.4833940043752324, "grad_norm": 0.23508387829609664, "learning_rate": 5.175926298761845e-06, "loss": 0.06605072021484375, "step": 55905 }, { "epoch": 0.4834372378967756, "grad_norm": 3.0034502844109814, "learning_rate": 5.175786039000887e-06, "loss": 0.16195144653320312, "step": 55910 }, { "epoch": 0.4834804714183189, "grad_norm": 5.093454649094102, "learning_rate": 5.1756457692053965e-06, "loss": 0.1379241943359375, "step": 55915 }, { "epoch": 0.4835237049398622, "grad_norm": 29.868262890309737, "learning_rate": 5.175505489376019e-06, "loss": 0.23451995849609375, "step": 55920 }, { "epoch": 0.4835669384614054, "grad_norm": 25.24891193965005, "learning_rate": 5.175365199513403e-06, "loss": 0.179852294921875, "step": 55925 }, { "epoch": 0.4836101719829487, "grad_norm": 54.28848169192977, "learning_rate": 5.175224899618194e-06, "loss": 0.38994293212890624, "step": 55930 }, { "epoch": 0.483653405504492, "grad_norm": 7.302857592669793, "learning_rate": 5.175084589691041e-06, "loss": 0.37467384338378906, "step": 55935 }, { "epoch": 0.4836966390260352, "grad_norm": 0.5673777167185543, "learning_rate": 5.17494426973259e-06, "loss": 0.2705474853515625, "step": 55940 }, { "epoch": 0.4837398725475785, "grad_norm": 6.845244987227553, "learning_rate": 5.174803939743488e-06, "loss": 0.05357513427734375, "step": 55945 }, { "epoch": 0.4837831060691217, "grad_norm": 5.5533391281235165, "learning_rate": 5.174663599724382e-06, "loss": 0.1535125732421875, "step": 55950 }, { "epoch": 0.483826339590665, "grad_norm": 25.73318936087367, "learning_rate": 5.1745232496759206e-06, "loss": 0.194439697265625, "step": 55955 }, { "epoch": 0.4838695731122083, "grad_norm": 15.889203685372028, "learning_rate": 5.174382889598749e-06, "loss": 0.155865478515625, "step": 55960 }, { "epoch": 0.4839128066337515, "grad_norm": 6.11912747133954, "learning_rate": 5.174242519493516e-06, "loss": 0.303662109375, "step": 55965 }, { "epoch": 0.4839560401552948, "grad_norm": 16.99200921018908, "learning_rate": 5.174102139360869e-06, "loss": 0.336334228515625, "step": 55970 }, { "epoch": 0.4839992736768381, "grad_norm": 2.3849082614239308, "learning_rate": 5.173961749201455e-06, "loss": 0.09530181884765625, "step": 55975 }, { "epoch": 0.4840425071983813, "grad_norm": 1.3093266007284163, "learning_rate": 5.173821349015921e-06, "loss": 0.10666351318359375, "step": 55980 }, { "epoch": 0.4840857407199246, "grad_norm": 8.41981055685932, "learning_rate": 5.173680938804915e-06, "loss": 0.393316650390625, "step": 55985 }, { "epoch": 0.4841289742414679, "grad_norm": 35.40864953190369, "learning_rate": 5.1735405185690845e-06, "loss": 0.27686309814453125, "step": 55990 }, { "epoch": 0.4841722077630111, "grad_norm": 0.06351007165768964, "learning_rate": 5.173400088309078e-06, "loss": 0.046081924438476564, "step": 55995 }, { "epoch": 0.4842154412845544, "grad_norm": 28.336132099388013, "learning_rate": 5.173259648025542e-06, "loss": 0.23893890380859376, "step": 56000 }, { "epoch": 0.48425867480609763, "grad_norm": 0.8057791192956555, "learning_rate": 5.173119197719124e-06, "loss": 0.36682281494140623, "step": 56005 }, { "epoch": 0.4843019083276409, "grad_norm": 22.51333105659888, "learning_rate": 5.1729787373904726e-06, "loss": 0.16152896881103515, "step": 56010 }, { "epoch": 0.4843451418491842, "grad_norm": 33.16696364207748, "learning_rate": 5.172838267040236e-06, "loss": 0.54246826171875, "step": 56015 }, { "epoch": 0.48438837537072743, "grad_norm": 0.052764279018517754, "learning_rate": 5.17269778666906e-06, "loss": 0.1351165771484375, "step": 56020 }, { "epoch": 0.4844316088922707, "grad_norm": 20.35585433366604, "learning_rate": 5.172557296277594e-06, "loss": 0.12852306365966798, "step": 56025 }, { "epoch": 0.484474842413814, "grad_norm": 4.364605327472669, "learning_rate": 5.172416795866487e-06, "loss": 0.2838775634765625, "step": 56030 }, { "epoch": 0.48451807593535723, "grad_norm": 0.16545220607922032, "learning_rate": 5.1722762854363845e-06, "loss": 0.15768051147460938, "step": 56035 }, { "epoch": 0.4845613094569005, "grad_norm": 6.067207562031843, "learning_rate": 5.172135764987937e-06, "loss": 0.09642791748046875, "step": 56040 }, { "epoch": 0.48460454297844374, "grad_norm": 0.38809023007868637, "learning_rate": 5.17199523452179e-06, "loss": 0.18368797302246093, "step": 56045 }, { "epoch": 0.484647776499987, "grad_norm": 3.3150247645174002, "learning_rate": 5.171854694038593e-06, "loss": 0.3822299957275391, "step": 56050 }, { "epoch": 0.4846910100215303, "grad_norm": 15.834953476898782, "learning_rate": 5.171714143538995e-06, "loss": 0.20450897216796876, "step": 56055 }, { "epoch": 0.48473424354307354, "grad_norm": 0.8964807877172061, "learning_rate": 5.1715735830236425e-06, "loss": 0.1898345947265625, "step": 56060 }, { "epoch": 0.4847774770646168, "grad_norm": 35.20107155864394, "learning_rate": 5.171433012493184e-06, "loss": 0.3064861297607422, "step": 56065 }, { "epoch": 0.4848207105861601, "grad_norm": 5.208675241633695, "learning_rate": 5.171292431948269e-06, "loss": 0.0945587158203125, "step": 56070 }, { "epoch": 0.48486394410770334, "grad_norm": 4.781226630078502, "learning_rate": 5.171151841389545e-06, "loss": 0.0481903076171875, "step": 56075 }, { "epoch": 0.4849071776292466, "grad_norm": 14.057190858213133, "learning_rate": 5.17101124081766e-06, "loss": 0.2393157958984375, "step": 56080 }, { "epoch": 0.48495041115078985, "grad_norm": 25.794539095480847, "learning_rate": 5.170870630233264e-06, "loss": 0.20457305908203124, "step": 56085 }, { "epoch": 0.48499364467233314, "grad_norm": 0.25004881050104383, "learning_rate": 5.1707300096370046e-06, "loss": 0.1405120849609375, "step": 56090 }, { "epoch": 0.4850368781938764, "grad_norm": 37.27436720595142, "learning_rate": 5.17058937902953e-06, "loss": 0.19626007080078126, "step": 56095 }, { "epoch": 0.48508011171541965, "grad_norm": 23.483056632584315, "learning_rate": 5.170448738411488e-06, "loss": 0.3340087890625, "step": 56100 }, { "epoch": 0.48512334523696293, "grad_norm": 1.733010114282187, "learning_rate": 5.170308087783529e-06, "loss": 0.02863311767578125, "step": 56105 }, { "epoch": 0.4851665787585062, "grad_norm": 8.649639437622382, "learning_rate": 5.170167427146301e-06, "loss": 0.24090576171875, "step": 56110 }, { "epoch": 0.48520981228004945, "grad_norm": 19.887777990776506, "learning_rate": 5.170026756500452e-06, "loss": 0.30810546875, "step": 56115 }, { "epoch": 0.48525304580159273, "grad_norm": 3.211058065366551, "learning_rate": 5.169886075846632e-06, "loss": 0.49651031494140624, "step": 56120 }, { "epoch": 0.48529627932313596, "grad_norm": 7.034833317690693, "learning_rate": 5.169745385185489e-06, "loss": 0.542205810546875, "step": 56125 }, { "epoch": 0.48533951284467924, "grad_norm": 1.0502812931636418, "learning_rate": 5.169604684517672e-06, "loss": 0.0848388671875, "step": 56130 }, { "epoch": 0.48538274636622253, "grad_norm": 33.28325068963491, "learning_rate": 5.169463973843829e-06, "loss": 0.22337646484375, "step": 56135 }, { "epoch": 0.48542597988776576, "grad_norm": 7.917090159868217, "learning_rate": 5.169323253164612e-06, "loss": 0.10498428344726562, "step": 56140 }, { "epoch": 0.48546921340930904, "grad_norm": 21.05405622602094, "learning_rate": 5.169182522480665e-06, "loss": 0.403387451171875, "step": 56145 }, { "epoch": 0.4855124469308523, "grad_norm": 22.835942073834556, "learning_rate": 5.169041781792641e-06, "loss": 0.064874267578125, "step": 56150 }, { "epoch": 0.48555568045239555, "grad_norm": 31.29772551123734, "learning_rate": 5.168901031101189e-06, "loss": 0.21482696533203124, "step": 56155 }, { "epoch": 0.48559891397393884, "grad_norm": 6.461107604370018, "learning_rate": 5.168760270406957e-06, "loss": 0.08847312927246094, "step": 56160 }, { "epoch": 0.4856421474954821, "grad_norm": 3.9683935320148778, "learning_rate": 5.1686194997105925e-06, "loss": 0.3825286865234375, "step": 56165 }, { "epoch": 0.48568538101702535, "grad_norm": 0.2791406278381878, "learning_rate": 5.168478719012748e-06, "loss": 0.023724365234375, "step": 56170 }, { "epoch": 0.48572861453856864, "grad_norm": 1.1028322837449023, "learning_rate": 5.168337928314071e-06, "loss": 0.04063720703125, "step": 56175 }, { "epoch": 0.48577184806011187, "grad_norm": 6.590461138830963, "learning_rate": 5.168197127615211e-06, "loss": 0.18667755126953126, "step": 56180 }, { "epoch": 0.48581508158165515, "grad_norm": 1.423234395891659, "learning_rate": 5.1680563169168175e-06, "loss": 0.05765838623046875, "step": 56185 }, { "epoch": 0.48585831510319843, "grad_norm": 0.05910053139415682, "learning_rate": 5.16791549621954e-06, "loss": 0.08246536254882812, "step": 56190 }, { "epoch": 0.48590154862474166, "grad_norm": 15.64953662508808, "learning_rate": 5.167774665524028e-06, "loss": 0.3083099365234375, "step": 56195 }, { "epoch": 0.48594478214628495, "grad_norm": 5.139781150005331, "learning_rate": 5.167633824830929e-06, "loss": 0.02957611083984375, "step": 56200 }, { "epoch": 0.48598801566782823, "grad_norm": 34.61743999645468, "learning_rate": 5.1674929741408965e-06, "loss": 0.22674407958984374, "step": 56205 }, { "epoch": 0.48603124918937146, "grad_norm": 2.060817977642594, "learning_rate": 5.167352113454577e-06, "loss": 0.1188568115234375, "step": 56210 }, { "epoch": 0.48607448271091475, "grad_norm": 25.677700154961528, "learning_rate": 5.1672112427726204e-06, "loss": 0.19457550048828126, "step": 56215 }, { "epoch": 0.486117716232458, "grad_norm": 14.507856619745109, "learning_rate": 5.167070362095678e-06, "loss": 0.06508102416992187, "step": 56220 }, { "epoch": 0.48616094975400126, "grad_norm": 11.809374620030654, "learning_rate": 5.1669294714243986e-06, "loss": 0.12397003173828125, "step": 56225 }, { "epoch": 0.48620418327554454, "grad_norm": 1.9451275563494341, "learning_rate": 5.166788570759432e-06, "loss": 0.08303680419921874, "step": 56230 }, { "epoch": 0.4862474167970878, "grad_norm": 0.11711526766796428, "learning_rate": 5.166647660101428e-06, "loss": 0.21847686767578126, "step": 56235 }, { "epoch": 0.48629065031863106, "grad_norm": 10.034434999148566, "learning_rate": 5.166506739451035e-06, "loss": 0.20168991088867189, "step": 56240 }, { "epoch": 0.48633388384017434, "grad_norm": 2.7934188871289938, "learning_rate": 5.166365808808906e-06, "loss": 0.1527679443359375, "step": 56245 }, { "epoch": 0.48637711736171757, "grad_norm": 4.837853052347621, "learning_rate": 5.16622486817569e-06, "loss": 0.1554901123046875, "step": 56250 }, { "epoch": 0.48642035088326085, "grad_norm": 0.5466032534668754, "learning_rate": 5.166083917552034e-06, "loss": 0.18203582763671874, "step": 56255 }, { "epoch": 0.4864635844048041, "grad_norm": 3.2997298705479605, "learning_rate": 5.1659429569385925e-06, "loss": 0.2262958526611328, "step": 56260 }, { "epoch": 0.48650681792634737, "grad_norm": 16.490412755809967, "learning_rate": 5.165801986336012e-06, "loss": 0.0926361083984375, "step": 56265 }, { "epoch": 0.48655005144789065, "grad_norm": 2.892810584454145, "learning_rate": 5.165661005744944e-06, "loss": 0.11423873901367188, "step": 56270 }, { "epoch": 0.4865932849694339, "grad_norm": 6.63826705318064, "learning_rate": 5.16552001516604e-06, "loss": 0.05649013519287109, "step": 56275 }, { "epoch": 0.48663651849097717, "grad_norm": 0.5886523521573849, "learning_rate": 5.1653790145999475e-06, "loss": 0.2062915802001953, "step": 56280 }, { "epoch": 0.48667975201252045, "grad_norm": 18.690115121077646, "learning_rate": 5.16523800404732e-06, "loss": 0.10446701049804688, "step": 56285 }, { "epoch": 0.4867229855340637, "grad_norm": 37.34619691583385, "learning_rate": 5.1650969835088056e-06, "loss": 0.23114013671875, "step": 56290 }, { "epoch": 0.48676621905560696, "grad_norm": 6.050202527710181, "learning_rate": 5.164955952985054e-06, "loss": 0.083160400390625, "step": 56295 }, { "epoch": 0.4868094525771502, "grad_norm": 7.938956773990561, "learning_rate": 5.164814912476719e-06, "loss": 0.2715362548828125, "step": 56300 }, { "epoch": 0.4868526860986935, "grad_norm": 18.0206346367021, "learning_rate": 5.164673861984448e-06, "loss": 0.0977294921875, "step": 56305 }, { "epoch": 0.48689591962023676, "grad_norm": 30.844994531963906, "learning_rate": 5.164532801508892e-06, "loss": 0.43543319702148436, "step": 56310 }, { "epoch": 0.48693915314178, "grad_norm": 4.680057280987548, "learning_rate": 5.1643917310507015e-06, "loss": 0.0950347900390625, "step": 56315 }, { "epoch": 0.4869823866633233, "grad_norm": 22.266425815422238, "learning_rate": 5.164250650610529e-06, "loss": 0.36281585693359375, "step": 56320 }, { "epoch": 0.48702562018486656, "grad_norm": 0.9058660427271337, "learning_rate": 5.164109560189022e-06, "loss": 0.13702545166015626, "step": 56325 }, { "epoch": 0.4870688537064098, "grad_norm": 5.938854215083321, "learning_rate": 5.1639684597868335e-06, "loss": 0.1512969970703125, "step": 56330 }, { "epoch": 0.48711208722795307, "grad_norm": 15.446165477319607, "learning_rate": 5.1638273494046145e-06, "loss": 0.255096435546875, "step": 56335 }, { "epoch": 0.48715532074949636, "grad_norm": 5.773502075096628, "learning_rate": 5.163686229043014e-06, "loss": 0.11641387939453125, "step": 56340 }, { "epoch": 0.4871985542710396, "grad_norm": 3.090438885105016, "learning_rate": 5.1635450987026855e-06, "loss": 0.3839752197265625, "step": 56345 }, { "epoch": 0.48724178779258287, "grad_norm": 10.548906662586882, "learning_rate": 5.1634039583842766e-06, "loss": 0.13975486755371094, "step": 56350 }, { "epoch": 0.4872850213141261, "grad_norm": 15.801514929863526, "learning_rate": 5.1632628080884395e-06, "loss": 0.08871917724609375, "step": 56355 }, { "epoch": 0.4873282548356694, "grad_norm": 11.48905562527221, "learning_rate": 5.163121647815827e-06, "loss": 0.17842025756835939, "step": 56360 }, { "epoch": 0.48737148835721267, "grad_norm": 52.76244361814703, "learning_rate": 5.1629804775670875e-06, "loss": 0.45127105712890625, "step": 56365 }, { "epoch": 0.4874147218787559, "grad_norm": 1.384612425679205, "learning_rate": 5.1628392973428735e-06, "loss": 0.07398681640625, "step": 56370 }, { "epoch": 0.4874579554002992, "grad_norm": 21.104532667636708, "learning_rate": 5.162698107143836e-06, "loss": 0.5539131164550781, "step": 56375 }, { "epoch": 0.48750118892184247, "grad_norm": 22.24240845649266, "learning_rate": 5.162556906970626e-06, "loss": 0.1137237548828125, "step": 56380 }, { "epoch": 0.4875444224433857, "grad_norm": 5.411910085506967, "learning_rate": 5.162415696823894e-06, "loss": 0.14801769256591796, "step": 56385 }, { "epoch": 0.487587655964929, "grad_norm": 4.376286049822477, "learning_rate": 5.1622744767042925e-06, "loss": 0.07687835693359375, "step": 56390 }, { "epoch": 0.4876308894864722, "grad_norm": 9.963417709902458, "learning_rate": 5.1621332466124715e-06, "loss": 0.14764404296875, "step": 56395 }, { "epoch": 0.4876741230080155, "grad_norm": 9.88640294347525, "learning_rate": 5.161992006549083e-06, "loss": 0.5513961791992188, "step": 56400 }, { "epoch": 0.4877173565295588, "grad_norm": 27.98344775459492, "learning_rate": 5.161850756514779e-06, "loss": 0.4012176513671875, "step": 56405 }, { "epoch": 0.487760590051102, "grad_norm": 3.8019521594618704, "learning_rate": 5.16170949651021e-06, "loss": 0.434320068359375, "step": 56410 }, { "epoch": 0.4878038235726453, "grad_norm": 2.32027664031765, "learning_rate": 5.161568226536028e-06, "loss": 0.5915069580078125, "step": 56415 }, { "epoch": 0.4878470570941886, "grad_norm": 4.657682513918844, "learning_rate": 5.1614269465928845e-06, "loss": 0.05504150390625, "step": 56420 }, { "epoch": 0.4878902906157318, "grad_norm": 20.075058531496627, "learning_rate": 5.16128565668143e-06, "loss": 0.3477439880371094, "step": 56425 }, { "epoch": 0.4879335241372751, "grad_norm": 2.368391138739467, "learning_rate": 5.161144356802318e-06, "loss": 0.19188232421875, "step": 56430 }, { "epoch": 0.4879767576588183, "grad_norm": 4.77300672709036, "learning_rate": 5.161003046956199e-06, "loss": 0.0837249755859375, "step": 56435 }, { "epoch": 0.4880199911803616, "grad_norm": 0.3261072941120313, "learning_rate": 5.160861727143725e-06, "loss": 0.10208511352539062, "step": 56440 }, { "epoch": 0.4880632247019049, "grad_norm": 0.20119113810695433, "learning_rate": 5.160720397365547e-06, "loss": 0.2927711486816406, "step": 56445 }, { "epoch": 0.4881064582234481, "grad_norm": 9.089858809317983, "learning_rate": 5.1605790576223176e-06, "loss": 0.20836715698242186, "step": 56450 }, { "epoch": 0.4881496917449914, "grad_norm": 19.42411475055847, "learning_rate": 5.160437707914689e-06, "loss": 0.2022705078125, "step": 56455 }, { "epoch": 0.4881929252665347, "grad_norm": 12.98224230400183, "learning_rate": 5.160296348243312e-06, "loss": 0.12274360656738281, "step": 56460 }, { "epoch": 0.4882361587880779, "grad_norm": 1.657730622477332, "learning_rate": 5.160154978608841e-06, "loss": 0.3503927230834961, "step": 56465 }, { "epoch": 0.4882793923096212, "grad_norm": 0.7230028394266766, "learning_rate": 5.160013599011924e-06, "loss": 0.03316497802734375, "step": 56470 }, { "epoch": 0.4883226258311644, "grad_norm": 3.6190639274950893, "learning_rate": 5.159872209453217e-06, "loss": 0.20361194610595704, "step": 56475 }, { "epoch": 0.4883658593527077, "grad_norm": 7.276024999050233, "learning_rate": 5.159730809933367e-06, "loss": 0.3785888671875, "step": 56480 }, { "epoch": 0.488409092874251, "grad_norm": 17.862821084352635, "learning_rate": 5.1595894004530315e-06, "loss": 0.12563819885253907, "step": 56485 }, { "epoch": 0.4884523263957942, "grad_norm": 31.300385676034587, "learning_rate": 5.159447981012861e-06, "loss": 0.43977012634277346, "step": 56490 }, { "epoch": 0.4884955599173375, "grad_norm": 24.14866826552675, "learning_rate": 5.159306551613506e-06, "loss": 0.30538787841796877, "step": 56495 }, { "epoch": 0.4885387934388808, "grad_norm": 0.576707992175473, "learning_rate": 5.15916511225562e-06, "loss": 0.20006561279296875, "step": 56500 }, { "epoch": 0.488582026960424, "grad_norm": 4.5278060994103555, "learning_rate": 5.1590236629398565e-06, "loss": 0.09695358276367187, "step": 56505 }, { "epoch": 0.4886252604819673, "grad_norm": 2.4425590500385677, "learning_rate": 5.158882203666866e-06, "loss": 0.2793212890625, "step": 56510 }, { "epoch": 0.4886684940035106, "grad_norm": 12.858804263108235, "learning_rate": 5.158740734437301e-06, "loss": 0.28567657470703123, "step": 56515 }, { "epoch": 0.4887117275250538, "grad_norm": 35.33507114174121, "learning_rate": 5.158599255251816e-06, "loss": 0.22517681121826172, "step": 56520 }, { "epoch": 0.4887549610465971, "grad_norm": 2.959425733346562, "learning_rate": 5.1584577661110606e-06, "loss": 0.03376693725585937, "step": 56525 }, { "epoch": 0.48879819456814033, "grad_norm": 6.603189661957555, "learning_rate": 5.15831626701569e-06, "loss": 0.35143585205078126, "step": 56530 }, { "epoch": 0.4888414280896836, "grad_norm": 5.458104953071412, "learning_rate": 5.1581747579663534e-06, "loss": 0.0560943603515625, "step": 56535 }, { "epoch": 0.4888846616112269, "grad_norm": 0.30553929361483667, "learning_rate": 5.158033238963707e-06, "loss": 0.1212860107421875, "step": 56540 }, { "epoch": 0.48892789513277013, "grad_norm": 7.437080026685106, "learning_rate": 5.157891710008402e-06, "loss": 0.027094268798828126, "step": 56545 }, { "epoch": 0.4889711286543134, "grad_norm": 4.518704182778885, "learning_rate": 5.15775017110109e-06, "loss": 0.3953826904296875, "step": 56550 }, { "epoch": 0.4890143621758567, "grad_norm": 8.654923697713864, "learning_rate": 5.157608622242426e-06, "loss": 0.185205078125, "step": 56555 }, { "epoch": 0.4890575956973999, "grad_norm": 21.460642634208472, "learning_rate": 5.1574670634330615e-06, "loss": 0.1763580322265625, "step": 56560 }, { "epoch": 0.4891008292189432, "grad_norm": 8.429553895377389, "learning_rate": 5.157325494673649e-06, "loss": 0.33036231994628906, "step": 56565 }, { "epoch": 0.48914406274048644, "grad_norm": 2.5061628092356663, "learning_rate": 5.157183915964842e-06, "loss": 0.13249969482421875, "step": 56570 }, { "epoch": 0.4891872962620297, "grad_norm": 40.64686572631644, "learning_rate": 5.157042327307294e-06, "loss": 0.27046661376953124, "step": 56575 }, { "epoch": 0.489230529783573, "grad_norm": 1.129598546398456, "learning_rate": 5.1569007287016565e-06, "loss": 0.15151290893554686, "step": 56580 }, { "epoch": 0.48927376330511624, "grad_norm": 1.217241953212972, "learning_rate": 5.156759120148585e-06, "loss": 0.15259933471679688, "step": 56585 }, { "epoch": 0.4893169968266595, "grad_norm": 2.6265884241007353, "learning_rate": 5.15661750164873e-06, "loss": 0.1151885986328125, "step": 56590 }, { "epoch": 0.4893602303482028, "grad_norm": 15.829940914415596, "learning_rate": 5.156475873202745e-06, "loss": 0.18881454467773437, "step": 56595 }, { "epoch": 0.48940346386974604, "grad_norm": 13.800989201167045, "learning_rate": 5.156334234811285e-06, "loss": 0.2195098876953125, "step": 56600 }, { "epoch": 0.4894466973912893, "grad_norm": 0.11649835694814042, "learning_rate": 5.1561925864750015e-06, "loss": 0.11179962158203124, "step": 56605 }, { "epoch": 0.48948993091283255, "grad_norm": 2.357548612303094, "learning_rate": 5.156050928194548e-06, "loss": 0.26094398498535154, "step": 56610 }, { "epoch": 0.48953316443437583, "grad_norm": 38.390186030193064, "learning_rate": 5.1559092599705785e-06, "loss": 0.18875732421875, "step": 56615 }, { "epoch": 0.4895763979559191, "grad_norm": 12.038528704961225, "learning_rate": 5.155767581803746e-06, "loss": 0.06758918762207031, "step": 56620 }, { "epoch": 0.48961963147746235, "grad_norm": 17.702624416700235, "learning_rate": 5.155625893694704e-06, "loss": 0.159588623046875, "step": 56625 }, { "epoch": 0.48966286499900563, "grad_norm": 13.781152424846905, "learning_rate": 5.155484195644106e-06, "loss": 0.24018363952636718, "step": 56630 }, { "epoch": 0.4897060985205489, "grad_norm": 0.383896586615762, "learning_rate": 5.155342487652605e-06, "loss": 0.20889434814453126, "step": 56635 }, { "epoch": 0.48974933204209214, "grad_norm": 1.1817796228787374, "learning_rate": 5.155200769720855e-06, "loss": 0.16400604248046874, "step": 56640 }, { "epoch": 0.48979256556363543, "grad_norm": 17.4857015647999, "learning_rate": 5.15505904184951e-06, "loss": 0.30469818115234376, "step": 56645 }, { "epoch": 0.48983579908517866, "grad_norm": 5.508918247729092, "learning_rate": 5.154917304039222e-06, "loss": 0.3279022216796875, "step": 56650 }, { "epoch": 0.48987903260672194, "grad_norm": 3.122202858858433, "learning_rate": 5.154775556290646e-06, "loss": 0.040618896484375, "step": 56655 }, { "epoch": 0.4899222661282652, "grad_norm": 1.0818612793310152, "learning_rate": 5.1546337986044365e-06, "loss": 0.150140380859375, "step": 56660 }, { "epoch": 0.48996549964980846, "grad_norm": 24.970437414336253, "learning_rate": 5.154492030981245e-06, "loss": 0.2901702880859375, "step": 56665 }, { "epoch": 0.49000873317135174, "grad_norm": 26.004126139770555, "learning_rate": 5.1543502534217275e-06, "loss": 0.44759979248046877, "step": 56670 }, { "epoch": 0.490051966692895, "grad_norm": 18.062089659460487, "learning_rate": 5.154208465926537e-06, "loss": 0.048999977111816403, "step": 56675 }, { "epoch": 0.49009520021443825, "grad_norm": 1.3079038744197395, "learning_rate": 5.1540666684963276e-06, "loss": 0.13128662109375, "step": 56680 }, { "epoch": 0.49013843373598154, "grad_norm": 1.1729749117507404, "learning_rate": 5.153924861131753e-06, "loss": 0.12982025146484374, "step": 56685 }, { "epoch": 0.49018166725752477, "grad_norm": 2.6054545379141767, "learning_rate": 5.153783043833466e-06, "loss": 0.11060752868652343, "step": 56690 }, { "epoch": 0.49022490077906805, "grad_norm": 42.08251402881635, "learning_rate": 5.153641216602123e-06, "loss": 0.28135986328125, "step": 56695 }, { "epoch": 0.49026813430061134, "grad_norm": 0.725830865175852, "learning_rate": 5.153499379438377e-06, "loss": 0.40496063232421875, "step": 56700 }, { "epoch": 0.49031136782215456, "grad_norm": 7.117606282355342, "learning_rate": 5.1533575323428825e-06, "loss": 0.15123748779296875, "step": 56705 }, { "epoch": 0.49035460134369785, "grad_norm": 3.6961054103546016, "learning_rate": 5.1532156753162925e-06, "loss": 0.07662353515625, "step": 56710 }, { "epoch": 0.49039783486524113, "grad_norm": 23.063730840918822, "learning_rate": 5.153073808359262e-06, "loss": 0.3405914306640625, "step": 56715 }, { "epoch": 0.49044106838678436, "grad_norm": 2.0161498894438177, "learning_rate": 5.1529319314724455e-06, "loss": 0.0965576171875, "step": 56720 }, { "epoch": 0.49048430190832765, "grad_norm": 17.45286048752997, "learning_rate": 5.152790044656498e-06, "loss": 0.18444061279296875, "step": 56725 }, { "epoch": 0.49052753542987093, "grad_norm": 4.970235708565524, "learning_rate": 5.152648147912072e-06, "loss": 0.137896728515625, "step": 56730 }, { "epoch": 0.49057076895141416, "grad_norm": 37.83625594580188, "learning_rate": 5.152506241239824e-06, "loss": 0.061053466796875, "step": 56735 }, { "epoch": 0.49061400247295744, "grad_norm": 1.3489111919713679, "learning_rate": 5.1523643246404075e-06, "loss": 0.28594970703125, "step": 56740 }, { "epoch": 0.4906572359945007, "grad_norm": 19.501237841887527, "learning_rate": 5.152222398114476e-06, "loss": 0.1366119384765625, "step": 56745 }, { "epoch": 0.49070046951604396, "grad_norm": 14.624213068042659, "learning_rate": 5.152080461662685e-06, "loss": 0.2734771728515625, "step": 56750 }, { "epoch": 0.49074370303758724, "grad_norm": 2.3712786022651846, "learning_rate": 5.15193851528569e-06, "loss": 0.16068267822265625, "step": 56755 }, { "epoch": 0.49078693655913047, "grad_norm": 18.803618760652498, "learning_rate": 5.151796558984143e-06, "loss": 0.30308074951171876, "step": 56760 }, { "epoch": 0.49083017008067376, "grad_norm": 15.179748306099997, "learning_rate": 5.151654592758702e-06, "loss": 0.618096923828125, "step": 56765 }, { "epoch": 0.49087340360221704, "grad_norm": 9.846846549907779, "learning_rate": 5.151512616610019e-06, "loss": 0.3500518798828125, "step": 56770 }, { "epoch": 0.49091663712376027, "grad_norm": 0.986848758580302, "learning_rate": 5.151370630538751e-06, "loss": 0.12043914794921876, "step": 56775 }, { "epoch": 0.49095987064530355, "grad_norm": 30.595120324613745, "learning_rate": 5.151228634545551e-06, "loss": 0.43048095703125, "step": 56780 }, { "epoch": 0.4910031041668468, "grad_norm": 1.8094725626482409, "learning_rate": 5.151086628631076e-06, "loss": 0.1480224609375, "step": 56785 }, { "epoch": 0.49104633768839007, "grad_norm": 6.3065884597814135, "learning_rate": 5.150944612795977e-06, "loss": 0.11935577392578126, "step": 56790 }, { "epoch": 0.49108957120993335, "grad_norm": 7.2603756021956904, "learning_rate": 5.1508025870409134e-06, "loss": 0.028099822998046874, "step": 56795 }, { "epoch": 0.4911328047314766, "grad_norm": 33.023097220501484, "learning_rate": 5.150660551366538e-06, "loss": 0.142901611328125, "step": 56800 }, { "epoch": 0.49117603825301986, "grad_norm": 0.5308639202497739, "learning_rate": 5.150518505773505e-06, "loss": 0.167620849609375, "step": 56805 }, { "epoch": 0.49121927177456315, "grad_norm": 51.259930568715525, "learning_rate": 5.150376450262472e-06, "loss": 0.27802886962890627, "step": 56810 }, { "epoch": 0.4912625052961064, "grad_norm": 35.6067449524151, "learning_rate": 5.150234384834093e-06, "loss": 0.139910888671875, "step": 56815 }, { "epoch": 0.49130573881764966, "grad_norm": 3.1813378176162557, "learning_rate": 5.150092309489023e-06, "loss": 0.19151611328125, "step": 56820 }, { "epoch": 0.4913489723391929, "grad_norm": 1.426227138553698, "learning_rate": 5.149950224227917e-06, "loss": 0.054866790771484375, "step": 56825 }, { "epoch": 0.4913922058607362, "grad_norm": 6.146795339738322, "learning_rate": 5.14980812905143e-06, "loss": 0.20952415466308594, "step": 56830 }, { "epoch": 0.49143543938227946, "grad_norm": 3.8508435751475445, "learning_rate": 5.149666023960218e-06, "loss": 0.0672637939453125, "step": 56835 }, { "epoch": 0.4914786729038227, "grad_norm": 4.943209800162351, "learning_rate": 5.149523908954937e-06, "loss": 0.4585845947265625, "step": 56840 }, { "epoch": 0.491521906425366, "grad_norm": 3.2500616846466204, "learning_rate": 5.1493817840362405e-06, "loss": 0.06726608276367188, "step": 56845 }, { "epoch": 0.49156513994690926, "grad_norm": 24.75020443924539, "learning_rate": 5.149239649204785e-06, "loss": 0.2952789306640625, "step": 56850 }, { "epoch": 0.4916083734684525, "grad_norm": 13.67941754573838, "learning_rate": 5.149097504461228e-06, "loss": 0.1128631591796875, "step": 56855 }, { "epoch": 0.49165160698999577, "grad_norm": 2.340121670869607, "learning_rate": 5.148955349806222e-06, "loss": 0.21388587951660157, "step": 56860 }, { "epoch": 0.491694840511539, "grad_norm": 42.783829724204395, "learning_rate": 5.148813185240423e-06, "loss": 0.17795028686523437, "step": 56865 }, { "epoch": 0.4917380740330823, "grad_norm": 12.458520995191298, "learning_rate": 5.14867101076449e-06, "loss": 0.350384521484375, "step": 56870 }, { "epoch": 0.49178130755462557, "grad_norm": 26.230341874616148, "learning_rate": 5.148528826379073e-06, "loss": 0.11828346252441406, "step": 56875 }, { "epoch": 0.4918245410761688, "grad_norm": 21.32885210275375, "learning_rate": 5.148386632084832e-06, "loss": 0.2336761474609375, "step": 56880 }, { "epoch": 0.4918677745977121, "grad_norm": 8.181868011628483, "learning_rate": 5.148244427882423e-06, "loss": 0.22941207885742188, "step": 56885 }, { "epoch": 0.49191100811925537, "grad_norm": 2.3515843151131572, "learning_rate": 5.148102213772499e-06, "loss": 0.101275634765625, "step": 56890 }, { "epoch": 0.4919542416407986, "grad_norm": 1.57531594635461, "learning_rate": 5.147959989755719e-06, "loss": 0.14952964782714845, "step": 56895 }, { "epoch": 0.4919974751623419, "grad_norm": 1.3673295176393878, "learning_rate": 5.147817755832735e-06, "loss": 0.15784034729003907, "step": 56900 }, { "epoch": 0.49204070868388516, "grad_norm": 24.49957847660843, "learning_rate": 5.147675512004208e-06, "loss": 0.1332315444946289, "step": 56905 }, { "epoch": 0.4920839422054284, "grad_norm": 2.4686029187398524, "learning_rate": 5.147533258270789e-06, "loss": 0.39904632568359377, "step": 56910 }, { "epoch": 0.4921271757269717, "grad_norm": 7.618499402539596, "learning_rate": 5.147390994633138e-06, "loss": 0.199847412109375, "step": 56915 }, { "epoch": 0.4921704092485149, "grad_norm": 5.372633906539865, "learning_rate": 5.147248721091909e-06, "loss": 0.2326171875, "step": 56920 }, { "epoch": 0.4922136427700582, "grad_norm": 12.682100563786303, "learning_rate": 5.147106437647758e-06, "loss": 0.15748100280761718, "step": 56925 }, { "epoch": 0.4922568762916015, "grad_norm": 4.232739187790294, "learning_rate": 5.1469641443013425e-06, "loss": 0.13297576904296876, "step": 56930 }, { "epoch": 0.4923001098131447, "grad_norm": 21.065099201917143, "learning_rate": 5.146821841053318e-06, "loss": 0.5898391723632812, "step": 56935 }, { "epoch": 0.492343343334688, "grad_norm": 1.1171234957579699, "learning_rate": 5.14667952790434e-06, "loss": 0.03323822021484375, "step": 56940 }, { "epoch": 0.4923865768562313, "grad_norm": 1.9056759190780963, "learning_rate": 5.146537204855067e-06, "loss": 0.28826904296875, "step": 56945 }, { "epoch": 0.4924298103777745, "grad_norm": 2.657114679959573, "learning_rate": 5.146394871906153e-06, "loss": 0.07840309143066407, "step": 56950 }, { "epoch": 0.4924730438993178, "grad_norm": 0.14293723819130877, "learning_rate": 5.146252529058256e-06, "loss": 0.09500579833984375, "step": 56955 }, { "epoch": 0.492516277420861, "grad_norm": 0.26152529478512093, "learning_rate": 5.146110176312032e-06, "loss": 0.15377235412597656, "step": 56960 }, { "epoch": 0.4925595109424043, "grad_norm": 34.96075772387097, "learning_rate": 5.1459678136681374e-06, "loss": 0.1745941162109375, "step": 56965 }, { "epoch": 0.4926027444639476, "grad_norm": 0.9695365038595399, "learning_rate": 5.145825441127229e-06, "loss": 0.30641632080078124, "step": 56970 }, { "epoch": 0.4926459779854908, "grad_norm": 6.587250473303711, "learning_rate": 5.145683058689963e-06, "loss": 0.17233200073242189, "step": 56975 }, { "epoch": 0.4926892115070341, "grad_norm": 1.351370120978682, "learning_rate": 5.1455406663569965e-06, "loss": 0.21410369873046875, "step": 56980 }, { "epoch": 0.4927324450285774, "grad_norm": 0.5893423735660102, "learning_rate": 5.1453982641289866e-06, "loss": 0.07620391845703126, "step": 56985 }, { "epoch": 0.4927756785501206, "grad_norm": 15.821927966974583, "learning_rate": 5.145255852006588e-06, "loss": 0.08991222381591797, "step": 56990 }, { "epoch": 0.4928189120716639, "grad_norm": 38.43889917001406, "learning_rate": 5.1451134299904615e-06, "loss": 0.3322662353515625, "step": 56995 }, { "epoch": 0.4928621455932071, "grad_norm": 0.421853822104974, "learning_rate": 5.144970998081259e-06, "loss": 0.19520149230957032, "step": 57000 }, { "epoch": 0.4929053791147504, "grad_norm": 7.231232705875412, "learning_rate": 5.14482855627964e-06, "loss": 0.12813491821289064, "step": 57005 }, { "epoch": 0.4929486126362937, "grad_norm": 8.737612681381812, "learning_rate": 5.1446861045862625e-06, "loss": 0.05033454895019531, "step": 57010 }, { "epoch": 0.4929918461578369, "grad_norm": 2.1855740194962134, "learning_rate": 5.144543643001782e-06, "loss": 0.22195663452148437, "step": 57015 }, { "epoch": 0.4930350796793802, "grad_norm": 0.978992005145196, "learning_rate": 5.144401171526856e-06, "loss": 0.059651947021484374, "step": 57020 }, { "epoch": 0.4930783132009235, "grad_norm": 9.837332981748164, "learning_rate": 5.144258690162139e-06, "loss": 0.0608245849609375, "step": 57025 }, { "epoch": 0.4931215467224667, "grad_norm": 2.690259366881713, "learning_rate": 5.144116198908292e-06, "loss": 0.10889892578125, "step": 57030 }, { "epoch": 0.49316478024401, "grad_norm": 3.395121544112416, "learning_rate": 5.143973697765971e-06, "loss": 0.11601715087890625, "step": 57035 }, { "epoch": 0.49320801376555323, "grad_norm": 1.3093984112709125, "learning_rate": 5.143831186735832e-06, "loss": 0.4786525726318359, "step": 57040 }, { "epoch": 0.4932512472870965, "grad_norm": 19.926196502129823, "learning_rate": 5.143688665818533e-06, "loss": 0.04384918212890625, "step": 57045 }, { "epoch": 0.4932944808086398, "grad_norm": 15.741086954576355, "learning_rate": 5.143546135014732e-06, "loss": 0.5914306640625, "step": 57050 }, { "epoch": 0.49333771433018303, "grad_norm": 3.9262912532957035, "learning_rate": 5.143403594325085e-06, "loss": 0.19832000732421876, "step": 57055 }, { "epoch": 0.4933809478517263, "grad_norm": 3.6754323933824793, "learning_rate": 5.143261043750251e-06, "loss": 0.06412429809570312, "step": 57060 }, { "epoch": 0.4934241813732696, "grad_norm": 9.798280495180054, "learning_rate": 5.143118483290885e-06, "loss": 0.1984527587890625, "step": 57065 }, { "epoch": 0.49346741489481283, "grad_norm": 0.3583088031869488, "learning_rate": 5.1429759129476465e-06, "loss": 0.048004150390625, "step": 57070 }, { "epoch": 0.4935106484163561, "grad_norm": 1.3883746103723291, "learning_rate": 5.142833332721192e-06, "loss": 0.03349113464355469, "step": 57075 }, { "epoch": 0.4935538819378994, "grad_norm": 0.25514986249081517, "learning_rate": 5.14269074261218e-06, "loss": 0.16940078735351563, "step": 57080 }, { "epoch": 0.4935971154594426, "grad_norm": 0.6493100039595604, "learning_rate": 5.142548142621267e-06, "loss": 0.14896240234375, "step": 57085 }, { "epoch": 0.4936403489809859, "grad_norm": 4.956774148996727, "learning_rate": 5.142405532749112e-06, "loss": 0.061077880859375, "step": 57090 }, { "epoch": 0.49368358250252914, "grad_norm": 8.637374314881754, "learning_rate": 5.1422629129963725e-06, "loss": 0.3005615234375, "step": 57095 }, { "epoch": 0.4937268160240724, "grad_norm": 9.073418853349816, "learning_rate": 5.142120283363705e-06, "loss": 0.13638458251953126, "step": 57100 }, { "epoch": 0.4937700495456157, "grad_norm": 4.42140191193418, "learning_rate": 5.141977643851769e-06, "loss": 0.03909759521484375, "step": 57105 }, { "epoch": 0.49381328306715894, "grad_norm": 0.9778845849377112, "learning_rate": 5.141834994461221e-06, "loss": 0.14965057373046875, "step": 57110 }, { "epoch": 0.4938565165887022, "grad_norm": 0.19675874986567243, "learning_rate": 5.141692335192717e-06, "loss": 0.15378255844116212, "step": 57115 }, { "epoch": 0.4938997501102455, "grad_norm": 7.496291539083036, "learning_rate": 5.14154966604692e-06, "loss": 0.3339954376220703, "step": 57120 }, { "epoch": 0.49394298363178873, "grad_norm": 15.18819447623076, "learning_rate": 5.141406987024485e-06, "loss": 0.05316619873046875, "step": 57125 }, { "epoch": 0.493986217153332, "grad_norm": 3.207806890347766, "learning_rate": 5.141264298126068e-06, "loss": 0.347393798828125, "step": 57130 }, { "epoch": 0.49402945067487525, "grad_norm": 3.717239090258167, "learning_rate": 5.141121599352331e-06, "loss": 0.07989768981933594, "step": 57135 }, { "epoch": 0.49407268419641853, "grad_norm": 6.598388754696503, "learning_rate": 5.14097889070393e-06, "loss": 0.386114501953125, "step": 57140 }, { "epoch": 0.4941159177179618, "grad_norm": 34.65067485674405, "learning_rate": 5.140836172181524e-06, "loss": 0.3119041442871094, "step": 57145 }, { "epoch": 0.49415915123950505, "grad_norm": 4.236464572591932, "learning_rate": 5.140693443785769e-06, "loss": 0.07241439819335938, "step": 57150 }, { "epoch": 0.49420238476104833, "grad_norm": 2.0211161532871893, "learning_rate": 5.140550705517327e-06, "loss": 0.01638946533203125, "step": 57155 }, { "epoch": 0.4942456182825916, "grad_norm": 5.150320139440606, "learning_rate": 5.140407957376853e-06, "loss": 0.219622802734375, "step": 57160 }, { "epoch": 0.49428885180413484, "grad_norm": 0.39196816307152416, "learning_rate": 5.140265199365008e-06, "loss": 0.11102294921875, "step": 57165 }, { "epoch": 0.49433208532567813, "grad_norm": 6.434815248715271, "learning_rate": 5.140122431482448e-06, "loss": 0.3472747802734375, "step": 57170 }, { "epoch": 0.49437531884722136, "grad_norm": 9.534734392380752, "learning_rate": 5.139979653729833e-06, "loss": 0.212603759765625, "step": 57175 }, { "epoch": 0.49441855236876464, "grad_norm": 7.25981772493794, "learning_rate": 5.1398368661078205e-06, "loss": 0.0931427001953125, "step": 57180 }, { "epoch": 0.4944617858903079, "grad_norm": 1.7175959211967242, "learning_rate": 5.139694068617069e-06, "loss": 0.2695167541503906, "step": 57185 }, { "epoch": 0.49450501941185115, "grad_norm": 4.128468020336775, "learning_rate": 5.1395512612582375e-06, "loss": 0.10444869995117187, "step": 57190 }, { "epoch": 0.49454825293339444, "grad_norm": 0.18252665078988198, "learning_rate": 5.139408444031986e-06, "loss": 0.03315248489379883, "step": 57195 }, { "epoch": 0.4945914864549377, "grad_norm": 23.040955580299304, "learning_rate": 5.139265616938971e-06, "loss": 0.26868743896484376, "step": 57200 }, { "epoch": 0.49463471997648095, "grad_norm": 11.245636119601716, "learning_rate": 5.139122779979851e-06, "loss": 0.2544586181640625, "step": 57205 }, { "epoch": 0.49467795349802424, "grad_norm": 0.3478518547347926, "learning_rate": 5.138979933155288e-06, "loss": 0.14685287475585937, "step": 57210 }, { "epoch": 0.49472118701956747, "grad_norm": 0.8714748983316298, "learning_rate": 5.138837076465937e-06, "loss": 0.224359130859375, "step": 57215 }, { "epoch": 0.49476442054111075, "grad_norm": 7.801782937758989, "learning_rate": 5.138694209912459e-06, "loss": 0.3937042236328125, "step": 57220 }, { "epoch": 0.49480765406265403, "grad_norm": 8.767959974034744, "learning_rate": 5.1385513334955115e-06, "loss": 0.11879119873046876, "step": 57225 }, { "epoch": 0.49485088758419726, "grad_norm": 30.296875136164708, "learning_rate": 5.138408447215755e-06, "loss": 0.6384063720703125, "step": 57230 }, { "epoch": 0.49489412110574055, "grad_norm": 54.5302536479765, "learning_rate": 5.1382655510738465e-06, "loss": 0.3310302734375, "step": 57235 }, { "epoch": 0.49493735462728383, "grad_norm": 0.6350531615888715, "learning_rate": 5.138122645070448e-06, "loss": 0.2109283447265625, "step": 57240 }, { "epoch": 0.49498058814882706, "grad_norm": 11.223113253203278, "learning_rate": 5.1379797292062155e-06, "loss": 0.100341796875, "step": 57245 }, { "epoch": 0.49502382167037035, "grad_norm": 18.57993306971831, "learning_rate": 5.137836803481809e-06, "loss": 0.34489593505859373, "step": 57250 }, { "epoch": 0.49506705519191363, "grad_norm": 1.8789632697305996, "learning_rate": 5.1376938678978895e-06, "loss": 0.25206298828125, "step": 57255 }, { "epoch": 0.49511028871345686, "grad_norm": 32.99821548587454, "learning_rate": 5.1375509224551136e-06, "loss": 0.3959716796875, "step": 57260 }, { "epoch": 0.49515352223500014, "grad_norm": 27.678664092619034, "learning_rate": 5.137407967154143e-06, "loss": 0.3490692138671875, "step": 57265 }, { "epoch": 0.49519675575654337, "grad_norm": 2.06866124624915, "learning_rate": 5.137265001995635e-06, "loss": 0.3010009765625, "step": 57270 }, { "epoch": 0.49523998927808666, "grad_norm": 23.359328373951765, "learning_rate": 5.137122026980249e-06, "loss": 0.15490455627441407, "step": 57275 }, { "epoch": 0.49528322279962994, "grad_norm": 42.99248825858913, "learning_rate": 5.1369790421086465e-06, "loss": 0.27078857421875, "step": 57280 }, { "epoch": 0.49532645632117317, "grad_norm": 2.9571479478191423, "learning_rate": 5.136836047381485e-06, "loss": 0.06490936279296874, "step": 57285 }, { "epoch": 0.49536968984271645, "grad_norm": 7.023811557251552, "learning_rate": 5.136693042799423e-06, "loss": 0.08618106842041015, "step": 57290 }, { "epoch": 0.49541292336425974, "grad_norm": 0.46234196098285457, "learning_rate": 5.136550028363123e-06, "loss": 0.13500823974609374, "step": 57295 }, { "epoch": 0.49545615688580297, "grad_norm": 6.232508314691749, "learning_rate": 5.136407004073243e-06, "loss": 0.2014862060546875, "step": 57300 }, { "epoch": 0.49549939040734625, "grad_norm": 27.546250033195914, "learning_rate": 5.1362639699304415e-06, "loss": 0.29672775268554685, "step": 57305 }, { "epoch": 0.4955426239288895, "grad_norm": 23.581728285561347, "learning_rate": 5.1361209259353804e-06, "loss": 0.1660186767578125, "step": 57310 }, { "epoch": 0.49558585745043277, "grad_norm": 28.208452169650617, "learning_rate": 5.135977872088719e-06, "loss": 0.2466602325439453, "step": 57315 }, { "epoch": 0.49562909097197605, "grad_norm": 4.041477218303325, "learning_rate": 5.135834808391115e-06, "loss": 0.2859100341796875, "step": 57320 }, { "epoch": 0.4956723244935193, "grad_norm": 5.4887658313047, "learning_rate": 5.13569173484323e-06, "loss": 0.3352783203125, "step": 57325 }, { "epoch": 0.49571555801506256, "grad_norm": 2.5001436518569746, "learning_rate": 5.135548651445724e-06, "loss": 0.04841766357421875, "step": 57330 }, { "epoch": 0.49575879153660585, "grad_norm": 4.2919987509777675, "learning_rate": 5.135405558199255e-06, "loss": 0.1943634033203125, "step": 57335 }, { "epoch": 0.4958020250581491, "grad_norm": 3.81339301506086, "learning_rate": 5.1352624551044845e-06, "loss": 0.07997817993164062, "step": 57340 }, { "epoch": 0.49584525857969236, "grad_norm": 4.030776960632459, "learning_rate": 5.135119342162073e-06, "loss": 0.124346923828125, "step": 57345 }, { "epoch": 0.4958884921012356, "grad_norm": 0.6632187610942714, "learning_rate": 5.13497621937268e-06, "loss": 0.13354949951171874, "step": 57350 }, { "epoch": 0.4959317256227789, "grad_norm": 3.639163607486372, "learning_rate": 5.134833086736965e-06, "loss": 0.03887786865234375, "step": 57355 }, { "epoch": 0.49597495914432216, "grad_norm": 0.09584400488875758, "learning_rate": 5.134689944255588e-06, "loss": 0.07680740356445312, "step": 57360 }, { "epoch": 0.4960181926658654, "grad_norm": 26.549047597690503, "learning_rate": 5.134546791929211e-06, "loss": 0.1106048583984375, "step": 57365 }, { "epoch": 0.49606142618740867, "grad_norm": 4.171254516763449, "learning_rate": 5.134403629758492e-06, "loss": 0.26507568359375, "step": 57370 }, { "epoch": 0.49610465970895196, "grad_norm": 0.2457667858642076, "learning_rate": 5.1342604577440915e-06, "loss": 0.24896240234375, "step": 57375 }, { "epoch": 0.4961478932304952, "grad_norm": 1.946047523454154, "learning_rate": 5.1341172758866705e-06, "loss": 0.1227294921875, "step": 57380 }, { "epoch": 0.49619112675203847, "grad_norm": 7.798028082950219, "learning_rate": 5.133974084186889e-06, "loss": 0.09343147277832031, "step": 57385 }, { "epoch": 0.4962343602735817, "grad_norm": 13.917571773116267, "learning_rate": 5.1338308826454085e-06, "loss": 0.13219451904296875, "step": 57390 }, { "epoch": 0.496277593795125, "grad_norm": 2.225383513246604, "learning_rate": 5.133687671262888e-06, "loss": 0.2272735595703125, "step": 57395 }, { "epoch": 0.49632082731666827, "grad_norm": 44.48275924079175, "learning_rate": 5.133544450039988e-06, "loss": 0.10752525329589843, "step": 57400 }, { "epoch": 0.4963640608382115, "grad_norm": 6.2315917087387644, "learning_rate": 5.133401218977371e-06, "loss": 0.3755767822265625, "step": 57405 }, { "epoch": 0.4964072943597548, "grad_norm": 0.4264740427709329, "learning_rate": 5.133257978075694e-06, "loss": 0.20479393005371094, "step": 57410 }, { "epoch": 0.49645052788129806, "grad_norm": 0.6329484273196743, "learning_rate": 5.1331147273356205e-06, "loss": 0.3633026123046875, "step": 57415 }, { "epoch": 0.4964937614028413, "grad_norm": 0.5707111489728635, "learning_rate": 5.132971466757811e-06, "loss": 0.1604034423828125, "step": 57420 }, { "epoch": 0.4965369949243846, "grad_norm": 25.92500055066992, "learning_rate": 5.132828196342926e-06, "loss": 0.32982177734375, "step": 57425 }, { "epoch": 0.4965802284459278, "grad_norm": 5.366542980415289, "learning_rate": 5.132684916091624e-06, "loss": 0.1373046875, "step": 57430 }, { "epoch": 0.4966234619674711, "grad_norm": 0.11248876537126856, "learning_rate": 5.132541626004569e-06, "loss": 0.2034515380859375, "step": 57435 }, { "epoch": 0.4966666954890144, "grad_norm": 23.561778873459758, "learning_rate": 5.13239832608242e-06, "loss": 0.419732666015625, "step": 57440 }, { "epoch": 0.4967099290105576, "grad_norm": 0.6775763319962287, "learning_rate": 5.132255016325839e-06, "loss": 0.042926025390625, "step": 57445 }, { "epoch": 0.4967531625321009, "grad_norm": 26.689805589257052, "learning_rate": 5.132111696735485e-06, "loss": 0.14911727905273436, "step": 57450 }, { "epoch": 0.4967963960536442, "grad_norm": 10.255060984138533, "learning_rate": 5.131968367312022e-06, "loss": 0.10155105590820312, "step": 57455 }, { "epoch": 0.4968396295751874, "grad_norm": 2.660882800208732, "learning_rate": 5.131825028056108e-06, "loss": 0.05526046752929688, "step": 57460 }, { "epoch": 0.4968828630967307, "grad_norm": 14.791282353046428, "learning_rate": 5.1316816789684045e-06, "loss": 0.5109954833984375, "step": 57465 }, { "epoch": 0.49692609661827397, "grad_norm": 0.511914092049935, "learning_rate": 5.131538320049575e-06, "loss": 0.09277496337890626, "step": 57470 }, { "epoch": 0.4969693301398172, "grad_norm": 6.610101171819164, "learning_rate": 5.131394951300279e-06, "loss": 0.23571929931640626, "step": 57475 }, { "epoch": 0.4970125636613605, "grad_norm": 22.384499854717735, "learning_rate": 5.131251572721178e-06, "loss": 0.10643463134765625, "step": 57480 }, { "epoch": 0.4970557971829037, "grad_norm": 4.222897507398039, "learning_rate": 5.131108184312932e-06, "loss": 0.0643524169921875, "step": 57485 }, { "epoch": 0.497099030704447, "grad_norm": 5.697502798483105, "learning_rate": 5.130964786076204e-06, "loss": 0.35028076171875, "step": 57490 }, { "epoch": 0.4971422642259903, "grad_norm": 1.2859465487945125, "learning_rate": 5.130821378011654e-06, "loss": 0.42697601318359374, "step": 57495 }, { "epoch": 0.4971854977475335, "grad_norm": 14.664206568871782, "learning_rate": 5.130677960119945e-06, "loss": 0.4390869140625, "step": 57500 }, { "epoch": 0.4972287312690768, "grad_norm": 10.81664115885757, "learning_rate": 5.130534532401737e-06, "loss": 0.0690399169921875, "step": 57505 }, { "epoch": 0.4972719647906201, "grad_norm": 7.314452888621677, "learning_rate": 5.130391094857692e-06, "loss": 0.2096923828125, "step": 57510 }, { "epoch": 0.4973151983121633, "grad_norm": 0.6346040843270241, "learning_rate": 5.130247647488472e-06, "loss": 0.10455055236816406, "step": 57515 }, { "epoch": 0.4973584318337066, "grad_norm": 5.561489789862974, "learning_rate": 5.130104190294738e-06, "loss": 0.5196136474609375, "step": 57520 }, { "epoch": 0.4974016653552498, "grad_norm": 17.28661069853044, "learning_rate": 5.129960723277152e-06, "loss": 0.18984146118164064, "step": 57525 }, { "epoch": 0.4974448988767931, "grad_norm": 0.8285790407470337, "learning_rate": 5.129817246436374e-06, "loss": 0.0915863037109375, "step": 57530 }, { "epoch": 0.4974881323983364, "grad_norm": 13.349731930238658, "learning_rate": 5.129673759773068e-06, "loss": 0.077276611328125, "step": 57535 }, { "epoch": 0.4975313659198796, "grad_norm": 14.165402587725104, "learning_rate": 5.1295302632878954e-06, "loss": 0.3261383056640625, "step": 57540 }, { "epoch": 0.4975745994414229, "grad_norm": 8.155906375876715, "learning_rate": 5.129386756981516e-06, "loss": 0.29839630126953126, "step": 57545 }, { "epoch": 0.4976178329629662, "grad_norm": 1.9711593501819613, "learning_rate": 5.129243240854594e-06, "loss": 0.06038322448730469, "step": 57550 }, { "epoch": 0.4976610664845094, "grad_norm": 4.363650082206104, "learning_rate": 5.12909971490779e-06, "loss": 0.0715606689453125, "step": 57555 }, { "epoch": 0.4977043000060527, "grad_norm": 0.41187542303611757, "learning_rate": 5.128956179141766e-06, "loss": 0.2205474853515625, "step": 57560 }, { "epoch": 0.49774753352759593, "grad_norm": 3.650625769475153, "learning_rate": 5.1288126335571845e-06, "loss": 0.09096832275390625, "step": 57565 }, { "epoch": 0.4977907670491392, "grad_norm": 1.9617640771164426, "learning_rate": 5.128669078154708e-06, "loss": 0.2564208984375, "step": 57570 }, { "epoch": 0.4978340005706825, "grad_norm": 47.444009439313035, "learning_rate": 5.128525512934996e-06, "loss": 0.694476318359375, "step": 57575 }, { "epoch": 0.49787723409222573, "grad_norm": 7.034787601120349, "learning_rate": 5.128381937898714e-06, "loss": 0.17872161865234376, "step": 57580 }, { "epoch": 0.497920467613769, "grad_norm": 1.8084495027103809, "learning_rate": 5.1282383530465216e-06, "loss": 0.28543815612792967, "step": 57585 }, { "epoch": 0.4979637011353123, "grad_norm": 6.2880388692394735, "learning_rate": 5.128094758379082e-06, "loss": 0.22676849365234375, "step": 57590 }, { "epoch": 0.4980069346568555, "grad_norm": 11.853315271948862, "learning_rate": 5.127951153897059e-06, "loss": 0.10940284729003906, "step": 57595 }, { "epoch": 0.4980501681783988, "grad_norm": 6.702544767301986, "learning_rate": 5.127807539601112e-06, "loss": 0.0945465087890625, "step": 57600 }, { "epoch": 0.49809340169994204, "grad_norm": 8.56737676319973, "learning_rate": 5.127663915491905e-06, "loss": 0.052501678466796875, "step": 57605 }, { "epoch": 0.4981366352214853, "grad_norm": 0.2592770851144326, "learning_rate": 5.127520281570099e-06, "loss": 0.046459197998046875, "step": 57610 }, { "epoch": 0.4981798687430286, "grad_norm": 2.1694192307757545, "learning_rate": 5.127376637836358e-06, "loss": 0.23058929443359374, "step": 57615 }, { "epoch": 0.49822310226457184, "grad_norm": 0.9164415694136511, "learning_rate": 5.127232984291344e-06, "loss": 0.08163948059082031, "step": 57620 }, { "epoch": 0.4982663357861151, "grad_norm": 1.986673608585496, "learning_rate": 5.1270893209357205e-06, "loss": 0.12833786010742188, "step": 57625 }, { "epoch": 0.4983095693076584, "grad_norm": 5.64372217884177, "learning_rate": 5.126945647770148e-06, "loss": 0.1349884033203125, "step": 57630 }, { "epoch": 0.49835280282920164, "grad_norm": 0.3076194670240015, "learning_rate": 5.126801964795289e-06, "loss": 0.11060600280761719, "step": 57635 }, { "epoch": 0.4983960363507449, "grad_norm": 3.4776683324040842, "learning_rate": 5.12665827201181e-06, "loss": 0.5196941375732422, "step": 57640 }, { "epoch": 0.4984392698722882, "grad_norm": 6.784865319240239, "learning_rate": 5.126514569420369e-06, "loss": 0.2277587890625, "step": 57645 }, { "epoch": 0.49848250339383143, "grad_norm": 7.514901687155419, "learning_rate": 5.126370857021631e-06, "loss": 0.10029296875, "step": 57650 }, { "epoch": 0.4985257369153747, "grad_norm": 3.7131662003052286, "learning_rate": 5.126227134816258e-06, "loss": 0.09840087890625, "step": 57655 }, { "epoch": 0.49856897043691795, "grad_norm": 8.126489482878238, "learning_rate": 5.126083402804915e-06, "loss": 0.162628173828125, "step": 57660 }, { "epoch": 0.49861220395846123, "grad_norm": 1.6181459880772566, "learning_rate": 5.125939660988261e-06, "loss": 0.445172119140625, "step": 57665 }, { "epoch": 0.4986554374800045, "grad_norm": 4.6009927136167095, "learning_rate": 5.125795909366963e-06, "loss": 0.03623199462890625, "step": 57670 }, { "epoch": 0.49869867100154774, "grad_norm": 16.862023886930693, "learning_rate": 5.125652147941681e-06, "loss": 0.33719940185546876, "step": 57675 }, { "epoch": 0.49874190452309103, "grad_norm": 12.631896865137072, "learning_rate": 5.12550837671308e-06, "loss": 0.044346046447753903, "step": 57680 }, { "epoch": 0.4987851380446343, "grad_norm": 44.57579837886999, "learning_rate": 5.125364595681822e-06, "loss": 0.475775146484375, "step": 57685 }, { "epoch": 0.49882837156617754, "grad_norm": 13.603357965534258, "learning_rate": 5.12522080484857e-06, "loss": 0.3851478576660156, "step": 57690 }, { "epoch": 0.4988716050877208, "grad_norm": 15.199435824137073, "learning_rate": 5.125077004213987e-06, "loss": 0.20382041931152345, "step": 57695 }, { "epoch": 0.49891483860926406, "grad_norm": 0.9686347934463637, "learning_rate": 5.124933193778738e-06, "loss": 0.12021255493164062, "step": 57700 }, { "epoch": 0.49895807213080734, "grad_norm": 17.804174969325757, "learning_rate": 5.124789373543483e-06, "loss": 0.121331787109375, "step": 57705 }, { "epoch": 0.4990013056523506, "grad_norm": 9.321479056726195, "learning_rate": 5.124645543508889e-06, "loss": 0.27947235107421875, "step": 57710 }, { "epoch": 0.49904453917389385, "grad_norm": 12.742892361589734, "learning_rate": 5.124501703675617e-06, "loss": 0.13438720703125, "step": 57715 }, { "epoch": 0.49908777269543714, "grad_norm": 5.697428843037649, "learning_rate": 5.12435785404433e-06, "loss": 0.16612396240234376, "step": 57720 }, { "epoch": 0.4991310062169804, "grad_norm": 3.829707477569238, "learning_rate": 5.124213994615692e-06, "loss": 0.332183837890625, "step": 57725 }, { "epoch": 0.49917423973852365, "grad_norm": 8.85824111824502, "learning_rate": 5.1240701253903676e-06, "loss": 0.07637367248535157, "step": 57730 }, { "epoch": 0.49921747326006694, "grad_norm": 21.330813059457757, "learning_rate": 5.123926246369019e-06, "loss": 0.10408515930175781, "step": 57735 }, { "epoch": 0.49926070678161016, "grad_norm": 0.04364331009398846, "learning_rate": 5.1237823575523115e-06, "loss": 0.20440101623535156, "step": 57740 }, { "epoch": 0.49930394030315345, "grad_norm": 18.454347415345538, "learning_rate": 5.123638458940906e-06, "loss": 0.16748733520507814, "step": 57745 }, { "epoch": 0.49934717382469673, "grad_norm": 22.205106453049435, "learning_rate": 5.123494550535468e-06, "loss": 0.3615119934082031, "step": 57750 }, { "epoch": 0.49939040734623996, "grad_norm": 1.0673938916732748, "learning_rate": 5.1233506323366604e-06, "loss": 0.13404464721679688, "step": 57755 }, { "epoch": 0.49943364086778325, "grad_norm": 16.16182136156761, "learning_rate": 5.123206704345148e-06, "loss": 0.14160919189453125, "step": 57760 }, { "epoch": 0.49947687438932653, "grad_norm": 9.959687802513091, "learning_rate": 5.123062766561595e-06, "loss": 0.1192230224609375, "step": 57765 }, { "epoch": 0.49952010791086976, "grad_norm": 6.850690614233785, "learning_rate": 5.122918818986662e-06, "loss": 0.14494171142578124, "step": 57770 }, { "epoch": 0.49956334143241304, "grad_norm": 5.192361156262009, "learning_rate": 5.122774861621015e-06, "loss": 0.3276153564453125, "step": 57775 }, { "epoch": 0.4996065749539563, "grad_norm": 13.228579646643464, "learning_rate": 5.12263089446532e-06, "loss": 0.15825042724609376, "step": 57780 }, { "epoch": 0.49964980847549956, "grad_norm": 2.0128700966660755, "learning_rate": 5.122486917520237e-06, "loss": 0.19557876586914064, "step": 57785 }, { "epoch": 0.49969304199704284, "grad_norm": 2.421076594385165, "learning_rate": 5.1223429307864326e-06, "loss": 0.29468765258789065, "step": 57790 }, { "epoch": 0.49973627551858607, "grad_norm": 9.002332659292616, "learning_rate": 5.12219893426457e-06, "loss": 0.1904022216796875, "step": 57795 }, { "epoch": 0.49977950904012935, "grad_norm": 3.1067458766609777, "learning_rate": 5.122054927955313e-06, "loss": 0.0309295654296875, "step": 57800 }, { "epoch": 0.49982274256167264, "grad_norm": 4.604300654026297, "learning_rate": 5.121910911859327e-06, "loss": 0.2540580749511719, "step": 57805 }, { "epoch": 0.49986597608321587, "grad_norm": 2.4109523512285027, "learning_rate": 5.121766885977274e-06, "loss": 0.1367584228515625, "step": 57810 }, { "epoch": 0.49990920960475915, "grad_norm": 0.7223692933154939, "learning_rate": 5.121622850309821e-06, "loss": 0.09977645874023437, "step": 57815 }, { "epoch": 0.49995244312630244, "grad_norm": 34.94418767281978, "learning_rate": 5.121478804857631e-06, "loss": 0.37558135986328123, "step": 57820 }, { "epoch": 0.49999567664784567, "grad_norm": 0.3533320116142757, "learning_rate": 5.1213347496213674e-06, "loss": 0.30629501342773435, "step": 57825 }, { "epoch": 0.500038910169389, "grad_norm": 3.845712474052244, "learning_rate": 5.1211906846016965e-06, "loss": 0.2660400390625, "step": 57830 }, { "epoch": 0.5000821436909322, "grad_norm": 8.404699851548074, "learning_rate": 5.1210466097992795e-06, "loss": 0.11290855407714843, "step": 57835 }, { "epoch": 0.5001253772124755, "grad_norm": 12.822482186044413, "learning_rate": 5.120902525214784e-06, "loss": 0.06322669982910156, "step": 57840 }, { "epoch": 0.5001686107340187, "grad_norm": 2.846187446515203, "learning_rate": 5.120758430848875e-06, "loss": 0.0895843505859375, "step": 57845 }, { "epoch": 0.500211844255562, "grad_norm": 15.86927375186087, "learning_rate": 5.120614326702213e-06, "loss": 0.2301422119140625, "step": 57850 }, { "epoch": 0.5002550777771052, "grad_norm": 10.497833116008364, "learning_rate": 5.120470212775467e-06, "loss": 0.063311767578125, "step": 57855 }, { "epoch": 0.5002983112986485, "grad_norm": 38.67036109189676, "learning_rate": 5.120326089069298e-06, "loss": 0.209283447265625, "step": 57860 }, { "epoch": 0.5003415448201918, "grad_norm": 17.012463458433775, "learning_rate": 5.120181955584374e-06, "loss": 0.26985931396484375, "step": 57865 }, { "epoch": 0.500384778341735, "grad_norm": 8.305513396074371, "learning_rate": 5.120037812321358e-06, "loss": 0.09510040283203125, "step": 57870 }, { "epoch": 0.5004280118632783, "grad_norm": 4.48991672030308, "learning_rate": 5.119893659280915e-06, "loss": 0.13021087646484375, "step": 57875 }, { "epoch": 0.5004712453848216, "grad_norm": 14.809111757373831, "learning_rate": 5.1197494964637085e-06, "loss": 0.11537628173828125, "step": 57880 }, { "epoch": 0.5005144789063648, "grad_norm": 8.831959350155685, "learning_rate": 5.119605323870406e-06, "loss": 0.1733673095703125, "step": 57885 }, { "epoch": 0.5005577124279081, "grad_norm": 7.229379235883895, "learning_rate": 5.11946114150167e-06, "loss": 0.0649688720703125, "step": 57890 }, { "epoch": 0.5006009459494514, "grad_norm": 35.94408301258955, "learning_rate": 5.119316949358167e-06, "loss": 0.3302825927734375, "step": 57895 }, { "epoch": 0.5006441794709946, "grad_norm": 21.684563562403795, "learning_rate": 5.119172747440562e-06, "loss": 0.19376068115234374, "step": 57900 }, { "epoch": 0.5006874129925379, "grad_norm": 4.217060635426231, "learning_rate": 5.119028535749519e-06, "loss": 0.122344970703125, "step": 57905 }, { "epoch": 0.5007306465140812, "grad_norm": 2.0799251899953255, "learning_rate": 5.118884314285703e-06, "loss": 0.2557964324951172, "step": 57910 }, { "epoch": 0.5007738800356244, "grad_norm": 6.76314851252023, "learning_rate": 5.118740083049781e-06, "loss": 0.021317672729492188, "step": 57915 }, { "epoch": 0.5008171135571677, "grad_norm": 2.446768749790831, "learning_rate": 5.1185958420424156e-06, "loss": 0.07679214477539062, "step": 57920 }, { "epoch": 0.500860347078711, "grad_norm": 20.38457037054321, "learning_rate": 5.118451591264274e-06, "loss": 0.09161605834960937, "step": 57925 }, { "epoch": 0.5009035806002542, "grad_norm": 2.083374968242004, "learning_rate": 5.118307330716021e-06, "loss": 0.2692596435546875, "step": 57930 }, { "epoch": 0.5009468141217974, "grad_norm": 1.81360354761919, "learning_rate": 5.118163060398322e-06, "loss": 0.22018966674804688, "step": 57935 }, { "epoch": 0.5009900476433408, "grad_norm": 0.5655767717664514, "learning_rate": 5.118018780311842e-06, "loss": 0.080804443359375, "step": 57940 }, { "epoch": 0.501033281164884, "grad_norm": 5.401904330798626, "learning_rate": 5.117874490457246e-06, "loss": 0.05297698974609375, "step": 57945 }, { "epoch": 0.5010765146864272, "grad_norm": 10.40435064983009, "learning_rate": 5.1177301908352005e-06, "loss": 0.1313018798828125, "step": 57950 }, { "epoch": 0.5011197482079706, "grad_norm": 14.57777845751251, "learning_rate": 5.11758588144637e-06, "loss": 0.13642578125, "step": 57955 }, { "epoch": 0.5011629817295138, "grad_norm": 14.286073489047645, "learning_rate": 5.117441562291422e-06, "loss": 0.167156982421875, "step": 57960 }, { "epoch": 0.501206215251057, "grad_norm": 3.514706759249179, "learning_rate": 5.117297233371019e-06, "loss": 0.2768756866455078, "step": 57965 }, { "epoch": 0.5012494487726004, "grad_norm": 5.481189916836924, "learning_rate": 5.1171528946858285e-06, "loss": 0.08923187255859374, "step": 57970 }, { "epoch": 0.5012926822941436, "grad_norm": 1.278627333542684, "learning_rate": 5.117008546236517e-06, "loss": 0.08538131713867188, "step": 57975 }, { "epoch": 0.5013359158156868, "grad_norm": 19.964503436744096, "learning_rate": 5.1168641880237485e-06, "loss": 0.56292724609375, "step": 57980 }, { "epoch": 0.5013791493372302, "grad_norm": 4.742593452021101, "learning_rate": 5.1167198200481885e-06, "loss": 0.361810302734375, "step": 57985 }, { "epoch": 0.5014223828587734, "grad_norm": 33.92872196489868, "learning_rate": 5.116575442310505e-06, "loss": 0.147943115234375, "step": 57990 }, { "epoch": 0.5014656163803166, "grad_norm": 1.6438418725214723, "learning_rate": 5.116431054811362e-06, "loss": 0.11577301025390625, "step": 57995 }, { "epoch": 0.50150884990186, "grad_norm": 34.67915386093295, "learning_rate": 5.116286657551426e-06, "loss": 0.13792648315429687, "step": 58000 }, { "epoch": 0.5015520834234032, "grad_norm": 8.068409829915668, "learning_rate": 5.116142250531362e-06, "loss": 0.21726951599121094, "step": 58005 }, { "epoch": 0.5015953169449464, "grad_norm": 4.488976043915484, "learning_rate": 5.115997833751839e-06, "loss": 0.1768798828125, "step": 58010 }, { "epoch": 0.5016385504664898, "grad_norm": 5.525024847183343, "learning_rate": 5.11585340721352e-06, "loss": 0.07719268798828124, "step": 58015 }, { "epoch": 0.501681783988033, "grad_norm": 17.97793584737625, "learning_rate": 5.115708970917072e-06, "loss": 0.41983795166015625, "step": 58020 }, { "epoch": 0.5017250175095762, "grad_norm": 21.62022961196236, "learning_rate": 5.115564524863161e-06, "loss": 0.319268798828125, "step": 58025 }, { "epoch": 0.5017682510311194, "grad_norm": 5.404395266584007, "learning_rate": 5.115420069052453e-06, "loss": 0.24136276245117189, "step": 58030 }, { "epoch": 0.5018114845526628, "grad_norm": 2.3835588713414686, "learning_rate": 5.115275603485615e-06, "loss": 0.08918838500976563, "step": 58035 }, { "epoch": 0.501854718074206, "grad_norm": 0.5616307409244186, "learning_rate": 5.115131128163314e-06, "loss": 0.07995719909667968, "step": 58040 }, { "epoch": 0.5018979515957492, "grad_norm": 0.2721931156354146, "learning_rate": 5.114986643086214e-06, "loss": 0.047503662109375, "step": 58045 }, { "epoch": 0.5019411851172926, "grad_norm": 16.095457753036907, "learning_rate": 5.114842148254981e-06, "loss": 0.14856643676757814, "step": 58050 }, { "epoch": 0.5019844186388358, "grad_norm": 1.2537853184513936, "learning_rate": 5.114697643670285e-06, "loss": 0.022088623046875, "step": 58055 }, { "epoch": 0.502027652160379, "grad_norm": 8.526768643095359, "learning_rate": 5.11455312933279e-06, "loss": 0.058147430419921875, "step": 58060 }, { "epoch": 0.5020708856819224, "grad_norm": 2.3332590362991943, "learning_rate": 5.1144086052431614e-06, "loss": 0.24812698364257812, "step": 58065 }, { "epoch": 0.5021141192034656, "grad_norm": 2.2217592048688877, "learning_rate": 5.114264071402069e-06, "loss": 0.08983154296875, "step": 58070 }, { "epoch": 0.5021573527250088, "grad_norm": 1.7882547183943975, "learning_rate": 5.114119527810177e-06, "loss": 0.0587371826171875, "step": 58075 }, { "epoch": 0.5022005862465522, "grad_norm": 2.1221150716778627, "learning_rate": 5.1139749744681514e-06, "loss": 0.43649826049804685, "step": 58080 }, { "epoch": 0.5022438197680954, "grad_norm": 3.675958150162464, "learning_rate": 5.1138304113766615e-06, "loss": 0.23875732421875, "step": 58085 }, { "epoch": 0.5022870532896386, "grad_norm": 7.033405938013344, "learning_rate": 5.113685838536371e-06, "loss": 0.2024749755859375, "step": 58090 }, { "epoch": 0.502330286811182, "grad_norm": 0.5763750450904697, "learning_rate": 5.11354125594795e-06, "loss": 0.0965179443359375, "step": 58095 }, { "epoch": 0.5023735203327252, "grad_norm": 24.976298429539014, "learning_rate": 5.113396663612063e-06, "loss": 0.15729904174804688, "step": 58100 }, { "epoch": 0.5024167538542684, "grad_norm": 15.97629122331392, "learning_rate": 5.113252061529376e-06, "loss": 0.142144775390625, "step": 58105 }, { "epoch": 0.5024599873758117, "grad_norm": 3.042754208723532, "learning_rate": 5.113107449700559e-06, "loss": 0.11105194091796874, "step": 58110 }, { "epoch": 0.502503220897355, "grad_norm": 0.22713047937749417, "learning_rate": 5.112962828126276e-06, "loss": 0.10142669677734376, "step": 58115 }, { "epoch": 0.5025464544188982, "grad_norm": 14.21114547378822, "learning_rate": 5.112818196807195e-06, "loss": 0.3303314208984375, "step": 58120 }, { "epoch": 0.5025896879404415, "grad_norm": 0.5221272734941349, "learning_rate": 5.112673555743985e-06, "loss": 0.18174896240234376, "step": 58125 }, { "epoch": 0.5026329214619848, "grad_norm": 3.6370696713865778, "learning_rate": 5.112528904937309e-06, "loss": 0.10067596435546874, "step": 58130 }, { "epoch": 0.502676154983528, "grad_norm": 0.684254976617005, "learning_rate": 5.1123842443878365e-06, "loss": 0.06774368286132812, "step": 58135 }, { "epoch": 0.5027193885050713, "grad_norm": 8.866483461599787, "learning_rate": 5.112239574096235e-06, "loss": 0.05024871826171875, "step": 58140 }, { "epoch": 0.5027626220266146, "grad_norm": 2.3004234907431265, "learning_rate": 5.112094894063172e-06, "loss": 0.23303375244140626, "step": 58145 }, { "epoch": 0.5028058555481578, "grad_norm": 15.756613373680269, "learning_rate": 5.111950204289314e-06, "loss": 0.070928955078125, "step": 58150 }, { "epoch": 0.502849089069701, "grad_norm": 28.928831660260457, "learning_rate": 5.111805504775327e-06, "loss": 0.4179847717285156, "step": 58155 }, { "epoch": 0.5028923225912444, "grad_norm": 25.874550675612078, "learning_rate": 5.11166079552188e-06, "loss": 0.27395477294921877, "step": 58160 }, { "epoch": 0.5029355561127876, "grad_norm": 1.5074464273009773, "learning_rate": 5.111516076529641e-06, "loss": 0.08956680297851563, "step": 58165 }, { "epoch": 0.5029787896343308, "grad_norm": 85.24319783469986, "learning_rate": 5.111371347799276e-06, "loss": 0.305816650390625, "step": 58170 }, { "epoch": 0.5030220231558742, "grad_norm": 5.227825871129071, "learning_rate": 5.111226609331451e-06, "loss": 0.053118896484375, "step": 58175 }, { "epoch": 0.5030652566774174, "grad_norm": 6.323069382940032, "learning_rate": 5.111081861126838e-06, "loss": 0.1377716064453125, "step": 58180 }, { "epoch": 0.5031084901989606, "grad_norm": 1.1469184427799737, "learning_rate": 5.110937103186101e-06, "loss": 0.073492431640625, "step": 58185 }, { "epoch": 0.503151723720504, "grad_norm": 7.869502194688199, "learning_rate": 5.110792335509909e-06, "loss": 0.12546558380126954, "step": 58190 }, { "epoch": 0.5031949572420472, "grad_norm": 22.492534625057427, "learning_rate": 5.110647558098929e-06, "loss": 0.0971221923828125, "step": 58195 }, { "epoch": 0.5032381907635904, "grad_norm": 6.823477032161401, "learning_rate": 5.110502770953829e-06, "loss": 0.224560546875, "step": 58200 }, { "epoch": 0.5032814242851337, "grad_norm": 14.283368852536281, "learning_rate": 5.110357974075276e-06, "loss": 0.125909423828125, "step": 58205 }, { "epoch": 0.503324657806677, "grad_norm": 4.941379944680487, "learning_rate": 5.110213167463939e-06, "loss": 0.2491851806640625, "step": 58210 }, { "epoch": 0.5033678913282202, "grad_norm": 23.015994107081152, "learning_rate": 5.110068351120486e-06, "loss": 0.2579833984375, "step": 58215 }, { "epoch": 0.5034111248497635, "grad_norm": 16.540219384261025, "learning_rate": 5.109923525045584e-06, "loss": 0.395166015625, "step": 58220 }, { "epoch": 0.5034543583713068, "grad_norm": 25.965905599335226, "learning_rate": 5.1097786892399e-06, "loss": 0.140460205078125, "step": 58225 }, { "epoch": 0.50349759189285, "grad_norm": 2.1578406311982943, "learning_rate": 5.109633843704104e-06, "loss": 0.09796142578125, "step": 58230 }, { "epoch": 0.5035408254143933, "grad_norm": 7.101431508601173, "learning_rate": 5.109488988438863e-06, "loss": 0.11928558349609375, "step": 58235 }, { "epoch": 0.5035840589359366, "grad_norm": 2.0191240751507853, "learning_rate": 5.109344123444845e-06, "loss": 0.4951080322265625, "step": 58240 }, { "epoch": 0.5036272924574798, "grad_norm": 0.5896727531617437, "learning_rate": 5.109199248722719e-06, "loss": 0.173016357421875, "step": 58245 }, { "epoch": 0.5036705259790231, "grad_norm": 17.531437891974345, "learning_rate": 5.109054364273152e-06, "loss": 0.3712738037109375, "step": 58250 }, { "epoch": 0.5037137595005664, "grad_norm": 11.868196235363355, "learning_rate": 5.108909470096813e-06, "loss": 0.0634429931640625, "step": 58255 }, { "epoch": 0.5037569930221096, "grad_norm": 2.8172046664399804, "learning_rate": 5.108764566194368e-06, "loss": 0.050382232666015624, "step": 58260 }, { "epoch": 0.5038002265436529, "grad_norm": 2.2844209178361896, "learning_rate": 5.10861965256649e-06, "loss": 0.043695831298828126, "step": 58265 }, { "epoch": 0.5038434600651962, "grad_norm": 8.665054419928172, "learning_rate": 5.108474729213842e-06, "loss": 0.114129638671875, "step": 58270 }, { "epoch": 0.5038866935867394, "grad_norm": 0.2960368659219461, "learning_rate": 5.1083297961370965e-06, "loss": 0.092010498046875, "step": 58275 }, { "epoch": 0.5039299271082827, "grad_norm": 36.87212861043012, "learning_rate": 5.10818485333692e-06, "loss": 0.239404296875, "step": 58280 }, { "epoch": 0.5039731606298259, "grad_norm": 2.0146760393337404, "learning_rate": 5.10803990081398e-06, "loss": 0.1269927978515625, "step": 58285 }, { "epoch": 0.5040163941513692, "grad_norm": 2.6508790944047855, "learning_rate": 5.107894938568948e-06, "loss": 0.1364429473876953, "step": 58290 }, { "epoch": 0.5040596276729125, "grad_norm": 0.3605493778205464, "learning_rate": 5.107749966602489e-06, "loss": 0.1863006591796875, "step": 58295 }, { "epoch": 0.5041028611944557, "grad_norm": 111.72431846754994, "learning_rate": 5.107604984915275e-06, "loss": 0.29903564453125, "step": 58300 }, { "epoch": 0.504146094715999, "grad_norm": 3.229780271984453, "learning_rate": 5.107459993507972e-06, "loss": 0.0803445816040039, "step": 58305 }, { "epoch": 0.5041893282375423, "grad_norm": 41.02010137088256, "learning_rate": 5.10731499238125e-06, "loss": 0.16351318359375, "step": 58310 }, { "epoch": 0.5042325617590855, "grad_norm": 1.7301687790526543, "learning_rate": 5.107169981535778e-06, "loss": 0.04747467041015625, "step": 58315 }, { "epoch": 0.5042757952806288, "grad_norm": 9.334574866162173, "learning_rate": 5.107024960972224e-06, "loss": 0.07204055786132812, "step": 58320 }, { "epoch": 0.504319028802172, "grad_norm": 16.18711723807522, "learning_rate": 5.106879930691257e-06, "loss": 0.08303966522216796, "step": 58325 }, { "epoch": 0.5043622623237153, "grad_norm": 0.3427434431550981, "learning_rate": 5.106734890693545e-06, "loss": 0.16353530883789064, "step": 58330 }, { "epoch": 0.5044054958452586, "grad_norm": 0.614324826449093, "learning_rate": 5.106589840979759e-06, "loss": 0.316021728515625, "step": 58335 }, { "epoch": 0.5044487293668019, "grad_norm": 5.9208285145734285, "learning_rate": 5.106444781550567e-06, "loss": 0.38200531005859373, "step": 58340 }, { "epoch": 0.5044919628883451, "grad_norm": 7.61442890748541, "learning_rate": 5.106299712406637e-06, "loss": 0.15279693603515626, "step": 58345 }, { "epoch": 0.5045351964098884, "grad_norm": 2.5050070883044637, "learning_rate": 5.106154633548639e-06, "loss": 0.21417999267578125, "step": 58350 }, { "epoch": 0.5045784299314316, "grad_norm": 5.179098813759307, "learning_rate": 5.106009544977242e-06, "loss": 0.120758056640625, "step": 58355 }, { "epoch": 0.5046216634529749, "grad_norm": 24.27585311477738, "learning_rate": 5.105864446693116e-06, "loss": 0.22369384765625, "step": 58360 }, { "epoch": 0.5046648969745182, "grad_norm": 6.326276457225325, "learning_rate": 5.105719338696927e-06, "loss": 0.39768524169921876, "step": 58365 }, { "epoch": 0.5047081304960614, "grad_norm": 0.9083646360855644, "learning_rate": 5.105574220989349e-06, "loss": 0.1221435546875, "step": 58370 }, { "epoch": 0.5047513640176047, "grad_norm": 19.885784224071873, "learning_rate": 5.105429093571047e-06, "loss": 0.345977783203125, "step": 58375 }, { "epoch": 0.5047945975391479, "grad_norm": 6.683351863634401, "learning_rate": 5.105283956442692e-06, "loss": 0.3087627410888672, "step": 58380 }, { "epoch": 0.5048378310606912, "grad_norm": 1.8643146372359651, "learning_rate": 5.1051388096049544e-06, "loss": 0.07810134887695312, "step": 58385 }, { "epoch": 0.5048810645822345, "grad_norm": 16.362531519318267, "learning_rate": 5.104993653058502e-06, "loss": 0.09706954956054688, "step": 58390 }, { "epoch": 0.5049242981037777, "grad_norm": 2.5880806151257345, "learning_rate": 5.104848486804006e-06, "loss": 0.034488677978515625, "step": 58395 }, { "epoch": 0.504967531625321, "grad_norm": 24.766466372410463, "learning_rate": 5.104703310842134e-06, "loss": 0.28097076416015626, "step": 58400 }, { "epoch": 0.5050107651468643, "grad_norm": 6.009972175660771, "learning_rate": 5.104558125173556e-06, "loss": 0.14974365234375, "step": 58405 }, { "epoch": 0.5050539986684075, "grad_norm": 20.04970843750249, "learning_rate": 5.104412929798942e-06, "loss": 0.33011550903320314, "step": 58410 }, { "epoch": 0.5050972321899508, "grad_norm": 6.7689411539896565, "learning_rate": 5.104267724718961e-06, "loss": 0.12264556884765625, "step": 58415 }, { "epoch": 0.5051404657114941, "grad_norm": 22.48436253593333, "learning_rate": 5.104122509934284e-06, "loss": 0.06851425170898437, "step": 58420 }, { "epoch": 0.5051836992330373, "grad_norm": 37.833701367590635, "learning_rate": 5.10397728544558e-06, "loss": 0.4619598388671875, "step": 58425 }, { "epoch": 0.5052269327545806, "grad_norm": 2.4635953386373286, "learning_rate": 5.103832051253518e-06, "loss": 0.03226776123046875, "step": 58430 }, { "epoch": 0.5052701662761239, "grad_norm": 0.6103709554708504, "learning_rate": 5.103686807358769e-06, "loss": 0.0495269775390625, "step": 58435 }, { "epoch": 0.5053133997976671, "grad_norm": 4.188157958855802, "learning_rate": 5.103541553762003e-06, "loss": 0.02479248046875, "step": 58440 }, { "epoch": 0.5053566333192104, "grad_norm": 2.842247905720996, "learning_rate": 5.103396290463887e-06, "loss": 0.06822586059570312, "step": 58445 }, { "epoch": 0.5053998668407537, "grad_norm": 22.305871941059245, "learning_rate": 5.103251017465095e-06, "loss": 0.25584259033203127, "step": 58450 }, { "epoch": 0.5054431003622969, "grad_norm": 4.468621334658594, "learning_rate": 5.103105734766296e-06, "loss": 0.17350387573242188, "step": 58455 }, { "epoch": 0.5054863338838401, "grad_norm": 13.756116856180252, "learning_rate": 5.102960442368157e-06, "loss": 0.15797119140625, "step": 58460 }, { "epoch": 0.5055295674053835, "grad_norm": 4.043960298942402, "learning_rate": 5.102815140271352e-06, "loss": 0.075299072265625, "step": 58465 }, { "epoch": 0.5055728009269267, "grad_norm": 21.062377500916103, "learning_rate": 5.102669828476548e-06, "loss": 0.16854248046875, "step": 58470 }, { "epoch": 0.5056160344484699, "grad_norm": 10.46666604276322, "learning_rate": 5.102524506984418e-06, "loss": 0.2656093597412109, "step": 58475 }, { "epoch": 0.5056592679700133, "grad_norm": 1.4947179200900549, "learning_rate": 5.102379175795629e-06, "loss": 0.10502395629882813, "step": 58480 }, { "epoch": 0.5057025014915565, "grad_norm": 25.269286611571943, "learning_rate": 5.102233834910853e-06, "loss": 0.6532150268554687, "step": 58485 }, { "epoch": 0.5057457350130997, "grad_norm": 7.199495661709095, "learning_rate": 5.102088484330762e-06, "loss": 0.1030843734741211, "step": 58490 }, { "epoch": 0.5057889685346431, "grad_norm": 0.5694076294788929, "learning_rate": 5.101943124056024e-06, "loss": 0.16009674072265626, "step": 58495 }, { "epoch": 0.5058322020561863, "grad_norm": 3.5924360263972046, "learning_rate": 5.10179775408731e-06, "loss": 0.0553680419921875, "step": 58500 }, { "epoch": 0.5058754355777295, "grad_norm": 22.060948886546292, "learning_rate": 5.10165237442529e-06, "loss": 0.20927734375, "step": 58505 }, { "epoch": 0.5059186690992729, "grad_norm": 3.2205793103306037, "learning_rate": 5.101506985070635e-06, "loss": 0.07080307006835937, "step": 58510 }, { "epoch": 0.5059619026208161, "grad_norm": 5.524930635475329, "learning_rate": 5.101361586024016e-06, "loss": 0.11331787109375, "step": 58515 }, { "epoch": 0.5060051361423593, "grad_norm": 4.886268608600875, "learning_rate": 5.101216177286103e-06, "loss": 0.137408447265625, "step": 58520 }, { "epoch": 0.5060483696639027, "grad_norm": 0.18749083258902666, "learning_rate": 5.101070758857567e-06, "loss": 0.38136444091796873, "step": 58525 }, { "epoch": 0.5060916031854459, "grad_norm": 1.459257000549839, "learning_rate": 5.100925330739077e-06, "loss": 0.28376922607421873, "step": 58530 }, { "epoch": 0.5061348367069891, "grad_norm": 6.700514114203435, "learning_rate": 5.100779892931307e-06, "loss": 0.16392478942871094, "step": 58535 }, { "epoch": 0.5061780702285323, "grad_norm": 24.70479567129934, "learning_rate": 5.100634445434924e-06, "loss": 0.16506175994873046, "step": 58540 }, { "epoch": 0.5062213037500757, "grad_norm": 28.549619074468787, "learning_rate": 5.100488988250601e-06, "loss": 0.1369333267211914, "step": 58545 }, { "epoch": 0.5062645372716189, "grad_norm": 0.9221089541494399, "learning_rate": 5.100343521379009e-06, "loss": 0.20063323974609376, "step": 58550 }, { "epoch": 0.5063077707931621, "grad_norm": 12.288190515160794, "learning_rate": 5.100198044820817e-06, "loss": 0.037371826171875, "step": 58555 }, { "epoch": 0.5063510043147055, "grad_norm": 16.377898091838535, "learning_rate": 5.100052558576698e-06, "loss": 0.0926116943359375, "step": 58560 }, { "epoch": 0.5063942378362487, "grad_norm": 7.9505958693353636, "learning_rate": 5.0999070626473226e-06, "loss": 0.055889892578125, "step": 58565 }, { "epoch": 0.5064374713577919, "grad_norm": 35.89469358547965, "learning_rate": 5.0997615570333605e-06, "loss": 0.21138458251953124, "step": 58570 }, { "epoch": 0.5064807048793353, "grad_norm": 25.475602280382542, "learning_rate": 5.099616041735484e-06, "loss": 0.082757568359375, "step": 58575 }, { "epoch": 0.5065239384008785, "grad_norm": 5.296214802328262, "learning_rate": 5.099470516754364e-06, "loss": 0.40057525634765623, "step": 58580 }, { "epoch": 0.5065671719224217, "grad_norm": 5.102919159794628, "learning_rate": 5.099324982090671e-06, "loss": 0.3072509765625, "step": 58585 }, { "epoch": 0.5066104054439651, "grad_norm": 3.5530869167239945, "learning_rate": 5.099179437745077e-06, "loss": 0.09729461669921875, "step": 58590 }, { "epoch": 0.5066536389655083, "grad_norm": 4.280234558801269, "learning_rate": 5.0990338837182534e-06, "loss": 0.1264495849609375, "step": 58595 }, { "epoch": 0.5066968724870515, "grad_norm": 33.373989174025404, "learning_rate": 5.098888320010871e-06, "loss": 0.4437126159667969, "step": 58600 }, { "epoch": 0.5067401060085949, "grad_norm": 4.67301590524691, "learning_rate": 5.0987427466236e-06, "loss": 0.11646728515625, "step": 58605 }, { "epoch": 0.5067833395301381, "grad_norm": 5.543277125297988, "learning_rate": 5.098597163557114e-06, "loss": 0.07039642333984375, "step": 58610 }, { "epoch": 0.5068265730516813, "grad_norm": 0.5556420454391626, "learning_rate": 5.0984515708120825e-06, "loss": 0.10761260986328125, "step": 58615 }, { "epoch": 0.5068698065732247, "grad_norm": 0.9052945941667316, "learning_rate": 5.098305968389179e-06, "loss": 0.14166107177734374, "step": 58620 }, { "epoch": 0.5069130400947679, "grad_norm": 7.504740575395125, "learning_rate": 5.098160356289072e-06, "loss": 0.15557022094726564, "step": 58625 }, { "epoch": 0.5069562736163111, "grad_norm": 50.31233149078877, "learning_rate": 5.098014734512437e-06, "loss": 0.46276702880859377, "step": 58630 }, { "epoch": 0.5069995071378544, "grad_norm": 1.1611046051279832, "learning_rate": 5.097869103059942e-06, "loss": 0.21932449340820312, "step": 58635 }, { "epoch": 0.5070427406593977, "grad_norm": 3.7508815487721234, "learning_rate": 5.0977234619322595e-06, "loss": 0.033217239379882815, "step": 58640 }, { "epoch": 0.5070859741809409, "grad_norm": 33.70322363118192, "learning_rate": 5.097577811130064e-06, "loss": 0.4752197265625, "step": 58645 }, { "epoch": 0.5071292077024842, "grad_norm": 0.08321564650164344, "learning_rate": 5.0974321506540226e-06, "loss": 0.0904632568359375, "step": 58650 }, { "epoch": 0.5071724412240275, "grad_norm": 2.4744869083617274, "learning_rate": 5.09728648050481e-06, "loss": 0.2097320556640625, "step": 58655 }, { "epoch": 0.5072156747455707, "grad_norm": 1.3151383835892925, "learning_rate": 5.097140800683099e-06, "loss": 0.0593658447265625, "step": 58660 }, { "epoch": 0.507258908267114, "grad_norm": 4.679476563582301, "learning_rate": 5.096995111189559e-06, "loss": 0.10181884765625, "step": 58665 }, { "epoch": 0.5073021417886573, "grad_norm": 0.7106470552490414, "learning_rate": 5.096849412024862e-06, "loss": 0.2187591552734375, "step": 58670 }, { "epoch": 0.5073453753102005, "grad_norm": 1.730552761199492, "learning_rate": 5.096703703189682e-06, "loss": 0.16534271240234374, "step": 58675 }, { "epoch": 0.5073886088317437, "grad_norm": 4.741715596713764, "learning_rate": 5.096557984684689e-06, "loss": 0.20013504028320311, "step": 58680 }, { "epoch": 0.5074318423532871, "grad_norm": 7.720441326934642, "learning_rate": 5.0964122565105555e-06, "loss": 0.20128173828125, "step": 58685 }, { "epoch": 0.5074750758748303, "grad_norm": 0.6846634854935398, "learning_rate": 5.096266518667955e-06, "loss": 0.1734039306640625, "step": 58690 }, { "epoch": 0.5075183093963735, "grad_norm": 9.572774462996378, "learning_rate": 5.096120771157558e-06, "loss": 0.649761962890625, "step": 58695 }, { "epoch": 0.5075615429179169, "grad_norm": 3.6097644797168082, "learning_rate": 5.0959750139800365e-06, "loss": 0.11805992126464844, "step": 58700 }, { "epoch": 0.5076047764394601, "grad_norm": 0.26115251175663134, "learning_rate": 5.095829247136065e-06, "loss": 0.19698333740234375, "step": 58705 }, { "epoch": 0.5076480099610033, "grad_norm": 20.771025831214327, "learning_rate": 5.095683470626314e-06, "loss": 0.13390655517578126, "step": 58710 }, { "epoch": 0.5076912434825466, "grad_norm": 43.305274286316845, "learning_rate": 5.095537684451455e-06, "loss": 0.17657241821289063, "step": 58715 }, { "epoch": 0.5077344770040899, "grad_norm": 36.889269441501234, "learning_rate": 5.0953918886121615e-06, "loss": 0.4003749847412109, "step": 58720 }, { "epoch": 0.5077777105256331, "grad_norm": 9.10772334582518, "learning_rate": 5.095246083109106e-06, "loss": 0.19374542236328124, "step": 58725 }, { "epoch": 0.5078209440471764, "grad_norm": 31.893392961145487, "learning_rate": 5.09510026794296e-06, "loss": 0.154791259765625, "step": 58730 }, { "epoch": 0.5078641775687197, "grad_norm": 3.950584077981243, "learning_rate": 5.094954443114398e-06, "loss": 0.0616485595703125, "step": 58735 }, { "epoch": 0.5079074110902629, "grad_norm": 1.4687318974209007, "learning_rate": 5.094808608624091e-06, "loss": 0.10392837524414063, "step": 58740 }, { "epoch": 0.5079506446118062, "grad_norm": 9.136558301225344, "learning_rate": 5.094662764472711e-06, "loss": 0.25933151245117186, "step": 58745 }, { "epoch": 0.5079938781333495, "grad_norm": 10.683130265151275, "learning_rate": 5.094516910660932e-06, "loss": 0.160009765625, "step": 58750 }, { "epoch": 0.5080371116548927, "grad_norm": 13.720063297472253, "learning_rate": 5.094371047189426e-06, "loss": 0.401495361328125, "step": 58755 }, { "epoch": 0.508080345176436, "grad_norm": 0.3649364401015869, "learning_rate": 5.094225174058866e-06, "loss": 0.12626380920410157, "step": 58760 }, { "epoch": 0.5081235786979793, "grad_norm": 0.8185729847270824, "learning_rate": 5.094079291269924e-06, "loss": 0.244781494140625, "step": 58765 }, { "epoch": 0.5081668122195225, "grad_norm": 40.97059757304982, "learning_rate": 5.093933398823273e-06, "loss": 0.22286949157714844, "step": 58770 }, { "epoch": 0.5082100457410658, "grad_norm": 1.1890230146814222, "learning_rate": 5.093787496719587e-06, "loss": 0.204071044921875, "step": 58775 }, { "epoch": 0.5082532792626091, "grad_norm": 8.930572830762483, "learning_rate": 5.093641584959538e-06, "loss": 0.27587738037109377, "step": 58780 }, { "epoch": 0.5082965127841523, "grad_norm": 26.26806872259497, "learning_rate": 5.093495663543798e-06, "loss": 0.14892120361328126, "step": 58785 }, { "epoch": 0.5083397463056956, "grad_norm": 0.35713482841810773, "learning_rate": 5.093349732473043e-06, "loss": 0.12375946044921875, "step": 58790 }, { "epoch": 0.5083829798272389, "grad_norm": 0.3275920903026398, "learning_rate": 5.0932037917479425e-06, "loss": 0.096063232421875, "step": 58795 }, { "epoch": 0.5084262133487821, "grad_norm": 14.160922056047271, "learning_rate": 5.093057841369172e-06, "loss": 0.12682723999023438, "step": 58800 }, { "epoch": 0.5084694468703254, "grad_norm": 27.71113388587178, "learning_rate": 5.092911881337403e-06, "loss": 0.17957611083984376, "step": 58805 }, { "epoch": 0.5085126803918686, "grad_norm": 1.063448537242153, "learning_rate": 5.092765911653309e-06, "loss": 0.089617919921875, "step": 58810 }, { "epoch": 0.5085559139134119, "grad_norm": 3.8541293300631563, "learning_rate": 5.0926199323175646e-06, "loss": 0.12195072174072266, "step": 58815 }, { "epoch": 0.5085991474349552, "grad_norm": 15.514965839527125, "learning_rate": 5.092473943330841e-06, "loss": 0.09091339111328126, "step": 58820 }, { "epoch": 0.5086423809564984, "grad_norm": 2.1756809637258376, "learning_rate": 5.092327944693813e-06, "loss": 0.24327850341796875, "step": 58825 }, { "epoch": 0.5086856144780417, "grad_norm": 0.09961726022386808, "learning_rate": 5.092181936407153e-06, "loss": 0.14097633361816406, "step": 58830 }, { "epoch": 0.508728847999585, "grad_norm": 11.570968561683424, "learning_rate": 5.092035918471535e-06, "loss": 0.13635101318359374, "step": 58835 }, { "epoch": 0.5087720815211282, "grad_norm": 27.151916399079738, "learning_rate": 5.091889890887632e-06, "loss": 0.20036544799804687, "step": 58840 }, { "epoch": 0.5088153150426715, "grad_norm": 5.224601551324328, "learning_rate": 5.091743853656118e-06, "loss": 0.06914215087890625, "step": 58845 }, { "epoch": 0.5088585485642148, "grad_norm": 36.14550745018215, "learning_rate": 5.0915978067776656e-06, "loss": 0.21033477783203125, "step": 58850 }, { "epoch": 0.508901782085758, "grad_norm": 2.552004457065109, "learning_rate": 5.09145175025295e-06, "loss": 0.14837493896484374, "step": 58855 }, { "epoch": 0.5089450156073013, "grad_norm": 8.99109271375701, "learning_rate": 5.091305684082643e-06, "loss": 0.07902145385742188, "step": 58860 }, { "epoch": 0.5089882491288445, "grad_norm": 3.002439197861348, "learning_rate": 5.091159608267419e-06, "loss": 0.2541351318359375, "step": 58865 }, { "epoch": 0.5090314826503878, "grad_norm": 9.569729866276736, "learning_rate": 5.091013522807952e-06, "loss": 0.11909637451171876, "step": 58870 }, { "epoch": 0.5090747161719311, "grad_norm": 26.62158625986108, "learning_rate": 5.090867427704916e-06, "loss": 0.2305389404296875, "step": 58875 }, { "epoch": 0.5091179496934743, "grad_norm": 0.12491133911116821, "learning_rate": 5.090721322958982e-06, "loss": 0.08560371398925781, "step": 58880 }, { "epoch": 0.5091611832150176, "grad_norm": 43.734477545418464, "learning_rate": 5.090575208570828e-06, "loss": 0.4970722198486328, "step": 58885 }, { "epoch": 0.5092044167365608, "grad_norm": 3.582041867138388, "learning_rate": 5.090429084541125e-06, "loss": 0.2341522216796875, "step": 58890 }, { "epoch": 0.5092476502581041, "grad_norm": 0.8240288100386229, "learning_rate": 5.090282950870548e-06, "loss": 0.05334320068359375, "step": 58895 }, { "epoch": 0.5092908837796474, "grad_norm": 8.303507620133505, "learning_rate": 5.090136807559771e-06, "loss": 0.147198486328125, "step": 58900 }, { "epoch": 0.5093341173011906, "grad_norm": 1.4341193260425498, "learning_rate": 5.089990654609467e-06, "loss": 0.18205947875976564, "step": 58905 }, { "epoch": 0.5093773508227339, "grad_norm": 0.5525265523259592, "learning_rate": 5.089844492020311e-06, "loss": 0.06371040344238281, "step": 58910 }, { "epoch": 0.5094205843442772, "grad_norm": 31.441794149854353, "learning_rate": 5.089698319792978e-06, "loss": 0.26961669921875, "step": 58915 }, { "epoch": 0.5094638178658204, "grad_norm": 2.466362766336231, "learning_rate": 5.08955213792814e-06, "loss": 0.13581981658935546, "step": 58920 }, { "epoch": 0.5095070513873637, "grad_norm": 4.104287296342296, "learning_rate": 5.089405946426472e-06, "loss": 0.2065887451171875, "step": 58925 }, { "epoch": 0.509550284908907, "grad_norm": 15.61915526597627, "learning_rate": 5.089259745288649e-06, "loss": 0.314697265625, "step": 58930 }, { "epoch": 0.5095935184304502, "grad_norm": 24.48360354700155, "learning_rate": 5.0891135345153445e-06, "loss": 0.70880126953125, "step": 58935 }, { "epoch": 0.5096367519519935, "grad_norm": 0.9140941425845702, "learning_rate": 5.088967314107232e-06, "loss": 0.25937576293945314, "step": 58940 }, { "epoch": 0.5096799854735368, "grad_norm": 63.90125618236149, "learning_rate": 5.088821084064988e-06, "loss": 0.12566680908203126, "step": 58945 }, { "epoch": 0.50972321899508, "grad_norm": 3.0149330106920305, "learning_rate": 5.088674844389285e-06, "loss": 0.2377471923828125, "step": 58950 }, { "epoch": 0.5097664525166233, "grad_norm": 9.484373028791733, "learning_rate": 5.088528595080799e-06, "loss": 0.05458984375, "step": 58955 }, { "epoch": 0.5098096860381666, "grad_norm": 70.44483692811991, "learning_rate": 5.088382336140203e-06, "loss": 0.2149810791015625, "step": 58960 }, { "epoch": 0.5098529195597098, "grad_norm": 4.909417827404448, "learning_rate": 5.088236067568173e-06, "loss": 0.19305410385131835, "step": 58965 }, { "epoch": 0.5098961530812531, "grad_norm": 9.770285645449558, "learning_rate": 5.088089789365382e-06, "loss": 0.24738616943359376, "step": 58970 }, { "epoch": 0.5099393866027964, "grad_norm": 1.2996827639434076, "learning_rate": 5.087943501532505e-06, "loss": 0.03849220275878906, "step": 58975 }, { "epoch": 0.5099826201243396, "grad_norm": 0.4539507467991199, "learning_rate": 5.087797204070218e-06, "loss": 0.040008544921875, "step": 58980 }, { "epoch": 0.5100258536458828, "grad_norm": 22.291340799539054, "learning_rate": 5.087650896979195e-06, "loss": 0.3010528564453125, "step": 58985 }, { "epoch": 0.5100690871674262, "grad_norm": 43.44855789254043, "learning_rate": 5.087504580260109e-06, "loss": 0.2984283447265625, "step": 58990 }, { "epoch": 0.5101123206889694, "grad_norm": 34.69304624381231, "learning_rate": 5.087358253913637e-06, "loss": 0.25619659423828123, "step": 58995 }, { "epoch": 0.5101555542105126, "grad_norm": 6.451268156262606, "learning_rate": 5.087211917940454e-06, "loss": 0.12373619079589844, "step": 59000 }, { "epoch": 0.510198787732056, "grad_norm": 4.905807088071401, "learning_rate": 5.087065572341233e-06, "loss": 0.280950927734375, "step": 59005 }, { "epoch": 0.5102420212535992, "grad_norm": 0.31864713301508707, "learning_rate": 5.086919217116651e-06, "loss": 0.13026885986328124, "step": 59010 }, { "epoch": 0.5102852547751424, "grad_norm": 4.320518625054249, "learning_rate": 5.0867728522673816e-06, "loss": 0.180218505859375, "step": 59015 }, { "epoch": 0.5103284882966858, "grad_norm": 1.6440194866711038, "learning_rate": 5.086626477794099e-06, "loss": 0.0623138427734375, "step": 59020 }, { "epoch": 0.510371721818229, "grad_norm": 11.707169610456825, "learning_rate": 5.086480093697481e-06, "loss": 0.24969329833984374, "step": 59025 }, { "epoch": 0.5104149553397722, "grad_norm": 12.435702158053568, "learning_rate": 5.0863336999782005e-06, "loss": 0.4763275146484375, "step": 59030 }, { "epoch": 0.5104581888613156, "grad_norm": 16.973918371306507, "learning_rate": 5.086187296636934e-06, "loss": 0.11416015625, "step": 59035 }, { "epoch": 0.5105014223828588, "grad_norm": 1.384795471630926, "learning_rate": 5.086040883674355e-06, "loss": 0.10225677490234375, "step": 59040 }, { "epoch": 0.510544655904402, "grad_norm": 2.952233752603848, "learning_rate": 5.085894461091141e-06, "loss": 0.39980316162109375, "step": 59045 }, { "epoch": 0.5105878894259454, "grad_norm": 2.3353646526535576, "learning_rate": 5.0857480288879646e-06, "loss": 0.1435546875, "step": 59050 }, { "epoch": 0.5106311229474886, "grad_norm": 0.4290517089143633, "learning_rate": 5.085601587065504e-06, "loss": 0.4010883331298828, "step": 59055 }, { "epoch": 0.5106743564690318, "grad_norm": 0.34571062976180006, "learning_rate": 5.085455135624432e-06, "loss": 0.2302093505859375, "step": 59060 }, { "epoch": 0.510717589990575, "grad_norm": 4.068035936556929, "learning_rate": 5.085308674565426e-06, "loss": 0.11572265625, "step": 59065 }, { "epoch": 0.5107608235121184, "grad_norm": 4.370410570805561, "learning_rate": 5.085162203889161e-06, "loss": 0.061289215087890626, "step": 59070 }, { "epoch": 0.5108040570336616, "grad_norm": 14.422230353374143, "learning_rate": 5.085015723596311e-06, "loss": 0.08495559692382812, "step": 59075 }, { "epoch": 0.5108472905552048, "grad_norm": 63.74539300608407, "learning_rate": 5.0848692336875535e-06, "loss": 0.250897216796875, "step": 59080 }, { "epoch": 0.5108905240767482, "grad_norm": 17.460815059978327, "learning_rate": 5.084722734163562e-06, "loss": 0.076043701171875, "step": 59085 }, { "epoch": 0.5109337575982914, "grad_norm": 6.789414542087135, "learning_rate": 5.0845762250250155e-06, "loss": 0.08145217895507813, "step": 59090 }, { "epoch": 0.5109769911198346, "grad_norm": 15.098742839437666, "learning_rate": 5.084429706272587e-06, "loss": 0.08673095703125, "step": 59095 }, { "epoch": 0.511020224641378, "grad_norm": 26.966245150419148, "learning_rate": 5.084283177906953e-06, "loss": 0.3405303955078125, "step": 59100 }, { "epoch": 0.5110634581629212, "grad_norm": 2.1311132363821415, "learning_rate": 5.084136639928788e-06, "loss": 0.3059967041015625, "step": 59105 }, { "epoch": 0.5111066916844644, "grad_norm": 2.2832247537817434, "learning_rate": 5.08399009233877e-06, "loss": 0.2314697265625, "step": 59110 }, { "epoch": 0.5111499252060078, "grad_norm": 6.173343692137104, "learning_rate": 5.083843535137574e-06, "loss": 0.22488784790039062, "step": 59115 }, { "epoch": 0.511193158727551, "grad_norm": 13.121079190180476, "learning_rate": 5.083696968325875e-06, "loss": 0.2446746826171875, "step": 59120 }, { "epoch": 0.5112363922490942, "grad_norm": 5.085688842160829, "learning_rate": 5.08355039190435e-06, "loss": 0.25798568725585935, "step": 59125 }, { "epoch": 0.5112796257706376, "grad_norm": 1.098994317702414, "learning_rate": 5.083403805873674e-06, "loss": 0.204718017578125, "step": 59130 }, { "epoch": 0.5113228592921808, "grad_norm": 15.177524639954633, "learning_rate": 5.0832572102345245e-06, "loss": 0.6225486755371094, "step": 59135 }, { "epoch": 0.511366092813724, "grad_norm": 8.866337322958053, "learning_rate": 5.083110604987577e-06, "loss": 0.06755218505859376, "step": 59140 }, { "epoch": 0.5114093263352674, "grad_norm": 5.0098223957377, "learning_rate": 5.082963990133507e-06, "loss": 0.137548828125, "step": 59145 }, { "epoch": 0.5114525598568106, "grad_norm": 8.498283908623394, "learning_rate": 5.0828173656729915e-06, "loss": 0.0727386474609375, "step": 59150 }, { "epoch": 0.5114957933783538, "grad_norm": 4.744285176591805, "learning_rate": 5.082670731606706e-06, "loss": 0.10928192138671874, "step": 59155 }, { "epoch": 0.511539026899897, "grad_norm": 0.44281170556190635, "learning_rate": 5.0825240879353275e-06, "loss": 0.029619598388671876, "step": 59160 }, { "epoch": 0.5115822604214404, "grad_norm": 16.166936198353394, "learning_rate": 5.082377434659532e-06, "loss": 0.10601959228515626, "step": 59165 }, { "epoch": 0.5116254939429836, "grad_norm": 46.49313758210071, "learning_rate": 5.0822307717799955e-06, "loss": 0.192767333984375, "step": 59170 }, { "epoch": 0.5116687274645269, "grad_norm": 24.630385528513738, "learning_rate": 5.082084099297394e-06, "loss": 0.2679290771484375, "step": 59175 }, { "epoch": 0.5117119609860702, "grad_norm": 12.698622254508383, "learning_rate": 5.0819374172124055e-06, "loss": 0.2843498229980469, "step": 59180 }, { "epoch": 0.5117551945076134, "grad_norm": 14.654585732968759, "learning_rate": 5.081790725525706e-06, "loss": 0.10948486328125, "step": 59185 }, { "epoch": 0.5117984280291566, "grad_norm": 17.50596905527523, "learning_rate": 5.081644024237971e-06, "loss": 0.07970848083496093, "step": 59190 }, { "epoch": 0.5118416615507, "grad_norm": 24.855036139434944, "learning_rate": 5.081497313349877e-06, "loss": 0.33095703125, "step": 59195 }, { "epoch": 0.5118848950722432, "grad_norm": 3.5314842883101947, "learning_rate": 5.081350592862102e-06, "loss": 0.157940673828125, "step": 59200 }, { "epoch": 0.5119281285937864, "grad_norm": 1.6485033728926168, "learning_rate": 5.081203862775323e-06, "loss": 0.19193572998046876, "step": 59205 }, { "epoch": 0.5119713621153298, "grad_norm": 3.0444676099532115, "learning_rate": 5.081057123090214e-06, "loss": 0.246649169921875, "step": 59210 }, { "epoch": 0.512014595636873, "grad_norm": 2.4915320224359006, "learning_rate": 5.080910373807454e-06, "loss": 0.0814239501953125, "step": 59215 }, { "epoch": 0.5120578291584162, "grad_norm": 4.237347595347943, "learning_rate": 5.08076361492772e-06, "loss": 0.0954681396484375, "step": 59220 }, { "epoch": 0.5121010626799596, "grad_norm": 14.023788134867871, "learning_rate": 5.080616846451687e-06, "loss": 0.18916397094726561, "step": 59225 }, { "epoch": 0.5121442962015028, "grad_norm": 8.49159377964977, "learning_rate": 5.0804700683800325e-06, "loss": 0.14756126403808595, "step": 59230 }, { "epoch": 0.512187529723046, "grad_norm": 0.9891672268513153, "learning_rate": 5.0803232807134356e-06, "loss": 0.1327239990234375, "step": 59235 }, { "epoch": 0.5122307632445893, "grad_norm": 1.9007459804217106, "learning_rate": 5.080176483452571e-06, "loss": 0.064422607421875, "step": 59240 }, { "epoch": 0.5122739967661326, "grad_norm": 2.461180449392055, "learning_rate": 5.0800296765981155e-06, "loss": 0.25933685302734377, "step": 59245 }, { "epoch": 0.5123172302876758, "grad_norm": 27.729081078796863, "learning_rate": 5.079882860150747e-06, "loss": 0.13911666870117187, "step": 59250 }, { "epoch": 0.5123604638092191, "grad_norm": 12.74039708218306, "learning_rate": 5.0797360341111425e-06, "loss": 0.15916748046875, "step": 59255 }, { "epoch": 0.5124036973307624, "grad_norm": 1.2771790114374602, "learning_rate": 5.07958919847998e-06, "loss": 0.23895606994628907, "step": 59260 }, { "epoch": 0.5124469308523056, "grad_norm": 4.891911199872715, "learning_rate": 5.079442353257936e-06, "loss": 0.14917678833007814, "step": 59265 }, { "epoch": 0.5124901643738489, "grad_norm": 41.097313885220935, "learning_rate": 5.079295498445686e-06, "loss": 0.4720344543457031, "step": 59270 }, { "epoch": 0.5125333978953922, "grad_norm": 2.6869858240901277, "learning_rate": 5.0791486340439104e-06, "loss": 0.48262786865234375, "step": 59275 }, { "epoch": 0.5125766314169354, "grad_norm": 8.363615034304427, "learning_rate": 5.079001760053285e-06, "loss": 0.309295654296875, "step": 59280 }, { "epoch": 0.5126198649384787, "grad_norm": 7.998791874685601, "learning_rate": 5.078854876474486e-06, "loss": 0.1290283203125, "step": 59285 }, { "epoch": 0.512663098460022, "grad_norm": 25.479433163468304, "learning_rate": 5.078707983308193e-06, "loss": 0.14662246704101561, "step": 59290 }, { "epoch": 0.5127063319815652, "grad_norm": 2.3003377484152647, "learning_rate": 5.078561080555082e-06, "loss": 0.17200260162353515, "step": 59295 }, { "epoch": 0.5127495655031085, "grad_norm": 7.9596147772471335, "learning_rate": 5.078414168215832e-06, "loss": 0.3182499885559082, "step": 59300 }, { "epoch": 0.5127927990246518, "grad_norm": 29.973903255454857, "learning_rate": 5.078267246291118e-06, "loss": 0.3337898254394531, "step": 59305 }, { "epoch": 0.512836032546195, "grad_norm": 0.13825789107348654, "learning_rate": 5.07812031478162e-06, "loss": 0.07801055908203125, "step": 59310 }, { "epoch": 0.5128792660677383, "grad_norm": 7.576966636111559, "learning_rate": 5.0779733736880146e-06, "loss": 0.167620849609375, "step": 59315 }, { "epoch": 0.5129224995892816, "grad_norm": 13.639938541063012, "learning_rate": 5.077826423010979e-06, "loss": 0.1040191650390625, "step": 59320 }, { "epoch": 0.5129657331108248, "grad_norm": 15.778995023463802, "learning_rate": 5.077679462751192e-06, "loss": 0.08518524169921875, "step": 59325 }, { "epoch": 0.5130089666323681, "grad_norm": 24.940769431433704, "learning_rate": 5.077532492909331e-06, "loss": 0.24249267578125, "step": 59330 }, { "epoch": 0.5130522001539113, "grad_norm": 2.4392613267101555, "learning_rate": 5.077385513486074e-06, "loss": 0.1479583740234375, "step": 59335 }, { "epoch": 0.5130954336754546, "grad_norm": 4.534651754354933, "learning_rate": 5.077238524482098e-06, "loss": 0.12086868286132812, "step": 59340 }, { "epoch": 0.5131386671969979, "grad_norm": 31.641883941694367, "learning_rate": 5.0770915258980825e-06, "loss": 0.127362060546875, "step": 59345 }, { "epoch": 0.5131819007185411, "grad_norm": 4.5270608117917215, "learning_rate": 5.076944517734703e-06, "loss": 0.0892974853515625, "step": 59350 }, { "epoch": 0.5132251342400844, "grad_norm": 1.5248976291696035, "learning_rate": 5.076797499992639e-06, "loss": 0.167047119140625, "step": 59355 }, { "epoch": 0.5132683677616277, "grad_norm": 1.9269779047546065, "learning_rate": 5.07665047267257e-06, "loss": 0.054186248779296876, "step": 59360 }, { "epoch": 0.5133116012831709, "grad_norm": 13.996142970397896, "learning_rate": 5.076503435775171e-06, "loss": 0.1128692626953125, "step": 59365 }, { "epoch": 0.5133548348047142, "grad_norm": 2.4097458183665186, "learning_rate": 5.076356389301121e-06, "loss": 0.04903564453125, "step": 59370 }, { "epoch": 0.5133980683262575, "grad_norm": 10.460879149542308, "learning_rate": 5.076209333251101e-06, "loss": 0.12706756591796875, "step": 59375 }, { "epoch": 0.5134413018478007, "grad_norm": 10.210687155413899, "learning_rate": 5.076062267625786e-06, "loss": 0.208642578125, "step": 59380 }, { "epoch": 0.513484535369344, "grad_norm": 2.543225379835866, "learning_rate": 5.075915192425855e-06, "loss": 0.0569610595703125, "step": 59385 }, { "epoch": 0.5135277688908872, "grad_norm": 4.045452754340043, "learning_rate": 5.075768107651987e-06, "loss": 0.0677642822265625, "step": 59390 }, { "epoch": 0.5135710024124305, "grad_norm": 5.439191250503907, "learning_rate": 5.07562101330486e-06, "loss": 0.3730316162109375, "step": 59395 }, { "epoch": 0.5136142359339738, "grad_norm": 53.54941608052907, "learning_rate": 5.075473909385151e-06, "loss": 0.36411056518554685, "step": 59400 }, { "epoch": 0.513657469455517, "grad_norm": 2.071579712842746, "learning_rate": 5.075326795893541e-06, "loss": 0.4244110107421875, "step": 59405 }, { "epoch": 0.5137007029770603, "grad_norm": 1.0960722588567882, "learning_rate": 5.075179672830707e-06, "loss": 0.2093578338623047, "step": 59410 }, { "epoch": 0.5137439364986035, "grad_norm": 16.12513347223254, "learning_rate": 5.075032540197328e-06, "loss": 0.22308502197265626, "step": 59415 }, { "epoch": 0.5137871700201468, "grad_norm": 2.445019764226785, "learning_rate": 5.074885397994082e-06, "loss": 0.17231521606445313, "step": 59420 }, { "epoch": 0.5138304035416901, "grad_norm": 5.109586532174078, "learning_rate": 5.074738246221647e-06, "loss": 0.04384260177612305, "step": 59425 }, { "epoch": 0.5138736370632333, "grad_norm": 25.83052040950035, "learning_rate": 5.074591084880704e-06, "loss": 0.13509521484375, "step": 59430 }, { "epoch": 0.5139168705847766, "grad_norm": 0.3506065636654452, "learning_rate": 5.074443913971929e-06, "loss": 0.3027740478515625, "step": 59435 }, { "epoch": 0.5139601041063199, "grad_norm": 25.043806090069186, "learning_rate": 5.074296733496003e-06, "loss": 0.3389869689941406, "step": 59440 }, { "epoch": 0.5140033376278631, "grad_norm": 0.3196185687667948, "learning_rate": 5.074149543453603e-06, "loss": 0.291400146484375, "step": 59445 }, { "epoch": 0.5140465711494064, "grad_norm": 2.9665045796171436, "learning_rate": 5.074002343845409e-06, "loss": 0.088641357421875, "step": 59450 }, { "epoch": 0.5140898046709497, "grad_norm": 4.3408317875090745, "learning_rate": 5.073855134672099e-06, "loss": 0.5882339477539062, "step": 59455 }, { "epoch": 0.5141330381924929, "grad_norm": 7.278642390170353, "learning_rate": 5.073707915934353e-06, "loss": 0.16670989990234375, "step": 59460 }, { "epoch": 0.5141762717140362, "grad_norm": 32.06879627700153, "learning_rate": 5.073560687632849e-06, "loss": 0.35320892333984377, "step": 59465 }, { "epoch": 0.5142195052355795, "grad_norm": 3.759691404323632, "learning_rate": 5.073413449768266e-06, "loss": 0.2673919677734375, "step": 59470 }, { "epoch": 0.5142627387571227, "grad_norm": 0.6112345619588178, "learning_rate": 5.073266202341284e-06, "loss": 0.44457855224609377, "step": 59475 }, { "epoch": 0.514305972278666, "grad_norm": 10.946473214907483, "learning_rate": 5.073118945352581e-06, "loss": 0.14841842651367188, "step": 59480 }, { "epoch": 0.5143492058002093, "grad_norm": 0.7713587810355039, "learning_rate": 5.072971678802836e-06, "loss": 0.17107601165771485, "step": 59485 }, { "epoch": 0.5143924393217525, "grad_norm": 3.540388956699685, "learning_rate": 5.07282440269273e-06, "loss": 0.1697784423828125, "step": 59490 }, { "epoch": 0.5144356728432958, "grad_norm": 0.4464269348637248, "learning_rate": 5.072677117022941e-06, "loss": 0.03015899658203125, "step": 59495 }, { "epoch": 0.5144789063648391, "grad_norm": 1.2022872847680466, "learning_rate": 5.072529821794148e-06, "loss": 0.16376018524169922, "step": 59500 }, { "epoch": 0.5145221398863823, "grad_norm": 9.227714814609445, "learning_rate": 5.07238251700703e-06, "loss": 0.17711734771728516, "step": 59505 }, { "epoch": 0.5145653734079255, "grad_norm": 3.7025363820132737, "learning_rate": 5.072235202662268e-06, "loss": 0.1356536865234375, "step": 59510 }, { "epoch": 0.5146086069294689, "grad_norm": 15.88292015107869, "learning_rate": 5.0720878787605386e-06, "loss": 0.12548828125, "step": 59515 }, { "epoch": 0.5146518404510121, "grad_norm": 1.5348200083258732, "learning_rate": 5.071940545302525e-06, "loss": 0.0881866455078125, "step": 59520 }, { "epoch": 0.5146950739725553, "grad_norm": 12.577339716406023, "learning_rate": 5.0717932022889034e-06, "loss": 0.12810287475585938, "step": 59525 }, { "epoch": 0.5147383074940987, "grad_norm": 1.250905241154783, "learning_rate": 5.0716458497203546e-06, "loss": 0.07296524047851563, "step": 59530 }, { "epoch": 0.5147815410156419, "grad_norm": 18.182182231015346, "learning_rate": 5.0714984875975595e-06, "loss": 0.16120147705078125, "step": 59535 }, { "epoch": 0.5148247745371851, "grad_norm": 7.294802314015349, "learning_rate": 5.071351115921196e-06, "loss": 0.18940238952636718, "step": 59540 }, { "epoch": 0.5148680080587285, "grad_norm": 4.757615431637147, "learning_rate": 5.071203734691943e-06, "loss": 0.06358604431152344, "step": 59545 }, { "epoch": 0.5149112415802717, "grad_norm": 19.987317146014288, "learning_rate": 5.071056343910483e-06, "loss": 0.5571197509765625, "step": 59550 }, { "epoch": 0.5149544751018149, "grad_norm": 27.75025945852767, "learning_rate": 5.070908943577493e-06, "loss": 0.40435590744018557, "step": 59555 }, { "epoch": 0.5149977086233583, "grad_norm": 0.6187023283166625, "learning_rate": 5.070761533693654e-06, "loss": 0.03312835693359375, "step": 59560 }, { "epoch": 0.5150409421449015, "grad_norm": 6.913171037381591, "learning_rate": 5.0706141142596465e-06, "loss": 0.084356689453125, "step": 59565 }, { "epoch": 0.5150841756664447, "grad_norm": 11.846897235089124, "learning_rate": 5.070466685276149e-06, "loss": 0.35721282958984374, "step": 59570 }, { "epoch": 0.515127409187988, "grad_norm": 1.6349432999569755, "learning_rate": 5.070319246743842e-06, "loss": 0.27717132568359376, "step": 59575 }, { "epoch": 0.5151706427095313, "grad_norm": 16.399597860410424, "learning_rate": 5.070171798663406e-06, "loss": 0.3347747802734375, "step": 59580 }, { "epoch": 0.5152138762310745, "grad_norm": 1.3539738735070592, "learning_rate": 5.07002434103552e-06, "loss": 0.24452056884765624, "step": 59585 }, { "epoch": 0.5152571097526177, "grad_norm": 18.62138867778035, "learning_rate": 5.069876873860865e-06, "loss": 0.3415283203125, "step": 59590 }, { "epoch": 0.5153003432741611, "grad_norm": 0.1127861881270224, "learning_rate": 5.069729397140121e-06, "loss": 0.24848175048828125, "step": 59595 }, { "epoch": 0.5153435767957043, "grad_norm": 0.7664117848932787, "learning_rate": 5.069581910873967e-06, "loss": 0.14158172607421876, "step": 59600 }, { "epoch": 0.5153868103172475, "grad_norm": 1.3526645256785148, "learning_rate": 5.069434415063086e-06, "loss": 0.09052848815917969, "step": 59605 }, { "epoch": 0.5154300438387909, "grad_norm": 7.478102254792163, "learning_rate": 5.069286909708154e-06, "loss": 0.0419464111328125, "step": 59610 }, { "epoch": 0.5154732773603341, "grad_norm": 3.936364516770839, "learning_rate": 5.069139394809855e-06, "loss": 0.14501953125, "step": 59615 }, { "epoch": 0.5155165108818773, "grad_norm": 5.4875826852922565, "learning_rate": 5.068991870368868e-06, "loss": 0.109368896484375, "step": 59620 }, { "epoch": 0.5155597444034207, "grad_norm": 2.8333969250131097, "learning_rate": 5.068844336385873e-06, "loss": 0.07511749267578124, "step": 59625 }, { "epoch": 0.5156029779249639, "grad_norm": 11.49839151258366, "learning_rate": 5.068696792861551e-06, "loss": 0.156689453125, "step": 59630 }, { "epoch": 0.5156462114465071, "grad_norm": 0.08124398864220854, "learning_rate": 5.068549239796582e-06, "loss": 0.33462371826171877, "step": 59635 }, { "epoch": 0.5156894449680505, "grad_norm": 13.123692186349006, "learning_rate": 5.068401677191646e-06, "loss": 0.5741912841796875, "step": 59640 }, { "epoch": 0.5157326784895937, "grad_norm": 44.5653763470699, "learning_rate": 5.0682541050474255e-06, "loss": 0.30888900756835935, "step": 59645 }, { "epoch": 0.5157759120111369, "grad_norm": 0.1676530599044638, "learning_rate": 5.068106523364598e-06, "loss": 0.16956939697265624, "step": 59650 }, { "epoch": 0.5158191455326803, "grad_norm": 7.630222511925509, "learning_rate": 5.0679589321438475e-06, "loss": 0.10035247802734375, "step": 59655 }, { "epoch": 0.5158623790542235, "grad_norm": 6.029278848322047, "learning_rate": 5.067811331385853e-06, "loss": 0.10216064453125, "step": 59660 }, { "epoch": 0.5159056125757667, "grad_norm": 38.68807174651956, "learning_rate": 5.0676637210912955e-06, "loss": 0.3706878662109375, "step": 59665 }, { "epoch": 0.5159488460973101, "grad_norm": 4.479255177239796, "learning_rate": 5.067516101260855e-06, "loss": 0.13076934814453126, "step": 59670 }, { "epoch": 0.5159920796188533, "grad_norm": 16.921715102835122, "learning_rate": 5.0673684718952135e-06, "loss": 0.43375701904296876, "step": 59675 }, { "epoch": 0.5160353131403965, "grad_norm": 12.967853412449067, "learning_rate": 5.067220832995051e-06, "loss": 0.11845245361328124, "step": 59680 }, { "epoch": 0.5160785466619398, "grad_norm": 25.072507609661688, "learning_rate": 5.067073184561049e-06, "loss": 0.15614013671875, "step": 59685 }, { "epoch": 0.5161217801834831, "grad_norm": 3.35619692209273, "learning_rate": 5.066925526593887e-06, "loss": 0.010178375244140624, "step": 59690 }, { "epoch": 0.5161650137050263, "grad_norm": 7.821127468067468, "learning_rate": 5.066777859094247e-06, "loss": 0.0968231201171875, "step": 59695 }, { "epoch": 0.5162082472265695, "grad_norm": 25.79405555974973, "learning_rate": 5.066630182062812e-06, "loss": 0.27145919799804685, "step": 59700 }, { "epoch": 0.5162514807481129, "grad_norm": 1.895996809195499, "learning_rate": 5.0664824955002604e-06, "loss": 0.04314079284667969, "step": 59705 }, { "epoch": 0.5162947142696561, "grad_norm": 34.12417024128441, "learning_rate": 5.066334799407274e-06, "loss": 0.3898162841796875, "step": 59710 }, { "epoch": 0.5163379477911993, "grad_norm": 8.923769230228631, "learning_rate": 5.066187093784534e-06, "loss": 0.23463668823242187, "step": 59715 }, { "epoch": 0.5163811813127427, "grad_norm": 19.323831711955794, "learning_rate": 5.0660393786327215e-06, "loss": 0.3820930480957031, "step": 59720 }, { "epoch": 0.5164244148342859, "grad_norm": 6.059257565245609, "learning_rate": 5.065891653952518e-06, "loss": 0.1874053955078125, "step": 59725 }, { "epoch": 0.5164676483558291, "grad_norm": 22.59041906501445, "learning_rate": 5.065743919744605e-06, "loss": 0.158966064453125, "step": 59730 }, { "epoch": 0.5165108818773725, "grad_norm": 4.314678265410695, "learning_rate": 5.065596176009664e-06, "loss": 0.21479339599609376, "step": 59735 }, { "epoch": 0.5165541153989157, "grad_norm": 27.20893682196532, "learning_rate": 5.065448422748375e-06, "loss": 0.37778892517089846, "step": 59740 }, { "epoch": 0.5165973489204589, "grad_norm": 0.1774430015535004, "learning_rate": 5.065300659961421e-06, "loss": 0.02821693420410156, "step": 59745 }, { "epoch": 0.5166405824420023, "grad_norm": 0.12058798203083054, "learning_rate": 5.065152887649482e-06, "loss": 0.08638076782226563, "step": 59750 }, { "epoch": 0.5166838159635455, "grad_norm": 12.98616267028898, "learning_rate": 5.065005105813241e-06, "loss": 0.23478546142578124, "step": 59755 }, { "epoch": 0.5167270494850887, "grad_norm": 10.519895245980594, "learning_rate": 5.06485731445338e-06, "loss": 0.16499176025390624, "step": 59760 }, { "epoch": 0.516770283006632, "grad_norm": 2.826911487219689, "learning_rate": 5.0647095135705776e-06, "loss": 0.22363739013671874, "step": 59765 }, { "epoch": 0.5168135165281753, "grad_norm": 26.858750891452353, "learning_rate": 5.064561703165518e-06, "loss": 0.14317779541015624, "step": 59770 }, { "epoch": 0.5168567500497185, "grad_norm": 1.525310477046843, "learning_rate": 5.0644138832388834e-06, "loss": 0.14378662109375, "step": 59775 }, { "epoch": 0.5168999835712618, "grad_norm": 1.5132447531351554, "learning_rate": 5.064266053791354e-06, "loss": 0.387017822265625, "step": 59780 }, { "epoch": 0.5169432170928051, "grad_norm": 0.746166397481333, "learning_rate": 5.0641182148236105e-06, "loss": 0.0821563720703125, "step": 59785 }, { "epoch": 0.5169864506143483, "grad_norm": 6.677243282677224, "learning_rate": 5.063970366336338e-06, "loss": 0.24132156372070312, "step": 59790 }, { "epoch": 0.5170296841358916, "grad_norm": 2.258925532801409, "learning_rate": 5.063822508330215e-06, "loss": 0.07856559753417969, "step": 59795 }, { "epoch": 0.5170729176574349, "grad_norm": 2.056545078362444, "learning_rate": 5.063674640805925e-06, "loss": 0.038143157958984375, "step": 59800 }, { "epoch": 0.5171161511789781, "grad_norm": 1.2824681617640485, "learning_rate": 5.063526763764151e-06, "loss": 0.07141780853271484, "step": 59805 }, { "epoch": 0.5171593847005214, "grad_norm": 17.86440240370547, "learning_rate": 5.063378877205574e-06, "loss": 0.13303070068359374, "step": 59810 }, { "epoch": 0.5172026182220647, "grad_norm": 20.832704064863744, "learning_rate": 5.063230981130875e-06, "loss": 0.107086181640625, "step": 59815 }, { "epoch": 0.5172458517436079, "grad_norm": 0.33960379369291316, "learning_rate": 5.063083075540737e-06, "loss": 0.13632354736328126, "step": 59820 }, { "epoch": 0.5172890852651512, "grad_norm": 40.8212158916321, "learning_rate": 5.062935160435843e-06, "loss": 0.20975341796875, "step": 59825 }, { "epoch": 0.5173323187866945, "grad_norm": 15.362157647411081, "learning_rate": 5.062787235816873e-06, "loss": 0.130810546875, "step": 59830 }, { "epoch": 0.5173755523082377, "grad_norm": 1.546457923045672, "learning_rate": 5.062639301684512e-06, "loss": 0.20924072265625, "step": 59835 }, { "epoch": 0.517418785829781, "grad_norm": 14.018679352196141, "learning_rate": 5.06249135803944e-06, "loss": 0.06518402099609374, "step": 59840 }, { "epoch": 0.5174620193513242, "grad_norm": 28.561980564368692, "learning_rate": 5.062343404882341e-06, "loss": 0.3486053466796875, "step": 59845 }, { "epoch": 0.5175052528728675, "grad_norm": 45.61798997833145, "learning_rate": 5.062195442213895e-06, "loss": 0.5012527465820312, "step": 59850 }, { "epoch": 0.5175484863944108, "grad_norm": 29.150442214874094, "learning_rate": 5.062047470034786e-06, "loss": 0.1537872314453125, "step": 59855 }, { "epoch": 0.517591719915954, "grad_norm": 4.774100181367215, "learning_rate": 5.0618994883456975e-06, "loss": 0.090887451171875, "step": 59860 }, { "epoch": 0.5176349534374973, "grad_norm": 21.326202243993908, "learning_rate": 5.06175149714731e-06, "loss": 0.14936981201171876, "step": 59865 }, { "epoch": 0.5176781869590406, "grad_norm": 2.6448318265831174, "learning_rate": 5.061603496440307e-06, "loss": 0.0982269287109375, "step": 59870 }, { "epoch": 0.5177214204805838, "grad_norm": 20.021922069080514, "learning_rate": 5.061455486225371e-06, "loss": 0.65511474609375, "step": 59875 }, { "epoch": 0.5177646540021271, "grad_norm": 17.024253170290613, "learning_rate": 5.061307466503184e-06, "loss": 0.7697662353515625, "step": 59880 }, { "epoch": 0.5178078875236704, "grad_norm": 0.395575225520129, "learning_rate": 5.061159437274429e-06, "loss": 0.13326492309570312, "step": 59885 }, { "epoch": 0.5178511210452136, "grad_norm": 1.7027060123397049, "learning_rate": 5.061011398539789e-06, "loss": 0.46235198974609376, "step": 59890 }, { "epoch": 0.5178943545667569, "grad_norm": 3.195575767359308, "learning_rate": 5.060863350299947e-06, "loss": 0.3417999267578125, "step": 59895 }, { "epoch": 0.5179375880883001, "grad_norm": 3.223160900140212, "learning_rate": 5.060715292555585e-06, "loss": 0.14192142486572265, "step": 59900 }, { "epoch": 0.5179808216098434, "grad_norm": 2.1553901970033467, "learning_rate": 5.060567225307386e-06, "loss": 0.21512222290039062, "step": 59905 }, { "epoch": 0.5180240551313867, "grad_norm": 18.311422232282382, "learning_rate": 5.0604191485560325e-06, "loss": 0.16412887573242188, "step": 59910 }, { "epoch": 0.51806728865293, "grad_norm": 0.16611797285472388, "learning_rate": 5.060271062302209e-06, "loss": 0.018089675903320314, "step": 59915 }, { "epoch": 0.5181105221744732, "grad_norm": 9.511844336430274, "learning_rate": 5.060122966546597e-06, "loss": 0.1725849151611328, "step": 59920 }, { "epoch": 0.5181537556960165, "grad_norm": 16.92081294689371, "learning_rate": 5.059974861289879e-06, "loss": 0.38531494140625, "step": 59925 }, { "epoch": 0.5181969892175597, "grad_norm": 3.9979035729053605, "learning_rate": 5.059826746532741e-06, "loss": 0.0634521484375, "step": 59930 }, { "epoch": 0.518240222739103, "grad_norm": 14.81915187649156, "learning_rate": 5.059678622275863e-06, "loss": 0.07991180419921876, "step": 59935 }, { "epoch": 0.5182834562606462, "grad_norm": 2.5640258853912234, "learning_rate": 5.059530488519929e-06, "loss": 0.063922119140625, "step": 59940 }, { "epoch": 0.5183266897821895, "grad_norm": 1.102381338904673, "learning_rate": 5.059382345265623e-06, "loss": 0.077301025390625, "step": 59945 }, { "epoch": 0.5183699233037328, "grad_norm": 0.22448523048956426, "learning_rate": 5.059234192513627e-06, "loss": 0.11273727416992188, "step": 59950 }, { "epoch": 0.518413156825276, "grad_norm": 13.620095892461311, "learning_rate": 5.059086030264625e-06, "loss": 0.329296875, "step": 59955 }, { "epoch": 0.5184563903468193, "grad_norm": 0.47470916290986964, "learning_rate": 5.0589378585193e-06, "loss": 0.060840606689453125, "step": 59960 }, { "epoch": 0.5184996238683626, "grad_norm": 18.84061531040756, "learning_rate": 5.058789677278335e-06, "loss": 0.51143798828125, "step": 59965 }, { "epoch": 0.5185428573899058, "grad_norm": 15.07148253839602, "learning_rate": 5.0586414865424146e-06, "loss": 0.12668275833129883, "step": 59970 }, { "epoch": 0.5185860909114491, "grad_norm": 28.593108091724645, "learning_rate": 5.058493286312222e-06, "loss": 0.16686744689941407, "step": 59975 }, { "epoch": 0.5186293244329924, "grad_norm": 24.532078203516967, "learning_rate": 5.05834507658844e-06, "loss": 0.0998382568359375, "step": 59980 }, { "epoch": 0.5186725579545356, "grad_norm": 18.01410324008087, "learning_rate": 5.058196857371752e-06, "loss": 0.11941680908203126, "step": 59985 }, { "epoch": 0.5187157914760789, "grad_norm": 1.8953776002909495, "learning_rate": 5.058048628662842e-06, "loss": 0.06193332672119141, "step": 59990 }, { "epoch": 0.5187590249976222, "grad_norm": 6.871014958004035, "learning_rate": 5.057900390462394e-06, "loss": 0.06414871215820313, "step": 59995 }, { "epoch": 0.5188022585191654, "grad_norm": 14.497801472228517, "learning_rate": 5.057752142771091e-06, "loss": 0.18307037353515626, "step": 60000 }, { "epoch": 0.5188454920407087, "grad_norm": 4.649432913301617, "learning_rate": 5.057603885589616e-06, "loss": 0.155364990234375, "step": 60005 }, { "epoch": 0.518888725562252, "grad_norm": 1.240392789910586, "learning_rate": 5.057455618918655e-06, "loss": 0.14400863647460938, "step": 60010 }, { "epoch": 0.5189319590837952, "grad_norm": 0.20543449812696493, "learning_rate": 5.05730734275889e-06, "loss": 0.1834014892578125, "step": 60015 }, { "epoch": 0.5189751926053384, "grad_norm": 1.2234442959771932, "learning_rate": 5.057159057111006e-06, "loss": 0.5161151885986328, "step": 60020 }, { "epoch": 0.5190184261268818, "grad_norm": 0.3705490697131077, "learning_rate": 5.057010761975685e-06, "loss": 0.04586029052734375, "step": 60025 }, { "epoch": 0.519061659648425, "grad_norm": 1.3329294827050218, "learning_rate": 5.056862457353613e-06, "loss": 0.19063262939453124, "step": 60030 }, { "epoch": 0.5191048931699682, "grad_norm": 5.360535080822689, "learning_rate": 5.056714143245473e-06, "loss": 0.16675949096679688, "step": 60035 }, { "epoch": 0.5191481266915116, "grad_norm": 26.334172102353037, "learning_rate": 5.056565819651948e-06, "loss": 0.16688194274902343, "step": 60040 }, { "epoch": 0.5191913602130548, "grad_norm": 0.5304787369306242, "learning_rate": 5.056417486573725e-06, "loss": 0.025719642639160156, "step": 60045 }, { "epoch": 0.519234593734598, "grad_norm": 22.871467624680157, "learning_rate": 5.056269144011485e-06, "loss": 0.160919189453125, "step": 60050 }, { "epoch": 0.5192778272561414, "grad_norm": 1.2999092638516823, "learning_rate": 5.0561207919659135e-06, "loss": 0.023087310791015624, "step": 60055 }, { "epoch": 0.5193210607776846, "grad_norm": 4.710336289685409, "learning_rate": 5.0559724304376955e-06, "loss": 0.15950546264648438, "step": 60060 }, { "epoch": 0.5193642942992278, "grad_norm": 1.3853500573355064, "learning_rate": 5.055824059427513e-06, "loss": 0.12784423828125, "step": 60065 }, { "epoch": 0.5194075278207712, "grad_norm": 22.343743338510095, "learning_rate": 5.0556756789360525e-06, "loss": 0.16311187744140626, "step": 60070 }, { "epoch": 0.5194507613423144, "grad_norm": 2.1744219528349196, "learning_rate": 5.055527288963997e-06, "loss": 0.1153839111328125, "step": 60075 }, { "epoch": 0.5194939948638576, "grad_norm": 1.0528612713833425, "learning_rate": 5.055378889512032e-06, "loss": 0.02093353271484375, "step": 60080 }, { "epoch": 0.519537228385401, "grad_norm": 26.092128265155832, "learning_rate": 5.055230480580839e-06, "loss": 0.158465576171875, "step": 60085 }, { "epoch": 0.5195804619069442, "grad_norm": 4.561351352379346, "learning_rate": 5.055082062171107e-06, "loss": 0.03303680419921875, "step": 60090 }, { "epoch": 0.5196236954284874, "grad_norm": 29.06224510969493, "learning_rate": 5.054933634283518e-06, "loss": 0.16787853240966796, "step": 60095 }, { "epoch": 0.5196669289500307, "grad_norm": 13.784815819364317, "learning_rate": 5.054785196918756e-06, "loss": 0.06646194458007812, "step": 60100 }, { "epoch": 0.519710162471574, "grad_norm": 37.43252721868929, "learning_rate": 5.054636750077506e-06, "loss": 0.267181396484375, "step": 60105 }, { "epoch": 0.5197533959931172, "grad_norm": 30.43982671049306, "learning_rate": 5.054488293760453e-06, "loss": 0.2052764892578125, "step": 60110 }, { "epoch": 0.5197966295146604, "grad_norm": 17.511416191885747, "learning_rate": 5.054339827968282e-06, "loss": 0.13010711669921876, "step": 60115 }, { "epoch": 0.5198398630362038, "grad_norm": 13.366663872881611, "learning_rate": 5.054191352701676e-06, "loss": 0.4038970947265625, "step": 60120 }, { "epoch": 0.519883096557747, "grad_norm": 3.1469301648027472, "learning_rate": 5.054042867961322e-06, "loss": 0.218402099609375, "step": 60125 }, { "epoch": 0.5199263300792902, "grad_norm": 11.64810089733048, "learning_rate": 5.0538943737479045e-06, "loss": 0.0816741943359375, "step": 60130 }, { "epoch": 0.5199695636008336, "grad_norm": 2.3202666049024967, "learning_rate": 5.053745870062106e-06, "loss": 0.3318641662597656, "step": 60135 }, { "epoch": 0.5200127971223768, "grad_norm": 0.49333718275474997, "learning_rate": 5.053597356904614e-06, "loss": 0.07333908081054688, "step": 60140 }, { "epoch": 0.52005603064392, "grad_norm": 1.0325443472425069, "learning_rate": 5.053448834276113e-06, "loss": 0.051073455810546876, "step": 60145 }, { "epoch": 0.5200992641654634, "grad_norm": 3.5955489069157727, "learning_rate": 5.053300302177286e-06, "loss": 0.21627197265625, "step": 60150 }, { "epoch": 0.5201424976870066, "grad_norm": 4.506283009788555, "learning_rate": 5.05315176060882e-06, "loss": 0.09333953857421876, "step": 60155 }, { "epoch": 0.5201857312085498, "grad_norm": 9.702454873683902, "learning_rate": 5.0530032095714e-06, "loss": 0.09951171875, "step": 60160 }, { "epoch": 0.5202289647300932, "grad_norm": 31.25899728480537, "learning_rate": 5.05285464906571e-06, "loss": 0.5737852096557617, "step": 60165 }, { "epoch": 0.5202721982516364, "grad_norm": 49.235044472879, "learning_rate": 5.052706079092436e-06, "loss": 0.37447509765625, "step": 60170 }, { "epoch": 0.5203154317731796, "grad_norm": 1.5129363122572266, "learning_rate": 5.0525574996522634e-06, "loss": 0.0668304443359375, "step": 60175 }, { "epoch": 0.520358665294723, "grad_norm": 4.406369435349332, "learning_rate": 5.052408910745876e-06, "loss": 0.115838623046875, "step": 60180 }, { "epoch": 0.5204018988162662, "grad_norm": 0.916101035202919, "learning_rate": 5.052260312373961e-06, "loss": 0.2054931640625, "step": 60185 }, { "epoch": 0.5204451323378094, "grad_norm": 5.862793009997696, "learning_rate": 5.052111704537202e-06, "loss": 0.1236175537109375, "step": 60190 }, { "epoch": 0.5204883658593527, "grad_norm": 5.351679317718771, "learning_rate": 5.0519630872362855e-06, "loss": 0.095263671875, "step": 60195 }, { "epoch": 0.520531599380896, "grad_norm": 8.784508716969954, "learning_rate": 5.0518144604718965e-06, "loss": 0.197686767578125, "step": 60200 }, { "epoch": 0.5205748329024392, "grad_norm": 4.409610845992689, "learning_rate": 5.0516658242447205e-06, "loss": 0.571722412109375, "step": 60205 }, { "epoch": 0.5206180664239825, "grad_norm": 18.57249004653862, "learning_rate": 5.051517178555444e-06, "loss": 0.127789306640625, "step": 60210 }, { "epoch": 0.5206612999455258, "grad_norm": 3.879391998902535, "learning_rate": 5.0513685234047495e-06, "loss": 0.14785690307617189, "step": 60215 }, { "epoch": 0.520704533467069, "grad_norm": 7.252928180153438, "learning_rate": 5.051219858793326e-06, "loss": 0.34532470703125, "step": 60220 }, { "epoch": 0.5207477669886122, "grad_norm": 2.2073982842389634, "learning_rate": 5.051071184721858e-06, "loss": 0.21720428466796876, "step": 60225 }, { "epoch": 0.5207910005101556, "grad_norm": 1.1208522536426342, "learning_rate": 5.05092250119103e-06, "loss": 0.102874755859375, "step": 60230 }, { "epoch": 0.5208342340316988, "grad_norm": 4.871470923255524, "learning_rate": 5.050773808201529e-06, "loss": 0.2171783447265625, "step": 60235 }, { "epoch": 0.520877467553242, "grad_norm": 0.32651968335393894, "learning_rate": 5.05062510575404e-06, "loss": 0.19432735443115234, "step": 60240 }, { "epoch": 0.5209207010747854, "grad_norm": 3.283648709430652, "learning_rate": 5.05047639384925e-06, "loss": 0.1200592041015625, "step": 60245 }, { "epoch": 0.5209639345963286, "grad_norm": 0.4223627015468828, "learning_rate": 5.050327672487844e-06, "loss": 0.18785552978515624, "step": 60250 }, { "epoch": 0.5210071681178718, "grad_norm": 4.446713199155556, "learning_rate": 5.050178941670507e-06, "loss": 0.06277008056640625, "step": 60255 }, { "epoch": 0.5210504016394152, "grad_norm": 21.738068130432477, "learning_rate": 5.0500302013979264e-06, "loss": 0.42490615844726565, "step": 60260 }, { "epoch": 0.5210936351609584, "grad_norm": 0.3288598197594526, "learning_rate": 5.0498814516707886e-06, "loss": 0.36050872802734374, "step": 60265 }, { "epoch": 0.5211368686825016, "grad_norm": 6.130055452970898, "learning_rate": 5.049732692489778e-06, "loss": 0.07384185791015625, "step": 60270 }, { "epoch": 0.521180102204045, "grad_norm": 9.12093919228983, "learning_rate": 5.0495839238555805e-06, "loss": 0.15239791870117186, "step": 60275 }, { "epoch": 0.5212233357255882, "grad_norm": 44.40059090775936, "learning_rate": 5.049435145768884e-06, "loss": 0.330712890625, "step": 60280 }, { "epoch": 0.5212665692471314, "grad_norm": 0.13151543735086704, "learning_rate": 5.049286358230373e-06, "loss": 0.15080337524414061, "step": 60285 }, { "epoch": 0.5213098027686747, "grad_norm": 2.2140587973629127, "learning_rate": 5.049137561240735e-06, "loss": 0.0648406982421875, "step": 60290 }, { "epoch": 0.521353036290218, "grad_norm": 4.0033204423755135, "learning_rate": 5.048988754800656e-06, "loss": 0.1151357650756836, "step": 60295 }, { "epoch": 0.5213962698117612, "grad_norm": 6.663228887642941, "learning_rate": 5.048839938910821e-06, "loss": 0.2095256805419922, "step": 60300 }, { "epoch": 0.5214395033333045, "grad_norm": 2.1582566462450767, "learning_rate": 5.048691113571918e-06, "loss": 0.1072998046875, "step": 60305 }, { "epoch": 0.5214827368548478, "grad_norm": 1.4490364140867351, "learning_rate": 5.048542278784632e-06, "loss": 0.2874885559082031, "step": 60310 }, { "epoch": 0.521525970376391, "grad_norm": 23.499953975143555, "learning_rate": 5.048393434549651e-06, "loss": 0.279656982421875, "step": 60315 }, { "epoch": 0.5215692038979343, "grad_norm": 3.024988752025712, "learning_rate": 5.04824458086766e-06, "loss": 0.20453014373779296, "step": 60320 }, { "epoch": 0.5216124374194776, "grad_norm": 0.6178622447568987, "learning_rate": 5.048095717739345e-06, "loss": 0.032575225830078124, "step": 60325 }, { "epoch": 0.5216556709410208, "grad_norm": 6.868860165367864, "learning_rate": 5.047946845165394e-06, "loss": 0.10335617065429688, "step": 60330 }, { "epoch": 0.5216989044625641, "grad_norm": 3.4854846720156347, "learning_rate": 5.047797963146494e-06, "loss": 0.10778427124023438, "step": 60335 }, { "epoch": 0.5217421379841074, "grad_norm": 3.7070930093515275, "learning_rate": 5.04764907168333e-06, "loss": 0.04321479797363281, "step": 60340 }, { "epoch": 0.5217853715056506, "grad_norm": 1.8080757216768506, "learning_rate": 5.047500170776589e-06, "loss": 0.541357421875, "step": 60345 }, { "epoch": 0.5218286050271939, "grad_norm": 12.597641421580256, "learning_rate": 5.04735126042696e-06, "loss": 0.0867950439453125, "step": 60350 }, { "epoch": 0.5218718385487372, "grad_norm": 1.2162793148819155, "learning_rate": 5.047202340635126e-06, "loss": 0.10424423217773438, "step": 60355 }, { "epoch": 0.5219150720702804, "grad_norm": 2.5299779087866217, "learning_rate": 5.047053411401776e-06, "loss": 0.11371078491210937, "step": 60360 }, { "epoch": 0.5219583055918237, "grad_norm": 0.49179365664297836, "learning_rate": 5.046904472727597e-06, "loss": 0.13408164978027343, "step": 60365 }, { "epoch": 0.5220015391133669, "grad_norm": 17.37808800279564, "learning_rate": 5.046755524613275e-06, "loss": 0.14152297973632813, "step": 60370 }, { "epoch": 0.5220447726349102, "grad_norm": 42.81595899960863, "learning_rate": 5.046606567059497e-06, "loss": 0.12232589721679688, "step": 60375 }, { "epoch": 0.5220880061564535, "grad_norm": 1.7524980234961824, "learning_rate": 5.046457600066952e-06, "loss": 0.38608551025390625, "step": 60380 }, { "epoch": 0.5221312396779967, "grad_norm": 19.591969863000287, "learning_rate": 5.046308623636324e-06, "loss": 0.1486541748046875, "step": 60385 }, { "epoch": 0.52217447319954, "grad_norm": 42.397047232023596, "learning_rate": 5.046159637768301e-06, "loss": 0.347509765625, "step": 60390 }, { "epoch": 0.5222177067210833, "grad_norm": 8.555407290666711, "learning_rate": 5.046010642463572e-06, "loss": 0.2381378173828125, "step": 60395 }, { "epoch": 0.5222609402426265, "grad_norm": 5.911832180320232, "learning_rate": 5.045861637722821e-06, "loss": 0.14569091796875, "step": 60400 }, { "epoch": 0.5223041737641698, "grad_norm": 6.4261446978130286, "learning_rate": 5.045712623546739e-06, "loss": 0.0608978271484375, "step": 60405 }, { "epoch": 0.522347407285713, "grad_norm": 25.75983134141889, "learning_rate": 5.045563599936011e-06, "loss": 0.451654052734375, "step": 60410 }, { "epoch": 0.5223906408072563, "grad_norm": 1.7685840353734328, "learning_rate": 5.045414566891322e-06, "loss": 0.054354095458984376, "step": 60415 }, { "epoch": 0.5224338743287996, "grad_norm": 23.724331577509744, "learning_rate": 5.045265524413364e-06, "loss": 0.15766220092773436, "step": 60420 }, { "epoch": 0.5224771078503428, "grad_norm": 3.2009782484455283, "learning_rate": 5.0451164725028214e-06, "loss": 0.14889068603515626, "step": 60425 }, { "epoch": 0.5225203413718861, "grad_norm": 8.830860926715705, "learning_rate": 5.044967411160383e-06, "loss": 0.3036949157714844, "step": 60430 }, { "epoch": 0.5225635748934294, "grad_norm": 4.414352810322143, "learning_rate": 5.044818340386734e-06, "loss": 0.11908493041992188, "step": 60435 }, { "epoch": 0.5226068084149726, "grad_norm": 14.91516778866376, "learning_rate": 5.0446692601825644e-06, "loss": 0.1948406219482422, "step": 60440 }, { "epoch": 0.5226500419365159, "grad_norm": 6.334233824061408, "learning_rate": 5.044520170548561e-06, "loss": 0.19788055419921874, "step": 60445 }, { "epoch": 0.5226932754580592, "grad_norm": 3.9372991907380603, "learning_rate": 5.044371071485411e-06, "loss": 0.18736801147460938, "step": 60450 }, { "epoch": 0.5227365089796024, "grad_norm": 8.387950496931257, "learning_rate": 5.044221962993802e-06, "loss": 0.578985595703125, "step": 60455 }, { "epoch": 0.5227797425011457, "grad_norm": 2.0616056918103665, "learning_rate": 5.044072845074423e-06, "loss": 0.023897171020507812, "step": 60460 }, { "epoch": 0.5228229760226889, "grad_norm": 6.189402320838331, "learning_rate": 5.04392371772796e-06, "loss": 0.06170196533203125, "step": 60465 }, { "epoch": 0.5228662095442322, "grad_norm": 0.5930222137155648, "learning_rate": 5.0437745809551e-06, "loss": 0.21861324310302735, "step": 60470 }, { "epoch": 0.5229094430657755, "grad_norm": 9.86976218095447, "learning_rate": 5.043625434756534e-06, "loss": 0.16544189453125, "step": 60475 }, { "epoch": 0.5229526765873187, "grad_norm": 3.73963216028839, "learning_rate": 5.043476279132949e-06, "loss": 0.054524993896484374, "step": 60480 }, { "epoch": 0.522995910108862, "grad_norm": 1.089723287516771, "learning_rate": 5.04332711408503e-06, "loss": 0.047649383544921875, "step": 60485 }, { "epoch": 0.5230391436304053, "grad_norm": 3.2217604562371194, "learning_rate": 5.043177939613468e-06, "loss": 0.214886474609375, "step": 60490 }, { "epoch": 0.5230823771519485, "grad_norm": 19.554669140360037, "learning_rate": 5.043028755718949e-06, "loss": 0.256671142578125, "step": 60495 }, { "epoch": 0.5231256106734918, "grad_norm": 0.19084420980079617, "learning_rate": 5.042879562402162e-06, "loss": 0.05196075439453125, "step": 60500 }, { "epoch": 0.5231688441950351, "grad_norm": 0.27772710653696536, "learning_rate": 5.042730359663795e-06, "loss": 0.01855297088623047, "step": 60505 }, { "epoch": 0.5232120777165783, "grad_norm": 26.50773396656689, "learning_rate": 5.042581147504537e-06, "loss": 0.311175537109375, "step": 60510 }, { "epoch": 0.5232553112381216, "grad_norm": 21.43315071910383, "learning_rate": 5.042431925925074e-06, "loss": 0.2216949462890625, "step": 60515 }, { "epoch": 0.5232985447596649, "grad_norm": 8.670532918786437, "learning_rate": 5.042282694926097e-06, "loss": 0.240673828125, "step": 60520 }, { "epoch": 0.5233417782812081, "grad_norm": 2.167034930127061, "learning_rate": 5.042133454508291e-06, "loss": 0.09323005676269532, "step": 60525 }, { "epoch": 0.5233850118027514, "grad_norm": 4.081406081754495, "learning_rate": 5.041984204672347e-06, "loss": 0.2584228515625, "step": 60530 }, { "epoch": 0.5234282453242947, "grad_norm": 1.0940462923046794, "learning_rate": 5.0418349454189516e-06, "loss": 0.10684356689453126, "step": 60535 }, { "epoch": 0.5234714788458379, "grad_norm": 3.4122508675863332, "learning_rate": 5.041685676748794e-06, "loss": 0.05778961181640625, "step": 60540 }, { "epoch": 0.5235147123673811, "grad_norm": 3.0157456776589404, "learning_rate": 5.041536398662563e-06, "loss": 0.12821502685546876, "step": 60545 }, { "epoch": 0.5235579458889245, "grad_norm": 7.490358859233511, "learning_rate": 5.041387111160946e-06, "loss": 0.06133270263671875, "step": 60550 }, { "epoch": 0.5236011794104677, "grad_norm": 3.47650798671677, "learning_rate": 5.041237814244632e-06, "loss": 0.12303314208984376, "step": 60555 }, { "epoch": 0.5236444129320109, "grad_norm": 5.646641267607, "learning_rate": 5.04108850791431e-06, "loss": 0.05628509521484375, "step": 60560 }, { "epoch": 0.5236876464535543, "grad_norm": 2.722355024486502, "learning_rate": 5.040939192170667e-06, "loss": 0.148052978515625, "step": 60565 }, { "epoch": 0.5237308799750975, "grad_norm": 0.46636760312256403, "learning_rate": 5.040789867014394e-06, "loss": 0.15655975341796874, "step": 60570 }, { "epoch": 0.5237741134966407, "grad_norm": 14.542095751047958, "learning_rate": 5.0406405324461786e-06, "loss": 0.1585357666015625, "step": 60575 }, { "epoch": 0.523817347018184, "grad_norm": 14.71961578631313, "learning_rate": 5.0404911884667085e-06, "loss": 0.377398681640625, "step": 60580 }, { "epoch": 0.5238605805397273, "grad_norm": 4.573260510335551, "learning_rate": 5.040341835076674e-06, "loss": 0.07494659423828125, "step": 60585 }, { "epoch": 0.5239038140612705, "grad_norm": 1.1897644747677139, "learning_rate": 5.040192472276764e-06, "loss": 0.113555908203125, "step": 60590 }, { "epoch": 0.5239470475828139, "grad_norm": 3.4098129741935086, "learning_rate": 5.0400431000676645e-06, "loss": 0.04595794677734375, "step": 60595 }, { "epoch": 0.5239902811043571, "grad_norm": 3.3820577149296933, "learning_rate": 5.039893718450068e-06, "loss": 0.07518272399902344, "step": 60600 }, { "epoch": 0.5240335146259003, "grad_norm": 1.336172772799598, "learning_rate": 5.039744327424661e-06, "loss": 0.13888931274414062, "step": 60605 }, { "epoch": 0.5240767481474436, "grad_norm": 1.0745816323879882, "learning_rate": 5.039594926992134e-06, "loss": 0.18732452392578125, "step": 60610 }, { "epoch": 0.5241199816689869, "grad_norm": 0.445953323297024, "learning_rate": 5.039445517153175e-06, "loss": 0.07272911071777344, "step": 60615 }, { "epoch": 0.5241632151905301, "grad_norm": 11.508217888684673, "learning_rate": 5.039296097908475e-06, "loss": 0.02939453125, "step": 60620 }, { "epoch": 0.5242064487120734, "grad_norm": 10.695255497135975, "learning_rate": 5.039146669258721e-06, "loss": 0.06516532897949219, "step": 60625 }, { "epoch": 0.5242496822336167, "grad_norm": 21.71462837314281, "learning_rate": 5.038997231204601e-06, "loss": 0.1693267822265625, "step": 60630 }, { "epoch": 0.5242929157551599, "grad_norm": 5.818313687466517, "learning_rate": 5.038847783746807e-06, "loss": 0.28237152099609375, "step": 60635 }, { "epoch": 0.5243361492767031, "grad_norm": 27.68406073399555, "learning_rate": 5.038698326886028e-06, "loss": 0.15374164581298827, "step": 60640 }, { "epoch": 0.5243793827982465, "grad_norm": 12.967722992933842, "learning_rate": 5.038548860622951e-06, "loss": 0.1215087890625, "step": 60645 }, { "epoch": 0.5244226163197897, "grad_norm": 4.852837120208411, "learning_rate": 5.038399384958267e-06, "loss": 0.09550247192382813, "step": 60650 }, { "epoch": 0.5244658498413329, "grad_norm": 10.991595937727862, "learning_rate": 5.038249899892667e-06, "loss": 0.16199798583984376, "step": 60655 }, { "epoch": 0.5245090833628763, "grad_norm": 28.825554676276226, "learning_rate": 5.038100405426837e-06, "loss": 0.3841888427734375, "step": 60660 }, { "epoch": 0.5245523168844195, "grad_norm": 32.806775044087956, "learning_rate": 5.037950901561467e-06, "loss": 0.15276947021484374, "step": 60665 }, { "epoch": 0.5245955504059627, "grad_norm": 53.43740458389931, "learning_rate": 5.03780138829725e-06, "loss": 0.60460205078125, "step": 60670 }, { "epoch": 0.5246387839275061, "grad_norm": 6.101269449797264, "learning_rate": 5.037651865634871e-06, "loss": 0.082952880859375, "step": 60675 }, { "epoch": 0.5246820174490493, "grad_norm": 1.2695263296346202, "learning_rate": 5.037502333575023e-06, "loss": 0.118072509765625, "step": 60680 }, { "epoch": 0.5247252509705925, "grad_norm": 10.129845400492057, "learning_rate": 5.037352792118393e-06, "loss": 0.23987941741943358, "step": 60685 }, { "epoch": 0.5247684844921359, "grad_norm": 19.302336967561345, "learning_rate": 5.037203241265673e-06, "loss": 0.125543212890625, "step": 60690 }, { "epoch": 0.5248117180136791, "grad_norm": 3.047851176404399, "learning_rate": 5.037053681017551e-06, "loss": 0.29411659240722654, "step": 60695 }, { "epoch": 0.5248549515352223, "grad_norm": 19.515782524366085, "learning_rate": 5.036904111374718e-06, "loss": 0.16043014526367189, "step": 60700 }, { "epoch": 0.5248981850567657, "grad_norm": 22.721577497940743, "learning_rate": 5.036754532337863e-06, "loss": 0.12007904052734375, "step": 60705 }, { "epoch": 0.5249414185783089, "grad_norm": 8.14015321839501, "learning_rate": 5.036604943907676e-06, "loss": 0.2078948974609375, "step": 60710 }, { "epoch": 0.5249846520998521, "grad_norm": 26.74976130989947, "learning_rate": 5.036455346084848e-06, "loss": 0.18668365478515625, "step": 60715 }, { "epoch": 0.5250278856213954, "grad_norm": 11.503132012782515, "learning_rate": 5.036305738870066e-06, "loss": 0.06389884948730469, "step": 60720 }, { "epoch": 0.5250711191429387, "grad_norm": 18.330894517476548, "learning_rate": 5.036156122264023e-06, "loss": 0.2260711669921875, "step": 60725 }, { "epoch": 0.5251143526644819, "grad_norm": 3.1089979275070965, "learning_rate": 5.036006496267408e-06, "loss": 0.0804107666015625, "step": 60730 }, { "epoch": 0.5251575861860251, "grad_norm": 0.31710303461709516, "learning_rate": 5.03585686088091e-06, "loss": 0.1166534423828125, "step": 60735 }, { "epoch": 0.5252008197075685, "grad_norm": 10.55217433211065, "learning_rate": 5.035707216105221e-06, "loss": 0.07460174560546876, "step": 60740 }, { "epoch": 0.5252440532291117, "grad_norm": 1.821438582676043, "learning_rate": 5.03555756194103e-06, "loss": 0.3294189453125, "step": 60745 }, { "epoch": 0.525287286750655, "grad_norm": 32.20265615252781, "learning_rate": 5.035407898389027e-06, "loss": 0.22791213989257814, "step": 60750 }, { "epoch": 0.5253305202721983, "grad_norm": 2.7796204118212544, "learning_rate": 5.035258225449903e-06, "loss": 0.0443511962890625, "step": 60755 }, { "epoch": 0.5253737537937415, "grad_norm": 2.951804700704762, "learning_rate": 5.0351085431243474e-06, "loss": 0.402545166015625, "step": 60760 }, { "epoch": 0.5254169873152847, "grad_norm": 2.0749965310615734, "learning_rate": 5.034958851413051e-06, "loss": 0.0235748291015625, "step": 60765 }, { "epoch": 0.5254602208368281, "grad_norm": 32.67301227484279, "learning_rate": 5.034809150316705e-06, "loss": 0.53636474609375, "step": 60770 }, { "epoch": 0.5255034543583713, "grad_norm": 32.45827627352755, "learning_rate": 5.034659439835998e-06, "loss": 0.09457244873046874, "step": 60775 }, { "epoch": 0.5255466878799145, "grad_norm": 4.7491526163379785, "learning_rate": 5.034509719971623e-06, "loss": 0.03565521240234375, "step": 60780 }, { "epoch": 0.5255899214014579, "grad_norm": 10.701915216425252, "learning_rate": 5.034359990724268e-06, "loss": 0.06509857177734375, "step": 60785 }, { "epoch": 0.5256331549230011, "grad_norm": 3.305101609824262, "learning_rate": 5.034210252094624e-06, "loss": 0.2739845275878906, "step": 60790 }, { "epoch": 0.5256763884445443, "grad_norm": 16.019271292392794, "learning_rate": 5.0340605040833824e-06, "loss": 0.5115463256835937, "step": 60795 }, { "epoch": 0.5257196219660877, "grad_norm": 2.221972468721752, "learning_rate": 5.033910746691233e-06, "loss": 0.3431270599365234, "step": 60800 }, { "epoch": 0.5257628554876309, "grad_norm": 13.830949092441944, "learning_rate": 5.033760979918868e-06, "loss": 0.06351814270019532, "step": 60805 }, { "epoch": 0.5258060890091741, "grad_norm": 0.7688603238242145, "learning_rate": 5.033611203766977e-06, "loss": 0.0472259521484375, "step": 60810 }, { "epoch": 0.5258493225307174, "grad_norm": 1.8793647546273202, "learning_rate": 5.03346141823625e-06, "loss": 0.20050048828125, "step": 60815 }, { "epoch": 0.5258925560522607, "grad_norm": 7.231522347339771, "learning_rate": 5.033311623327379e-06, "loss": 0.11746978759765625, "step": 60820 }, { "epoch": 0.5259357895738039, "grad_norm": 6.103602990650352, "learning_rate": 5.033161819041055e-06, "loss": 0.07427520751953125, "step": 60825 }, { "epoch": 0.5259790230953472, "grad_norm": 0.11638185081073527, "learning_rate": 5.0330120053779674e-06, "loss": 0.35734806060791013, "step": 60830 }, { "epoch": 0.5260222566168905, "grad_norm": 7.263304446236815, "learning_rate": 5.032862182338809e-06, "loss": 0.0685272216796875, "step": 60835 }, { "epoch": 0.5260654901384337, "grad_norm": 3.5244644265684784, "learning_rate": 5.0327123499242696e-06, "loss": 0.180517578125, "step": 60840 }, { "epoch": 0.526108723659977, "grad_norm": 15.188258824391006, "learning_rate": 5.03256250813504e-06, "loss": 0.164691162109375, "step": 60845 }, { "epoch": 0.5261519571815203, "grad_norm": 1.8015166162127736, "learning_rate": 5.0324126569718115e-06, "loss": 0.08157501220703126, "step": 60850 }, { "epoch": 0.5261951907030635, "grad_norm": 40.53097416169756, "learning_rate": 5.032262796435276e-06, "loss": 0.2256591796875, "step": 60855 }, { "epoch": 0.5262384242246068, "grad_norm": 14.38011320821747, "learning_rate": 5.032112926526124e-06, "loss": 0.265283203125, "step": 60860 }, { "epoch": 0.5262816577461501, "grad_norm": 6.654963478775008, "learning_rate": 5.031963047245046e-06, "loss": 0.18665084838867188, "step": 60865 }, { "epoch": 0.5263248912676933, "grad_norm": 37.67668135795326, "learning_rate": 5.031813158592735e-06, "loss": 0.469891357421875, "step": 60870 }, { "epoch": 0.5263681247892366, "grad_norm": 5.72820794984049, "learning_rate": 5.031663260569881e-06, "loss": 0.08554153442382813, "step": 60875 }, { "epoch": 0.5264113583107799, "grad_norm": 14.9196915398, "learning_rate": 5.031513353177175e-06, "loss": 0.272564697265625, "step": 60880 }, { "epoch": 0.5264545918323231, "grad_norm": 2.919283728259726, "learning_rate": 5.031363436415309e-06, "loss": 0.049587249755859375, "step": 60885 }, { "epoch": 0.5264978253538664, "grad_norm": 0.0879024670451879, "learning_rate": 5.031213510284975e-06, "loss": 0.1061859130859375, "step": 60890 }, { "epoch": 0.5265410588754096, "grad_norm": 3.7440037278473746, "learning_rate": 5.031063574786864e-06, "loss": 0.06773681640625, "step": 60895 }, { "epoch": 0.5265842923969529, "grad_norm": 1.3690297261561521, "learning_rate": 5.0309136299216665e-06, "loss": 0.02097015380859375, "step": 60900 }, { "epoch": 0.5266275259184962, "grad_norm": 0.25481145005276695, "learning_rate": 5.030763675690075e-06, "loss": 0.022124576568603515, "step": 60905 }, { "epoch": 0.5266707594400394, "grad_norm": 3.5490700797256594, "learning_rate": 5.030613712092781e-06, "loss": 0.134027099609375, "step": 60910 }, { "epoch": 0.5267139929615827, "grad_norm": 36.25340321957265, "learning_rate": 5.0304637391304755e-06, "loss": 0.6069664001464844, "step": 60915 }, { "epoch": 0.526757226483126, "grad_norm": 13.287437712054286, "learning_rate": 5.0303137568038515e-06, "loss": 0.2134033203125, "step": 60920 }, { "epoch": 0.5268004600046692, "grad_norm": 0.4777949581220546, "learning_rate": 5.030163765113599e-06, "loss": 0.1701873779296875, "step": 60925 }, { "epoch": 0.5268436935262125, "grad_norm": 2.7803119051002363, "learning_rate": 5.030013764060411e-06, "loss": 0.122686767578125, "step": 60930 }, { "epoch": 0.5268869270477557, "grad_norm": 25.78367040752776, "learning_rate": 5.029863753644978e-06, "loss": 0.08052444458007812, "step": 60935 }, { "epoch": 0.526930160569299, "grad_norm": 18.295358550405922, "learning_rate": 5.0297137338679945e-06, "loss": 0.06348800659179688, "step": 60940 }, { "epoch": 0.5269733940908423, "grad_norm": 14.513022709628828, "learning_rate": 5.02956370473015e-06, "loss": 0.17505645751953125, "step": 60945 }, { "epoch": 0.5270166276123855, "grad_norm": 1.8214003357850275, "learning_rate": 5.029413666232136e-06, "loss": 0.15538177490234376, "step": 60950 }, { "epoch": 0.5270598611339288, "grad_norm": 2.998116412964843, "learning_rate": 5.029263618374647e-06, "loss": 0.3200077056884766, "step": 60955 }, { "epoch": 0.5271030946554721, "grad_norm": 1.600055423756045, "learning_rate": 5.029113561158372e-06, "loss": 0.07519073486328125, "step": 60960 }, { "epoch": 0.5271463281770153, "grad_norm": 29.472065035080497, "learning_rate": 5.028963494584005e-06, "loss": 0.4765625, "step": 60965 }, { "epoch": 0.5271895616985586, "grad_norm": 20.16515625413148, "learning_rate": 5.0288134186522385e-06, "loss": 0.27558364868164065, "step": 60970 }, { "epoch": 0.5272327952201019, "grad_norm": 7.165872985932902, "learning_rate": 5.028663333363763e-06, "loss": 0.290130615234375, "step": 60975 }, { "epoch": 0.5272760287416451, "grad_norm": 0.580488323447724, "learning_rate": 5.028513238719272e-06, "loss": 0.2592803955078125, "step": 60980 }, { "epoch": 0.5273192622631884, "grad_norm": 23.60831910650052, "learning_rate": 5.0283631347194565e-06, "loss": 0.1360443115234375, "step": 60985 }, { "epoch": 0.5273624957847316, "grad_norm": 33.79792749119868, "learning_rate": 5.0282130213650095e-06, "loss": 0.19941558837890624, "step": 60990 }, { "epoch": 0.5274057293062749, "grad_norm": 8.110240610141897, "learning_rate": 5.028062898656623e-06, "loss": 0.0629852294921875, "step": 60995 }, { "epoch": 0.5274489628278182, "grad_norm": 2.0053061117823776, "learning_rate": 5.027912766594991e-06, "loss": 0.16453475952148439, "step": 61000 }, { "epoch": 0.5274921963493614, "grad_norm": 27.522972599763985, "learning_rate": 5.027762625180804e-06, "loss": 0.2882080078125, "step": 61005 }, { "epoch": 0.5275354298709047, "grad_norm": 7.831487141152584, "learning_rate": 5.027612474414754e-06, "loss": 0.11655120849609375, "step": 61010 }, { "epoch": 0.527578663392448, "grad_norm": 6.688232706876347, "learning_rate": 5.027462314297536e-06, "loss": 0.102008056640625, "step": 61015 }, { "epoch": 0.5276218969139912, "grad_norm": 56.70614373249409, "learning_rate": 5.027312144829839e-06, "loss": 0.500970458984375, "step": 61020 }, { "epoch": 0.5276651304355345, "grad_norm": 14.361106069454047, "learning_rate": 5.0271619660123584e-06, "loss": 0.18739471435546876, "step": 61025 }, { "epoch": 0.5277083639570778, "grad_norm": 33.79775245189958, "learning_rate": 5.027011777845786e-06, "loss": 0.381494140625, "step": 61030 }, { "epoch": 0.527751597478621, "grad_norm": 0.15959945321263733, "learning_rate": 5.0268615803308156e-06, "loss": 0.10991058349609376, "step": 61035 }, { "epoch": 0.5277948310001643, "grad_norm": 8.60277086711335, "learning_rate": 5.026711373468137e-06, "loss": 0.15377349853515626, "step": 61040 }, { "epoch": 0.5278380645217076, "grad_norm": 28.853202313818603, "learning_rate": 5.026561157258446e-06, "loss": 0.22263336181640625, "step": 61045 }, { "epoch": 0.5278812980432508, "grad_norm": 6.526309377945949, "learning_rate": 5.026410931702432e-06, "loss": 0.11348495483398438, "step": 61050 }, { "epoch": 0.5279245315647941, "grad_norm": 6.779818304430491, "learning_rate": 5.026260696800792e-06, "loss": 0.07586669921875, "step": 61055 }, { "epoch": 0.5279677650863374, "grad_norm": 10.120759241442414, "learning_rate": 5.026110452554216e-06, "loss": 0.22371368408203124, "step": 61060 }, { "epoch": 0.5280109986078806, "grad_norm": 4.475843982876067, "learning_rate": 5.025960198963397e-06, "loss": 0.09685173034667968, "step": 61065 }, { "epoch": 0.5280542321294238, "grad_norm": 3.7332538159369095, "learning_rate": 5.0258099360290285e-06, "loss": 0.2834918975830078, "step": 61070 }, { "epoch": 0.5280974656509672, "grad_norm": 1.2406434282555368, "learning_rate": 5.025659663751804e-06, "loss": 0.08550071716308594, "step": 61075 }, { "epoch": 0.5281406991725104, "grad_norm": 1.34207846354995, "learning_rate": 5.025509382132417e-06, "loss": 0.19530792236328126, "step": 61080 }, { "epoch": 0.5281839326940536, "grad_norm": 29.307141146309444, "learning_rate": 5.025359091171558e-06, "loss": 0.1988494873046875, "step": 61085 }, { "epoch": 0.528227166215597, "grad_norm": 24.294374244836302, "learning_rate": 5.025208790869922e-06, "loss": 0.15991668701171874, "step": 61090 }, { "epoch": 0.5282703997371402, "grad_norm": 1.7844017942628914, "learning_rate": 5.025058481228203e-06, "loss": 0.0557037353515625, "step": 61095 }, { "epoch": 0.5283136332586834, "grad_norm": 7.089182152459032, "learning_rate": 5.024908162247093e-06, "loss": 0.0692840576171875, "step": 61100 }, { "epoch": 0.5283568667802268, "grad_norm": 2.9168460214761027, "learning_rate": 5.0247578339272855e-06, "loss": 0.07988643646240234, "step": 61105 }, { "epoch": 0.52840010030177, "grad_norm": 1.9608516621596934, "learning_rate": 5.0246074962694744e-06, "loss": 0.1361083984375, "step": 61110 }, { "epoch": 0.5284433338233132, "grad_norm": 1.4524862973975647, "learning_rate": 5.024457149274351e-06, "loss": 0.08771495819091797, "step": 61115 }, { "epoch": 0.5284865673448566, "grad_norm": 14.841441345860439, "learning_rate": 5.024306792942611e-06, "loss": 0.266766357421875, "step": 61120 }, { "epoch": 0.5285298008663998, "grad_norm": 0.3736205021997576, "learning_rate": 5.024156427274947e-06, "loss": 0.07989301681518554, "step": 61125 }, { "epoch": 0.528573034387943, "grad_norm": 18.55503629638657, "learning_rate": 5.024006052272052e-06, "loss": 0.11578521728515626, "step": 61130 }, { "epoch": 0.5286162679094863, "grad_norm": 4.02978844141272, "learning_rate": 5.02385566793462e-06, "loss": 0.06352462768554687, "step": 61135 }, { "epoch": 0.5286595014310296, "grad_norm": 16.184601882244415, "learning_rate": 5.023705274263344e-06, "loss": 0.12215576171875, "step": 61140 }, { "epoch": 0.5287027349525728, "grad_norm": 6.581640879067845, "learning_rate": 5.023554871258919e-06, "loss": 0.15230560302734375, "step": 61145 }, { "epoch": 0.5287459684741161, "grad_norm": 2.191180581070319, "learning_rate": 5.023404458922038e-06, "loss": 0.0508087158203125, "step": 61150 }, { "epoch": 0.5287892019956594, "grad_norm": 1.9305288073252551, "learning_rate": 5.023254037253393e-06, "loss": 0.04097061157226563, "step": 61155 }, { "epoch": 0.5288324355172026, "grad_norm": 14.188247224026941, "learning_rate": 5.02310360625368e-06, "loss": 0.1585235595703125, "step": 61160 }, { "epoch": 0.5288756690387458, "grad_norm": 34.531117973176926, "learning_rate": 5.022953165923592e-06, "loss": 0.1431915283203125, "step": 61165 }, { "epoch": 0.5289189025602892, "grad_norm": 15.135295560775669, "learning_rate": 5.022802716263823e-06, "loss": 0.145684814453125, "step": 61170 }, { "epoch": 0.5289621360818324, "grad_norm": 7.447297818953242, "learning_rate": 5.0226522572750665e-06, "loss": 0.34654693603515624, "step": 61175 }, { "epoch": 0.5290053696033756, "grad_norm": 7.630251526335181, "learning_rate": 5.022501788958016e-06, "loss": 0.18269996643066405, "step": 61180 }, { "epoch": 0.529048603124919, "grad_norm": 39.08097314478949, "learning_rate": 5.022351311313367e-06, "loss": 0.24363555908203124, "step": 61185 }, { "epoch": 0.5290918366464622, "grad_norm": 2.3738476457903555, "learning_rate": 5.02220082434181e-06, "loss": 0.061102294921875, "step": 61190 }, { "epoch": 0.5291350701680054, "grad_norm": 0.9089330939776693, "learning_rate": 5.022050328044044e-06, "loss": 0.07621917724609376, "step": 61195 }, { "epoch": 0.5291783036895488, "grad_norm": 0.1388733520523295, "learning_rate": 5.02189982242076e-06, "loss": 0.09812660217285156, "step": 61200 }, { "epoch": 0.529221537211092, "grad_norm": 0.7527847095412671, "learning_rate": 5.021749307472652e-06, "loss": 0.06634712219238281, "step": 61205 }, { "epoch": 0.5292647707326352, "grad_norm": 13.230889840226194, "learning_rate": 5.021598783200416e-06, "loss": 0.11480064392089843, "step": 61210 }, { "epoch": 0.5293080042541786, "grad_norm": 0.3986872684683004, "learning_rate": 5.021448249604743e-06, "loss": 0.30268325805664065, "step": 61215 }, { "epoch": 0.5293512377757218, "grad_norm": 2.120813316466086, "learning_rate": 5.021297706686332e-06, "loss": 0.0866119384765625, "step": 61220 }, { "epoch": 0.529394471297265, "grad_norm": 7.195061857064411, "learning_rate": 5.021147154445873e-06, "loss": 0.2574485778808594, "step": 61225 }, { "epoch": 0.5294377048188084, "grad_norm": 2.9972985563002426, "learning_rate": 5.020996592884062e-06, "loss": 0.4555511474609375, "step": 61230 }, { "epoch": 0.5294809383403516, "grad_norm": 1.9395235598009388, "learning_rate": 5.020846022001593e-06, "loss": 0.04322357177734375, "step": 61235 }, { "epoch": 0.5295241718618948, "grad_norm": 27.84664251171026, "learning_rate": 5.020695441799161e-06, "loss": 0.1722391128540039, "step": 61240 }, { "epoch": 0.529567405383438, "grad_norm": 0.9323281762672613, "learning_rate": 5.020544852277461e-06, "loss": 0.1074737548828125, "step": 61245 }, { "epoch": 0.5296106389049814, "grad_norm": 0.3721635691281676, "learning_rate": 5.0203942534371855e-06, "loss": 0.02720470428466797, "step": 61250 }, { "epoch": 0.5296538724265246, "grad_norm": 19.212944626523427, "learning_rate": 5.020243645279031e-06, "loss": 0.08323707580566406, "step": 61255 }, { "epoch": 0.5296971059480678, "grad_norm": 4.67770062571488, "learning_rate": 5.020093027803691e-06, "loss": 0.1597259521484375, "step": 61260 }, { "epoch": 0.5297403394696112, "grad_norm": 10.37059256396749, "learning_rate": 5.01994240101186e-06, "loss": 0.07787017822265625, "step": 61265 }, { "epoch": 0.5297835729911544, "grad_norm": 4.246415300069037, "learning_rate": 5.019791764904234e-06, "loss": 0.059088134765625, "step": 61270 }, { "epoch": 0.5298268065126976, "grad_norm": 19.628919843077338, "learning_rate": 5.019641119481507e-06, "loss": 0.5797752380371094, "step": 61275 }, { "epoch": 0.529870040034241, "grad_norm": 27.88663398469511, "learning_rate": 5.019490464744372e-06, "loss": 0.183416748046875, "step": 61280 }, { "epoch": 0.5299132735557842, "grad_norm": 9.139899627465626, "learning_rate": 5.019339800693527e-06, "loss": 0.0350006103515625, "step": 61285 }, { "epoch": 0.5299565070773274, "grad_norm": 2.1894991273818007, "learning_rate": 5.019189127329665e-06, "loss": 0.112762451171875, "step": 61290 }, { "epoch": 0.5299997405988708, "grad_norm": 5.259079891347302, "learning_rate": 5.019038444653481e-06, "loss": 0.09023971557617187, "step": 61295 }, { "epoch": 0.530042974120414, "grad_norm": 34.32424948585085, "learning_rate": 5.01888775266567e-06, "loss": 0.22734909057617186, "step": 61300 }, { "epoch": 0.5300862076419572, "grad_norm": 4.019337207144552, "learning_rate": 5.018737051366926e-06, "loss": 0.06844863891601563, "step": 61305 }, { "epoch": 0.5301294411635006, "grad_norm": 20.106007059366025, "learning_rate": 5.018586340757947e-06, "loss": 0.206036376953125, "step": 61310 }, { "epoch": 0.5301726746850438, "grad_norm": 2.2766763493888367, "learning_rate": 5.018435620839425e-06, "loss": 0.4473297119140625, "step": 61315 }, { "epoch": 0.530215908206587, "grad_norm": 6.241570096441954, "learning_rate": 5.018284891612057e-06, "loss": 0.46949462890625, "step": 61320 }, { "epoch": 0.5302591417281303, "grad_norm": 16.688533370499172, "learning_rate": 5.018134153076537e-06, "loss": 0.15410003662109376, "step": 61325 }, { "epoch": 0.5303023752496736, "grad_norm": 6.059781155889411, "learning_rate": 5.017983405233561e-06, "loss": 0.306561279296875, "step": 61330 }, { "epoch": 0.5303456087712168, "grad_norm": 0.8116933092448254, "learning_rate": 5.017832648083823e-06, "loss": 0.02363243103027344, "step": 61335 }, { "epoch": 0.5303888422927601, "grad_norm": 6.68977994703583, "learning_rate": 5.01768188162802e-06, "loss": 0.10112457275390625, "step": 61340 }, { "epoch": 0.5304320758143034, "grad_norm": 14.702153543222625, "learning_rate": 5.017531105866845e-06, "loss": 0.14365158081054688, "step": 61345 }, { "epoch": 0.5304753093358466, "grad_norm": 0.3378727526748961, "learning_rate": 5.017380320800997e-06, "loss": 0.13987960815429687, "step": 61350 }, { "epoch": 0.5305185428573899, "grad_norm": 7.40804845419581, "learning_rate": 5.0172295264311675e-06, "loss": 0.07111129760742188, "step": 61355 }, { "epoch": 0.5305617763789332, "grad_norm": 3.295585540708523, "learning_rate": 5.017078722758054e-06, "loss": 0.17892723083496093, "step": 61360 }, { "epoch": 0.5306050099004764, "grad_norm": 8.42262379505072, "learning_rate": 5.016927909782352e-06, "loss": 0.35478515625, "step": 61365 }, { "epoch": 0.5306482434220197, "grad_norm": 13.873504382848777, "learning_rate": 5.016777087504757e-06, "loss": 0.23727569580078126, "step": 61370 }, { "epoch": 0.530691476943563, "grad_norm": 11.650166090164294, "learning_rate": 5.016626255925963e-06, "loss": 0.05111770629882813, "step": 61375 }, { "epoch": 0.5307347104651062, "grad_norm": 10.24480543343127, "learning_rate": 5.016475415046668e-06, "loss": 0.19844970703125, "step": 61380 }, { "epoch": 0.5307779439866495, "grad_norm": 9.586752619604308, "learning_rate": 5.016324564867567e-06, "loss": 0.4253795623779297, "step": 61385 }, { "epoch": 0.5308211775081928, "grad_norm": 36.746794459923215, "learning_rate": 5.016173705389354e-06, "loss": 0.21456565856933593, "step": 61390 }, { "epoch": 0.530864411029736, "grad_norm": 16.904256885081566, "learning_rate": 5.016022836612727e-06, "loss": 0.1432699203491211, "step": 61395 }, { "epoch": 0.5309076445512793, "grad_norm": 41.46318318911471, "learning_rate": 5.015871958538381e-06, "loss": 0.4136444091796875, "step": 61400 }, { "epoch": 0.5309508780728226, "grad_norm": 0.4367351917359215, "learning_rate": 5.01572107116701e-06, "loss": 0.24252243041992189, "step": 61405 }, { "epoch": 0.5309941115943658, "grad_norm": 1.6423191181525625, "learning_rate": 5.0155701744993135e-06, "loss": 0.1977325439453125, "step": 61410 }, { "epoch": 0.531037345115909, "grad_norm": 8.93346517422219, "learning_rate": 5.015419268535985e-06, "loss": 0.04122238159179688, "step": 61415 }, { "epoch": 0.5310805786374523, "grad_norm": 1.9349797692314479, "learning_rate": 5.01526835327772e-06, "loss": 0.10662841796875, "step": 61420 }, { "epoch": 0.5311238121589956, "grad_norm": 7.735807163519915, "learning_rate": 5.015117428725217e-06, "loss": 0.1288055419921875, "step": 61425 }, { "epoch": 0.5311670456805389, "grad_norm": 28.55256385900191, "learning_rate": 5.014966494879169e-06, "loss": 0.3568838119506836, "step": 61430 }, { "epoch": 0.5312102792020821, "grad_norm": 29.872553701073656, "learning_rate": 5.014815551740274e-06, "loss": 0.2221710205078125, "step": 61435 }, { "epoch": 0.5312535127236254, "grad_norm": 11.990704498358806, "learning_rate": 5.014664599309228e-06, "loss": 0.09202651977539063, "step": 61440 }, { "epoch": 0.5312967462451686, "grad_norm": 3.3998934199864053, "learning_rate": 5.0145136375867256e-06, "loss": 0.1007965087890625, "step": 61445 }, { "epoch": 0.5313399797667119, "grad_norm": 5.045240923080389, "learning_rate": 5.0143626665734656e-06, "loss": 0.072808837890625, "step": 61450 }, { "epoch": 0.5313832132882552, "grad_norm": 20.20998168348255, "learning_rate": 5.014211686270143e-06, "loss": 0.08595657348632812, "step": 61455 }, { "epoch": 0.5314264468097984, "grad_norm": 1.4197517845546819, "learning_rate": 5.014060696677453e-06, "loss": 0.09164581298828126, "step": 61460 }, { "epoch": 0.5314696803313417, "grad_norm": 19.008653587264114, "learning_rate": 5.013909697796094e-06, "loss": 0.0848541259765625, "step": 61465 }, { "epoch": 0.531512913852885, "grad_norm": 0.06414600511641495, "learning_rate": 5.013758689626761e-06, "loss": 0.17332763671875, "step": 61470 }, { "epoch": 0.5315561473744282, "grad_norm": 9.55375123824681, "learning_rate": 5.013607672170152e-06, "loss": 0.385113525390625, "step": 61475 }, { "epoch": 0.5315993808959715, "grad_norm": 1.261397103342932, "learning_rate": 5.01345664542696e-06, "loss": 0.0743682861328125, "step": 61480 }, { "epoch": 0.5316426144175148, "grad_norm": 3.8914204014310156, "learning_rate": 5.013305609397886e-06, "loss": 0.1075897216796875, "step": 61485 }, { "epoch": 0.531685847939058, "grad_norm": 1.2861483972403025, "learning_rate": 5.013154564083624e-06, "loss": 0.0884307861328125, "step": 61490 }, { "epoch": 0.5317290814606013, "grad_norm": 14.229453582451443, "learning_rate": 5.013003509484871e-06, "loss": 0.22603607177734375, "step": 61495 }, { "epoch": 0.5317723149821445, "grad_norm": 0.38697514572334735, "learning_rate": 5.0128524456023235e-06, "loss": 0.14812889099121093, "step": 61500 }, { "epoch": 0.5318155485036878, "grad_norm": 14.148298636188496, "learning_rate": 5.012701372436678e-06, "loss": 0.30843353271484375, "step": 61505 }, { "epoch": 0.5318587820252311, "grad_norm": 9.546249427841069, "learning_rate": 5.012550289988633e-06, "loss": 0.0716522216796875, "step": 61510 }, { "epoch": 0.5319020155467743, "grad_norm": 5.60820451791156, "learning_rate": 5.012399198258882e-06, "loss": 0.144329833984375, "step": 61515 }, { "epoch": 0.5319452490683176, "grad_norm": 0.3558527638858737, "learning_rate": 5.012248097248124e-06, "loss": 0.3224067687988281, "step": 61520 }, { "epoch": 0.5319884825898609, "grad_norm": 16.558364310679124, "learning_rate": 5.012096986957056e-06, "loss": 0.10224838256835937, "step": 61525 }, { "epoch": 0.5320317161114041, "grad_norm": 11.852594750334287, "learning_rate": 5.011945867386375e-06, "loss": 0.1215423583984375, "step": 61530 }, { "epoch": 0.5320749496329474, "grad_norm": 32.46843788566901, "learning_rate": 5.011794738536778e-06, "loss": 0.32906494140625, "step": 61535 }, { "epoch": 0.5321181831544907, "grad_norm": 2.6481909257854346, "learning_rate": 5.011643600408959e-06, "loss": 0.1917144775390625, "step": 61540 }, { "epoch": 0.5321614166760339, "grad_norm": 0.5895001297438618, "learning_rate": 5.011492453003619e-06, "loss": 0.39024200439453127, "step": 61545 }, { "epoch": 0.5322046501975772, "grad_norm": 5.240525816325731, "learning_rate": 5.011341296321454e-06, "loss": 0.1384105682373047, "step": 61550 }, { "epoch": 0.5322478837191205, "grad_norm": 17.426591261329957, "learning_rate": 5.01119013036316e-06, "loss": 0.15027694702148436, "step": 61555 }, { "epoch": 0.5322911172406637, "grad_norm": 7.193070103010632, "learning_rate": 5.011038955129435e-06, "loss": 0.120941162109375, "step": 61560 }, { "epoch": 0.532334350762207, "grad_norm": 7.831683360315983, "learning_rate": 5.010887770620976e-06, "loss": 0.1637969970703125, "step": 61565 }, { "epoch": 0.5323775842837503, "grad_norm": 10.906199491855627, "learning_rate": 5.0107365768384795e-06, "loss": 0.230828857421875, "step": 61570 }, { "epoch": 0.5324208178052935, "grad_norm": 6.679333548317462, "learning_rate": 5.010585373782645e-06, "loss": 0.121954345703125, "step": 61575 }, { "epoch": 0.5324640513268368, "grad_norm": 1.8840416767635453, "learning_rate": 5.010434161454168e-06, "loss": 0.3274658203125, "step": 61580 }, { "epoch": 0.5325072848483801, "grad_norm": 53.53469181047533, "learning_rate": 5.010282939853746e-06, "loss": 0.17048492431640624, "step": 61585 }, { "epoch": 0.5325505183699233, "grad_norm": 22.06267729735373, "learning_rate": 5.010131708982077e-06, "loss": 0.17246551513671876, "step": 61590 }, { "epoch": 0.5325937518914665, "grad_norm": 47.984129911116305, "learning_rate": 5.009980468839858e-06, "loss": 0.19869461059570312, "step": 61595 }, { "epoch": 0.5326369854130099, "grad_norm": 1.9143912400003276, "learning_rate": 5.0098292194277865e-06, "loss": 0.33382415771484375, "step": 61600 }, { "epoch": 0.5326802189345531, "grad_norm": 4.7637415552549545, "learning_rate": 5.009677960746561e-06, "loss": 0.0970916748046875, "step": 61605 }, { "epoch": 0.5327234524560963, "grad_norm": 19.80349831526252, "learning_rate": 5.0095266927968775e-06, "loss": 0.24241943359375, "step": 61610 }, { "epoch": 0.5327666859776397, "grad_norm": 4.528312450819428, "learning_rate": 5.009375415579434e-06, "loss": 0.1043670654296875, "step": 61615 }, { "epoch": 0.5328099194991829, "grad_norm": 34.61610819892928, "learning_rate": 5.00922412909493e-06, "loss": 0.2777740478515625, "step": 61620 }, { "epoch": 0.5328531530207261, "grad_norm": 23.828248587956757, "learning_rate": 5.0090728333440615e-06, "loss": 0.21449432373046876, "step": 61625 }, { "epoch": 0.5328963865422695, "grad_norm": 14.487574972796326, "learning_rate": 5.008921528327525e-06, "loss": 0.13088951110839844, "step": 61630 }, { "epoch": 0.5329396200638127, "grad_norm": 0.33945850627606233, "learning_rate": 5.008770214046022e-06, "loss": 0.06642799377441407, "step": 61635 }, { "epoch": 0.5329828535853559, "grad_norm": 10.57444485880408, "learning_rate": 5.008618890500248e-06, "loss": 0.2793769836425781, "step": 61640 }, { "epoch": 0.5330260871068992, "grad_norm": 25.025280138807663, "learning_rate": 5.0084675576909006e-06, "loss": 0.10722808837890625, "step": 61645 }, { "epoch": 0.5330693206284425, "grad_norm": 0.18378223057727489, "learning_rate": 5.008316215618678e-06, "loss": 0.2255523681640625, "step": 61650 }, { "epoch": 0.5331125541499857, "grad_norm": 2.1038853496897136, "learning_rate": 5.008164864284279e-06, "loss": 0.0334014892578125, "step": 61655 }, { "epoch": 0.533155787671529, "grad_norm": 9.203339954659556, "learning_rate": 5.0080135036884e-06, "loss": 0.1480804443359375, "step": 61660 }, { "epoch": 0.5331990211930723, "grad_norm": 3.2405654838306637, "learning_rate": 5.007862133831741e-06, "loss": 0.051019287109375, "step": 61665 }, { "epoch": 0.5332422547146155, "grad_norm": 40.41329893648348, "learning_rate": 5.007710754714999e-06, "loss": 0.27520599365234377, "step": 61670 }, { "epoch": 0.5332854882361587, "grad_norm": 0.4256255180001555, "learning_rate": 5.007559366338874e-06, "loss": 0.16578521728515624, "step": 61675 }, { "epoch": 0.5333287217577021, "grad_norm": 3.8271469448676423, "learning_rate": 5.007407968704061e-06, "loss": 0.11541328430175782, "step": 61680 }, { "epoch": 0.5333719552792453, "grad_norm": 9.583784872157189, "learning_rate": 5.00725656181126e-06, "loss": 0.06396331787109374, "step": 61685 }, { "epoch": 0.5334151888007885, "grad_norm": 2.8387202972769385, "learning_rate": 5.00710514566117e-06, "loss": 0.06010589599609375, "step": 61690 }, { "epoch": 0.5334584223223319, "grad_norm": 36.039265993173565, "learning_rate": 5.006953720254487e-06, "loss": 0.3034454345703125, "step": 61695 }, { "epoch": 0.5335016558438751, "grad_norm": 8.003377074753033, "learning_rate": 5.0068022855919115e-06, "loss": 0.460675048828125, "step": 61700 }, { "epoch": 0.5335448893654183, "grad_norm": 1.1666553805929596, "learning_rate": 5.006650841674141e-06, "loss": 0.175439453125, "step": 61705 }, { "epoch": 0.5335881228869617, "grad_norm": 45.350517224581125, "learning_rate": 5.006499388501874e-06, "loss": 0.31366119384765623, "step": 61710 }, { "epoch": 0.5336313564085049, "grad_norm": 1.8819662881914028, "learning_rate": 5.00634792607581e-06, "loss": 0.051569366455078126, "step": 61715 }, { "epoch": 0.5336745899300481, "grad_norm": 33.00067609190403, "learning_rate": 5.006196454396646e-06, "loss": 0.2245410919189453, "step": 61720 }, { "epoch": 0.5337178234515915, "grad_norm": 7.630342312884826, "learning_rate": 5.006044973465081e-06, "loss": 0.1376667022705078, "step": 61725 }, { "epoch": 0.5337610569731347, "grad_norm": 0.5808265423103668, "learning_rate": 5.005893483281813e-06, "loss": 0.07956657409667969, "step": 61730 }, { "epoch": 0.5338042904946779, "grad_norm": 1.7711563763983162, "learning_rate": 5.005741983847543e-06, "loss": 0.09769058227539062, "step": 61735 }, { "epoch": 0.5338475240162213, "grad_norm": 1.102338328625633, "learning_rate": 5.005590475162968e-06, "loss": 0.09283218383789063, "step": 61740 }, { "epoch": 0.5338907575377645, "grad_norm": 5.7629074059307, "learning_rate": 5.005438957228786e-06, "loss": 0.1125152587890625, "step": 61745 }, { "epoch": 0.5339339910593077, "grad_norm": 9.99574404923431, "learning_rate": 5.005287430045698e-06, "loss": 0.08738479614257813, "step": 61750 }, { "epoch": 0.5339772245808511, "grad_norm": 7.740973481761607, "learning_rate": 5.005135893614401e-06, "loss": 0.0744049072265625, "step": 61755 }, { "epoch": 0.5340204581023943, "grad_norm": 1.54653918741715, "learning_rate": 5.004984347935595e-06, "loss": 0.422528076171875, "step": 61760 }, { "epoch": 0.5340636916239375, "grad_norm": 22.167164337975798, "learning_rate": 5.004832793009977e-06, "loss": 0.18421173095703125, "step": 61765 }, { "epoch": 0.5341069251454807, "grad_norm": 6.388965589443219, "learning_rate": 5.004681228838249e-06, "loss": 0.2003631591796875, "step": 61770 }, { "epoch": 0.5341501586670241, "grad_norm": 10.984856796530316, "learning_rate": 5.0045296554211065e-06, "loss": 0.19011993408203126, "step": 61775 }, { "epoch": 0.5341933921885673, "grad_norm": 2.7393553639481287, "learning_rate": 5.004378072759252e-06, "loss": 0.030712890625, "step": 61780 }, { "epoch": 0.5342366257101105, "grad_norm": 18.579243359749334, "learning_rate": 5.004226480853382e-06, "loss": 0.079888916015625, "step": 61785 }, { "epoch": 0.5342798592316539, "grad_norm": 4.439534158207288, "learning_rate": 5.004074879704196e-06, "loss": 0.21446380615234376, "step": 61790 }, { "epoch": 0.5343230927531971, "grad_norm": 7.054786278435126, "learning_rate": 5.003923269312395e-06, "loss": 0.4635406494140625, "step": 61795 }, { "epoch": 0.5343663262747403, "grad_norm": 1.7736230340301795, "learning_rate": 5.003771649678677e-06, "loss": 0.06491241455078126, "step": 61800 }, { "epoch": 0.5344095597962837, "grad_norm": 22.11903232215577, "learning_rate": 5.00362002080374e-06, "loss": 0.24793243408203125, "step": 61805 }, { "epoch": 0.5344527933178269, "grad_norm": 27.037932562184448, "learning_rate": 5.003468382688286e-06, "loss": 0.255780029296875, "step": 61810 }, { "epoch": 0.5344960268393701, "grad_norm": 1.2153065569443575, "learning_rate": 5.003316735333011e-06, "loss": 0.16551780700683594, "step": 61815 }, { "epoch": 0.5345392603609135, "grad_norm": 5.696461144726234, "learning_rate": 5.003165078738618e-06, "loss": 0.12307891845703126, "step": 61820 }, { "epoch": 0.5345824938824567, "grad_norm": 23.02948801000081, "learning_rate": 5.003013412905804e-06, "loss": 0.10767974853515624, "step": 61825 }, { "epoch": 0.5346257274039999, "grad_norm": 5.40263019033317, "learning_rate": 5.002861737835269e-06, "loss": 0.20289306640625, "step": 61830 }, { "epoch": 0.5346689609255433, "grad_norm": 0.7460474629118491, "learning_rate": 5.002710053527712e-06, "loss": 0.2199127197265625, "step": 61835 }, { "epoch": 0.5347121944470865, "grad_norm": 0.5752279048962938, "learning_rate": 5.002558359983834e-06, "loss": 0.20742855072021485, "step": 61840 }, { "epoch": 0.5347554279686297, "grad_norm": 0.8357307815231886, "learning_rate": 5.002406657204334e-06, "loss": 0.1013336181640625, "step": 61845 }, { "epoch": 0.534798661490173, "grad_norm": 1.1415106445281356, "learning_rate": 5.002254945189911e-06, "loss": 0.20628738403320312, "step": 61850 }, { "epoch": 0.5348418950117163, "grad_norm": 3.045408996075672, "learning_rate": 5.002103223941265e-06, "loss": 0.14630126953125, "step": 61855 }, { "epoch": 0.5348851285332595, "grad_norm": 0.30150955465650325, "learning_rate": 5.001951493459097e-06, "loss": 0.29913330078125, "step": 61860 }, { "epoch": 0.5349283620548028, "grad_norm": 3.9771824252628414, "learning_rate": 5.001799753744104e-06, "loss": 0.15744552612304688, "step": 61865 }, { "epoch": 0.5349715955763461, "grad_norm": 8.426802832882593, "learning_rate": 5.0016480047969875e-06, "loss": 0.13282127380371095, "step": 61870 }, { "epoch": 0.5350148290978893, "grad_norm": 53.79290244346885, "learning_rate": 5.001496246618448e-06, "loss": 0.101678466796875, "step": 61875 }, { "epoch": 0.5350580626194326, "grad_norm": 17.985870823561893, "learning_rate": 5.001344479209186e-06, "loss": 0.13920822143554687, "step": 61880 }, { "epoch": 0.5351012961409759, "grad_norm": 3.0024626604824047, "learning_rate": 5.001192702569897e-06, "loss": 0.173480224609375, "step": 61885 }, { "epoch": 0.5351445296625191, "grad_norm": 9.943303840819734, "learning_rate": 5.001040916701286e-06, "loss": 0.0510009765625, "step": 61890 }, { "epoch": 0.5351877631840624, "grad_norm": 1.8587619054382416, "learning_rate": 5.000889121604051e-06, "loss": 0.04316253662109375, "step": 61895 }, { "epoch": 0.5352309967056057, "grad_norm": 34.60993714861635, "learning_rate": 5.000737317278892e-06, "loss": 0.3241233825683594, "step": 61900 }, { "epoch": 0.5352742302271489, "grad_norm": 0.27512490621733604, "learning_rate": 5.000585503726509e-06, "loss": 0.09819717407226562, "step": 61905 }, { "epoch": 0.5353174637486922, "grad_norm": 2.310572260306316, "learning_rate": 5.000433680947603e-06, "loss": 0.06484222412109375, "step": 61910 }, { "epoch": 0.5353606972702355, "grad_norm": 6.151099651902213, "learning_rate": 5.000281848942873e-06, "loss": 0.2369140625, "step": 61915 }, { "epoch": 0.5354039307917787, "grad_norm": 12.471722111297066, "learning_rate": 5.00013000771302e-06, "loss": 0.1627948760986328, "step": 61920 }, { "epoch": 0.535447164313322, "grad_norm": 11.856822011273092, "learning_rate": 4.999978157258745e-06, "loss": 0.4499176025390625, "step": 61925 }, { "epoch": 0.5354903978348653, "grad_norm": 3.094734562868667, "learning_rate": 4.999826297580746e-06, "loss": 0.08703079223632812, "step": 61930 }, { "epoch": 0.5355336313564085, "grad_norm": 5.090562535909471, "learning_rate": 4.999674428679726e-06, "loss": 0.15683441162109374, "step": 61935 }, { "epoch": 0.5355768648779518, "grad_norm": 10.320401417912693, "learning_rate": 4.999522550556384e-06, "loss": 0.08795051574707032, "step": 61940 }, { "epoch": 0.535620098399495, "grad_norm": 20.347446530692455, "learning_rate": 4.999370663211421e-06, "loss": 0.21405715942382814, "step": 61945 }, { "epoch": 0.5356633319210383, "grad_norm": 2.4810242791129467, "learning_rate": 4.999218766645536e-06, "loss": 0.39979400634765627, "step": 61950 }, { "epoch": 0.5357065654425816, "grad_norm": 15.154141756177335, "learning_rate": 4.999066860859432e-06, "loss": 0.13156890869140625, "step": 61955 }, { "epoch": 0.5357497989641248, "grad_norm": 2.6860505808498356, "learning_rate": 4.998914945853807e-06, "loss": 0.0283721923828125, "step": 61960 }, { "epoch": 0.5357930324856681, "grad_norm": 1.830066415464161, "learning_rate": 4.998763021629364e-06, "loss": 0.2475616455078125, "step": 61965 }, { "epoch": 0.5358362660072113, "grad_norm": 18.322192829495624, "learning_rate": 4.9986110881868015e-06, "loss": 0.11736907958984374, "step": 61970 }, { "epoch": 0.5358794995287546, "grad_norm": 7.106734945062029, "learning_rate": 4.998459145526822e-06, "loss": 0.16661376953125, "step": 61975 }, { "epoch": 0.5359227330502979, "grad_norm": 19.471946544668047, "learning_rate": 4.998307193650125e-06, "loss": 0.3442352294921875, "step": 61980 }, { "epoch": 0.5359659665718411, "grad_norm": 33.65810153528926, "learning_rate": 4.998155232557412e-06, "loss": 0.2804130554199219, "step": 61985 }, { "epoch": 0.5360092000933844, "grad_norm": 22.67848646880795, "learning_rate": 4.998003262249385e-06, "loss": 0.4632904052734375, "step": 61990 }, { "epoch": 0.5360524336149277, "grad_norm": 37.011995749832835, "learning_rate": 4.997851282726741e-06, "loss": 0.44466094970703124, "step": 61995 }, { "epoch": 0.5360956671364709, "grad_norm": 8.708852316395193, "learning_rate": 4.997699293990186e-06, "loss": 0.0514556884765625, "step": 62000 }, { "epoch": 0.5361389006580142, "grad_norm": 0.5321740879213046, "learning_rate": 4.997547296040417e-06, "loss": 0.06713104248046875, "step": 62005 }, { "epoch": 0.5361821341795575, "grad_norm": 5.124506476791877, "learning_rate": 4.997395288878136e-06, "loss": 0.307958984375, "step": 62010 }, { "epoch": 0.5362253677011007, "grad_norm": 10.030022576999402, "learning_rate": 4.997243272504045e-06, "loss": 0.1623382568359375, "step": 62015 }, { "epoch": 0.536268601222644, "grad_norm": 17.428371020594, "learning_rate": 4.997091246918845e-06, "loss": 0.17445907592773438, "step": 62020 }, { "epoch": 0.5363118347441872, "grad_norm": 1.1734869606913543, "learning_rate": 4.9969392121232356e-06, "loss": 0.3726104736328125, "step": 62025 }, { "epoch": 0.5363550682657305, "grad_norm": 1.0055214461262807, "learning_rate": 4.996787168117919e-06, "loss": 0.12009849548339843, "step": 62030 }, { "epoch": 0.5363983017872738, "grad_norm": 18.265794747178173, "learning_rate": 4.996635114903597e-06, "loss": 0.14145240783691407, "step": 62035 }, { "epoch": 0.536441535308817, "grad_norm": 2.6200391825409244, "learning_rate": 4.996483052480971e-06, "loss": 0.28477783203125, "step": 62040 }, { "epoch": 0.5364847688303603, "grad_norm": 23.451291415187338, "learning_rate": 4.9963309808507405e-06, "loss": 0.3263153076171875, "step": 62045 }, { "epoch": 0.5365280023519036, "grad_norm": 30.345394616849056, "learning_rate": 4.996178900013607e-06, "loss": 0.2162200927734375, "step": 62050 }, { "epoch": 0.5365712358734468, "grad_norm": 5.692101426321659, "learning_rate": 4.996026809970275e-06, "loss": 0.15313720703125, "step": 62055 }, { "epoch": 0.5366144693949901, "grad_norm": 18.707016277905762, "learning_rate": 4.995874710721442e-06, "loss": 0.22582244873046875, "step": 62060 }, { "epoch": 0.5366577029165334, "grad_norm": 43.23444743973761, "learning_rate": 4.995722602267811e-06, "loss": 0.20889892578125, "step": 62065 }, { "epoch": 0.5367009364380766, "grad_norm": 42.13169963828679, "learning_rate": 4.995570484610085e-06, "loss": 0.22325439453125, "step": 62070 }, { "epoch": 0.5367441699596199, "grad_norm": 26.11389660413531, "learning_rate": 4.9954183577489635e-06, "loss": 0.2374725341796875, "step": 62075 }, { "epoch": 0.5367874034811632, "grad_norm": 3.6288488179061007, "learning_rate": 4.99526622168515e-06, "loss": 0.249493408203125, "step": 62080 }, { "epoch": 0.5368306370027064, "grad_norm": 12.78041047800833, "learning_rate": 4.9951140764193445e-06, "loss": 0.1171234130859375, "step": 62085 }, { "epoch": 0.5368738705242497, "grad_norm": 3.6922639168349036, "learning_rate": 4.994961921952248e-06, "loss": 0.23956298828125, "step": 62090 }, { "epoch": 0.536917104045793, "grad_norm": 5.366856928011813, "learning_rate": 4.994809758284565e-06, "loss": 0.0364837646484375, "step": 62095 }, { "epoch": 0.5369603375673362, "grad_norm": 4.391014446033003, "learning_rate": 4.994657585416995e-06, "loss": 0.029193878173828125, "step": 62100 }, { "epoch": 0.5370035710888795, "grad_norm": 0.20404519975688876, "learning_rate": 4.994505403350239e-06, "loss": 0.0959197998046875, "step": 62105 }, { "epoch": 0.5370468046104228, "grad_norm": 0.7295833137152301, "learning_rate": 4.9943532120850015e-06, "loss": 0.05809478759765625, "step": 62110 }, { "epoch": 0.537090038131966, "grad_norm": 1.7860519468902827, "learning_rate": 4.994201011621985e-06, "loss": 0.07546157836914062, "step": 62115 }, { "epoch": 0.5371332716535092, "grad_norm": 0.2321726143580119, "learning_rate": 4.9940488019618875e-06, "loss": 0.0764862060546875, "step": 62120 }, { "epoch": 0.5371765051750526, "grad_norm": 4.109280749647618, "learning_rate": 4.993896583105414e-06, "loss": 0.13754920959472655, "step": 62125 }, { "epoch": 0.5372197386965958, "grad_norm": 6.261705900369135, "learning_rate": 4.993744355053265e-06, "loss": 0.0324371337890625, "step": 62130 }, { "epoch": 0.537262972218139, "grad_norm": 0.988452411507858, "learning_rate": 4.9935921178061425e-06, "loss": 0.05429840087890625, "step": 62135 }, { "epoch": 0.5373062057396824, "grad_norm": 17.117548213661355, "learning_rate": 4.993439871364751e-06, "loss": 0.2397705078125, "step": 62140 }, { "epoch": 0.5373494392612256, "grad_norm": 97.97955065473442, "learning_rate": 4.993287615729791e-06, "loss": 0.19125213623046874, "step": 62145 }, { "epoch": 0.5373926727827688, "grad_norm": 25.120350221829703, "learning_rate": 4.993135350901962e-06, "loss": 0.23453369140625, "step": 62150 }, { "epoch": 0.5374359063043121, "grad_norm": 31.762047140895454, "learning_rate": 4.992983076881972e-06, "loss": 0.10420951843261719, "step": 62155 }, { "epoch": 0.5374791398258554, "grad_norm": 10.346091040676425, "learning_rate": 4.992830793670519e-06, "loss": 0.19626846313476562, "step": 62160 }, { "epoch": 0.5375223733473986, "grad_norm": 20.45151866680968, "learning_rate": 4.9926785012683065e-06, "loss": 0.12545623779296874, "step": 62165 }, { "epoch": 0.537565606868942, "grad_norm": 8.809211990301682, "learning_rate": 4.992526199676036e-06, "loss": 0.1150421142578125, "step": 62170 }, { "epoch": 0.5376088403904852, "grad_norm": 12.375690046776292, "learning_rate": 4.992373888894412e-06, "loss": 0.10975799560546876, "step": 62175 }, { "epoch": 0.5376520739120284, "grad_norm": 13.18270658494109, "learning_rate": 4.9922215689241355e-06, "loss": 0.3920562744140625, "step": 62180 }, { "epoch": 0.5376953074335717, "grad_norm": 4.9054192470714755, "learning_rate": 4.992069239765908e-06, "loss": 0.49599151611328124, "step": 62185 }, { "epoch": 0.537738540955115, "grad_norm": 2.91470262154007, "learning_rate": 4.991916901420434e-06, "loss": 0.1333251953125, "step": 62190 }, { "epoch": 0.5377817744766582, "grad_norm": 7.917048582598327, "learning_rate": 4.9917645538884154e-06, "loss": 0.0591644287109375, "step": 62195 }, { "epoch": 0.5378250079982014, "grad_norm": 27.365995166521166, "learning_rate": 4.991612197170554e-06, "loss": 0.3071311950683594, "step": 62200 }, { "epoch": 0.5378682415197448, "grad_norm": 20.798338869513074, "learning_rate": 4.991459831267554e-06, "loss": 0.09171371459960938, "step": 62205 }, { "epoch": 0.537911475041288, "grad_norm": 13.693727818199374, "learning_rate": 4.991307456180117e-06, "loss": 0.1520923614501953, "step": 62210 }, { "epoch": 0.5379547085628312, "grad_norm": 1.3264341028124302, "learning_rate": 4.991155071908946e-06, "loss": 0.3759635925292969, "step": 62215 }, { "epoch": 0.5379979420843746, "grad_norm": 57.744065966365596, "learning_rate": 4.991002678454742e-06, "loss": 0.23402481079101561, "step": 62220 }, { "epoch": 0.5380411756059178, "grad_norm": 4.974120524223735, "learning_rate": 4.990850275818212e-06, "loss": 0.22563858032226564, "step": 62225 }, { "epoch": 0.538084409127461, "grad_norm": 8.179191511622106, "learning_rate": 4.990697864000056e-06, "loss": 0.1073028564453125, "step": 62230 }, { "epoch": 0.5381276426490044, "grad_norm": 11.591352253208981, "learning_rate": 4.990545443000977e-06, "loss": 0.264202880859375, "step": 62235 }, { "epoch": 0.5381708761705476, "grad_norm": 0.6428615030367566, "learning_rate": 4.990393012821678e-06, "loss": 0.04548425674438476, "step": 62240 }, { "epoch": 0.5382141096920908, "grad_norm": 13.782037157048943, "learning_rate": 4.990240573462863e-06, "loss": 0.136083984375, "step": 62245 }, { "epoch": 0.5382573432136342, "grad_norm": 0.2652564194046113, "learning_rate": 4.990088124925233e-06, "loss": 0.124383544921875, "step": 62250 }, { "epoch": 0.5383005767351774, "grad_norm": 20.76722105305847, "learning_rate": 4.989935667209494e-06, "loss": 0.24971237182617187, "step": 62255 }, { "epoch": 0.5383438102567206, "grad_norm": 1.302026391157295, "learning_rate": 4.989783200316347e-06, "loss": 0.10714941024780274, "step": 62260 }, { "epoch": 0.538387043778264, "grad_norm": 3.3233301972926883, "learning_rate": 4.989630724246495e-06, "loss": 0.24939422607421874, "step": 62265 }, { "epoch": 0.5384302772998072, "grad_norm": 1.7457838134102213, "learning_rate": 4.989478239000643e-06, "loss": 0.13114089965820314, "step": 62270 }, { "epoch": 0.5384735108213504, "grad_norm": 27.5084559660309, "learning_rate": 4.9893257445794925e-06, "loss": 0.1884441375732422, "step": 62275 }, { "epoch": 0.5385167443428938, "grad_norm": 23.604832785924998, "learning_rate": 4.9891732409837465e-06, "loss": 0.41022567749023436, "step": 62280 }, { "epoch": 0.538559977864437, "grad_norm": 2.3844994386264453, "learning_rate": 4.989020728214111e-06, "loss": 0.298187255859375, "step": 62285 }, { "epoch": 0.5386032113859802, "grad_norm": 8.76228536146049, "learning_rate": 4.988868206271287e-06, "loss": 0.30767822265625, "step": 62290 }, { "epoch": 0.5386464449075234, "grad_norm": 10.106874130508988, "learning_rate": 4.988715675155979e-06, "loss": 0.11443367004394531, "step": 62295 }, { "epoch": 0.5386896784290668, "grad_norm": 4.471899319532895, "learning_rate": 4.988563134868889e-06, "loss": 0.17056198120117189, "step": 62300 }, { "epoch": 0.53873291195061, "grad_norm": 9.340795460573318, "learning_rate": 4.988410585410722e-06, "loss": 0.21633453369140626, "step": 62305 }, { "epoch": 0.5387761454721532, "grad_norm": 10.689459077325708, "learning_rate": 4.988258026782182e-06, "loss": 0.137322998046875, "step": 62310 }, { "epoch": 0.5388193789936966, "grad_norm": 22.755213991718904, "learning_rate": 4.98810545898397e-06, "loss": 0.3515468597412109, "step": 62315 }, { "epoch": 0.5388626125152398, "grad_norm": 5.614846548331474, "learning_rate": 4.987952882016793e-06, "loss": 0.1970855712890625, "step": 62320 }, { "epoch": 0.538905846036783, "grad_norm": 93.02338853562833, "learning_rate": 4.987800295881351e-06, "loss": 0.6518936157226562, "step": 62325 }, { "epoch": 0.5389490795583264, "grad_norm": 2.3383943731992733, "learning_rate": 4.987647700578351e-06, "loss": 0.07755584716796875, "step": 62330 }, { "epoch": 0.5389923130798696, "grad_norm": 22.65646355110409, "learning_rate": 4.987495096108496e-06, "loss": 0.321685791015625, "step": 62335 }, { "epoch": 0.5390355466014128, "grad_norm": 33.257557385235586, "learning_rate": 4.987342482472488e-06, "loss": 0.13148212432861328, "step": 62340 }, { "epoch": 0.5390787801229562, "grad_norm": 13.446993106651222, "learning_rate": 4.987189859671032e-06, "loss": 0.130230712890625, "step": 62345 }, { "epoch": 0.5391220136444994, "grad_norm": 1.708694954637702, "learning_rate": 4.9870372277048325e-06, "loss": 0.037412261962890624, "step": 62350 }, { "epoch": 0.5391652471660426, "grad_norm": 1.9350041166012057, "learning_rate": 4.9868845865745935e-06, "loss": 0.0475738525390625, "step": 62355 }, { "epoch": 0.539208480687586, "grad_norm": 2.793515582123683, "learning_rate": 4.9867319362810174e-06, "loss": 0.058905029296875, "step": 62360 }, { "epoch": 0.5392517142091292, "grad_norm": 33.55339688565548, "learning_rate": 4.986579276824809e-06, "loss": 0.08532485961914063, "step": 62365 }, { "epoch": 0.5392949477306724, "grad_norm": 5.697546992949083, "learning_rate": 4.986426608206674e-06, "loss": 0.1273895263671875, "step": 62370 }, { "epoch": 0.5393381812522157, "grad_norm": 32.58334597368, "learning_rate": 4.986273930427313e-06, "loss": 0.3612548828125, "step": 62375 }, { "epoch": 0.539381414773759, "grad_norm": 0.5149425416167862, "learning_rate": 4.986121243487434e-06, "loss": 0.2203369140625, "step": 62380 }, { "epoch": 0.5394246482953022, "grad_norm": 28.06693417592731, "learning_rate": 4.985968547387738e-06, "loss": 0.45821590423583985, "step": 62385 }, { "epoch": 0.5394678818168455, "grad_norm": 1.8518698781453309, "learning_rate": 4.985815842128931e-06, "loss": 0.07980060577392578, "step": 62390 }, { "epoch": 0.5395111153383888, "grad_norm": 17.941627908062216, "learning_rate": 4.985663127711716e-06, "loss": 0.428533935546875, "step": 62395 }, { "epoch": 0.539554348859932, "grad_norm": 0.21178439210940136, "learning_rate": 4.985510404136799e-06, "loss": 0.103485107421875, "step": 62400 }, { "epoch": 0.5395975823814753, "grad_norm": 9.341119338566713, "learning_rate": 4.985357671404884e-06, "loss": 0.1859375, "step": 62405 }, { "epoch": 0.5396408159030186, "grad_norm": 16.290699442512413, "learning_rate": 4.9852049295166746e-06, "loss": 0.2903293609619141, "step": 62410 }, { "epoch": 0.5396840494245618, "grad_norm": 0.8675057557044608, "learning_rate": 4.985052178472875e-06, "loss": 0.2836006164550781, "step": 62415 }, { "epoch": 0.5397272829461051, "grad_norm": 7.566008157006689, "learning_rate": 4.98489941827419e-06, "loss": 0.10753936767578125, "step": 62420 }, { "epoch": 0.5397705164676484, "grad_norm": 1.7251003992412424, "learning_rate": 4.984746648921324e-06, "loss": 0.14370880126953126, "step": 62425 }, { "epoch": 0.5398137499891916, "grad_norm": 1.2310718324135173, "learning_rate": 4.984593870414983e-06, "loss": 0.07596206665039062, "step": 62430 }, { "epoch": 0.5398569835107349, "grad_norm": 8.855694471008338, "learning_rate": 4.98444108275587e-06, "loss": 0.1758331298828125, "step": 62435 }, { "epoch": 0.5399002170322782, "grad_norm": 4.1150956694947824, "learning_rate": 4.9842882859446895e-06, "loss": 0.11917190551757813, "step": 62440 }, { "epoch": 0.5399434505538214, "grad_norm": 29.050241176307964, "learning_rate": 4.984135479982147e-06, "loss": 0.1512939453125, "step": 62445 }, { "epoch": 0.5399866840753647, "grad_norm": 1.2676882756671906, "learning_rate": 4.983982664868947e-06, "loss": 0.016647720336914064, "step": 62450 }, { "epoch": 0.540029917596908, "grad_norm": 52.591832705431045, "learning_rate": 4.983829840605795e-06, "loss": 0.24900970458984376, "step": 62455 }, { "epoch": 0.5400731511184512, "grad_norm": 24.356546468633766, "learning_rate": 4.983677007193394e-06, "loss": 0.25626220703125, "step": 62460 }, { "epoch": 0.5401163846399945, "grad_norm": 20.24625231028196, "learning_rate": 4.9835241646324505e-06, "loss": 0.10371627807617187, "step": 62465 }, { "epoch": 0.5401596181615377, "grad_norm": 0.297869831943087, "learning_rate": 4.983371312923669e-06, "loss": 0.030820465087890624, "step": 62470 }, { "epoch": 0.540202851683081, "grad_norm": 9.2521877149157, "learning_rate": 4.983218452067754e-06, "loss": 1.0760787963867187, "step": 62475 }, { "epoch": 0.5402460852046242, "grad_norm": 1.8429931344110757, "learning_rate": 4.983065582065411e-06, "loss": 0.27274551391601565, "step": 62480 }, { "epoch": 0.5402893187261675, "grad_norm": 31.919188928109136, "learning_rate": 4.982912702917344e-06, "loss": 0.6957763671875, "step": 62485 }, { "epoch": 0.5403325522477108, "grad_norm": 2.5059819873533566, "learning_rate": 4.982759814624258e-06, "loss": 0.053003692626953126, "step": 62490 }, { "epoch": 0.540375785769254, "grad_norm": 1.7458292983050199, "learning_rate": 4.982606917186861e-06, "loss": 0.0634765625, "step": 62495 }, { "epoch": 0.5404190192907973, "grad_norm": 4.358465929207919, "learning_rate": 4.982454010605856e-06, "loss": 0.16764392852783203, "step": 62500 }, { "epoch": 0.5404622528123406, "grad_norm": 8.592844738286994, "learning_rate": 4.982301094881947e-06, "loss": 0.242266845703125, "step": 62505 }, { "epoch": 0.5405054863338838, "grad_norm": 35.90284471419332, "learning_rate": 4.982148170015841e-06, "loss": 0.5561294555664062, "step": 62510 }, { "epoch": 0.5405487198554271, "grad_norm": 41.66393764358975, "learning_rate": 4.981995236008243e-06, "loss": 0.5066009521484375, "step": 62515 }, { "epoch": 0.5405919533769704, "grad_norm": 5.55830746470856, "learning_rate": 4.981842292859858e-06, "loss": 0.266015625, "step": 62520 }, { "epoch": 0.5406351868985136, "grad_norm": 7.851011067587743, "learning_rate": 4.981689340571392e-06, "loss": 0.09356536865234374, "step": 62525 }, { "epoch": 0.5406784204200569, "grad_norm": 26.023748036233766, "learning_rate": 4.98153637914355e-06, "loss": 0.1885711669921875, "step": 62530 }, { "epoch": 0.5407216539416002, "grad_norm": 15.287049898971247, "learning_rate": 4.9813834085770355e-06, "loss": 0.18172607421875, "step": 62535 }, { "epoch": 0.5407648874631434, "grad_norm": 9.85399836397549, "learning_rate": 4.981230428872557e-06, "loss": 0.0674102783203125, "step": 62540 }, { "epoch": 0.5408081209846867, "grad_norm": 36.55191086475514, "learning_rate": 4.981077440030819e-06, "loss": 0.16417236328125, "step": 62545 }, { "epoch": 0.5408513545062299, "grad_norm": 22.305966905282624, "learning_rate": 4.980924442052527e-06, "loss": 0.6950302124023438, "step": 62550 }, { "epoch": 0.5408945880277732, "grad_norm": 0.7225088625456735, "learning_rate": 4.980771434938386e-06, "loss": 0.027409744262695313, "step": 62555 }, { "epoch": 0.5409378215493165, "grad_norm": 60.6097444547725, "learning_rate": 4.980618418689102e-06, "loss": 0.31286087036132815, "step": 62560 }, { "epoch": 0.5409810550708597, "grad_norm": 3.0343316160261553, "learning_rate": 4.980465393305381e-06, "loss": 0.61416015625, "step": 62565 }, { "epoch": 0.541024288592403, "grad_norm": 22.138683955343176, "learning_rate": 4.980312358787929e-06, "loss": 0.12869224548339844, "step": 62570 }, { "epoch": 0.5410675221139463, "grad_norm": 1.097244252224793, "learning_rate": 4.980159315137451e-06, "loss": 0.0570281982421875, "step": 62575 }, { "epoch": 0.5411107556354895, "grad_norm": 1.8488919227009781, "learning_rate": 4.980006262354653e-06, "loss": 0.09125747680664062, "step": 62580 }, { "epoch": 0.5411539891570328, "grad_norm": 6.757696523247361, "learning_rate": 4.979853200440242e-06, "loss": 0.059357833862304685, "step": 62585 }, { "epoch": 0.5411972226785761, "grad_norm": 0.396932170839413, "learning_rate": 4.979700129394922e-06, "loss": 0.021489715576171874, "step": 62590 }, { "epoch": 0.5412404562001193, "grad_norm": 14.075387207138771, "learning_rate": 4.9795470492194e-06, "loss": 0.2210296630859375, "step": 62595 }, { "epoch": 0.5412836897216626, "grad_norm": 1.6044671961100538, "learning_rate": 4.979393959914382e-06, "loss": 0.07033157348632812, "step": 62600 }, { "epoch": 0.5413269232432059, "grad_norm": 27.259662178986616, "learning_rate": 4.979240861480575e-06, "loss": 0.42185401916503906, "step": 62605 }, { "epoch": 0.5413701567647491, "grad_norm": 31.738112047329807, "learning_rate": 4.979087753918683e-06, "loss": 0.5085433959960938, "step": 62610 }, { "epoch": 0.5414133902862924, "grad_norm": 4.464698735147008, "learning_rate": 4.978934637229413e-06, "loss": 0.21676177978515626, "step": 62615 }, { "epoch": 0.5414566238078357, "grad_norm": 5.9982394769653276, "learning_rate": 4.978781511413472e-06, "loss": 0.18484649658203126, "step": 62620 }, { "epoch": 0.5414998573293789, "grad_norm": 3.1137124171819104, "learning_rate": 4.978628376471565e-06, "loss": 0.2676511764526367, "step": 62625 }, { "epoch": 0.5415430908509222, "grad_norm": 6.181203178408766, "learning_rate": 4.978475232404399e-06, "loss": 0.1842620849609375, "step": 62630 }, { "epoch": 0.5415863243724655, "grad_norm": 4.88167524570093, "learning_rate": 4.978322079212681e-06, "loss": 0.0520904541015625, "step": 62635 }, { "epoch": 0.5416295578940087, "grad_norm": 10.879588682415932, "learning_rate": 4.978168916897114e-06, "loss": 0.11636962890625, "step": 62640 }, { "epoch": 0.5416727914155519, "grad_norm": 2.8180923272379395, "learning_rate": 4.9780157454584094e-06, "loss": 0.20685272216796874, "step": 62645 }, { "epoch": 0.5417160249370953, "grad_norm": 10.779933467415962, "learning_rate": 4.977862564897269e-06, "loss": 0.068060302734375, "step": 62650 }, { "epoch": 0.5417592584586385, "grad_norm": 6.406559757566243, "learning_rate": 4.977709375214402e-06, "loss": 0.48529205322265623, "step": 62655 }, { "epoch": 0.5418024919801817, "grad_norm": 22.589788973752178, "learning_rate": 4.977556176410515e-06, "loss": 0.1569122314453125, "step": 62660 }, { "epoch": 0.541845725501725, "grad_norm": 7.14611296174679, "learning_rate": 4.977402968486312e-06, "loss": 0.28492431640625, "step": 62665 }, { "epoch": 0.5418889590232683, "grad_norm": 0.8186488469588341, "learning_rate": 4.977249751442503e-06, "loss": 0.25410003662109376, "step": 62670 }, { "epoch": 0.5419321925448115, "grad_norm": 2.3616700280527696, "learning_rate": 4.977096525279791e-06, "loss": 0.18049049377441406, "step": 62675 }, { "epoch": 0.5419754260663548, "grad_norm": 15.099533719143357, "learning_rate": 4.976943289998886e-06, "loss": 0.055844879150390624, "step": 62680 }, { "epoch": 0.5420186595878981, "grad_norm": 0.19278022243317755, "learning_rate": 4.976790045600492e-06, "loss": 0.32527999877929686, "step": 62685 }, { "epoch": 0.5420618931094413, "grad_norm": 23.09468596813384, "learning_rate": 4.976636792085318e-06, "loss": 0.2158477783203125, "step": 62690 }, { "epoch": 0.5421051266309846, "grad_norm": 42.15041217030848, "learning_rate": 4.976483529454069e-06, "loss": 0.16599502563476562, "step": 62695 }, { "epoch": 0.5421483601525279, "grad_norm": 31.74650916900967, "learning_rate": 4.976330257707453e-06, "loss": 0.22301025390625, "step": 62700 }, { "epoch": 0.5421915936740711, "grad_norm": 0.6387721199544085, "learning_rate": 4.976176976846176e-06, "loss": 0.08256988525390625, "step": 62705 }, { "epoch": 0.5422348271956144, "grad_norm": 0.06356291807134826, "learning_rate": 4.9760236868709456e-06, "loss": 0.19658393859863282, "step": 62710 }, { "epoch": 0.5422780607171577, "grad_norm": 0.40773502043470583, "learning_rate": 4.975870387782469e-06, "loss": 0.01632843017578125, "step": 62715 }, { "epoch": 0.5423212942387009, "grad_norm": 33.7980448516257, "learning_rate": 4.975717079581454e-06, "loss": 0.3091703414916992, "step": 62720 }, { "epoch": 0.5423645277602441, "grad_norm": 1.5147671895412762, "learning_rate": 4.975563762268604e-06, "loss": 0.2662483215332031, "step": 62725 }, { "epoch": 0.5424077612817875, "grad_norm": 14.423302452110576, "learning_rate": 4.97541043584463e-06, "loss": 0.0832977294921875, "step": 62730 }, { "epoch": 0.5424509948033307, "grad_norm": 2.686540954769561, "learning_rate": 4.975257100310236e-06, "loss": 0.44765777587890626, "step": 62735 }, { "epoch": 0.5424942283248739, "grad_norm": 18.236974919046148, "learning_rate": 4.975103755666132e-06, "loss": 0.21531982421875, "step": 62740 }, { "epoch": 0.5425374618464173, "grad_norm": 10.934952848413623, "learning_rate": 4.974950401913023e-06, "loss": 0.17136154174804688, "step": 62745 }, { "epoch": 0.5425806953679605, "grad_norm": 34.158265766513125, "learning_rate": 4.974797039051619e-06, "loss": 0.18226242065429688, "step": 62750 }, { "epoch": 0.5426239288895037, "grad_norm": 3.652693614060662, "learning_rate": 4.9746436670826246e-06, "loss": 0.03426361083984375, "step": 62755 }, { "epoch": 0.5426671624110471, "grad_norm": 0.853807317758858, "learning_rate": 4.974490286006748e-06, "loss": 0.10775585174560547, "step": 62760 }, { "epoch": 0.5427103959325903, "grad_norm": 0.10297451366572336, "learning_rate": 4.974336895824697e-06, "loss": 0.07254180908203126, "step": 62765 }, { "epoch": 0.5427536294541335, "grad_norm": 3.7629004110735322, "learning_rate": 4.974183496537179e-06, "loss": 0.08939743041992188, "step": 62770 }, { "epoch": 0.5427968629756769, "grad_norm": 10.681799873600701, "learning_rate": 4.974030088144901e-06, "loss": 0.22850265502929687, "step": 62775 }, { "epoch": 0.5428400964972201, "grad_norm": 43.71482424236227, "learning_rate": 4.97387667064857e-06, "loss": 0.20648193359375, "step": 62780 }, { "epoch": 0.5428833300187633, "grad_norm": 13.45384824541788, "learning_rate": 4.9737232440488945e-06, "loss": 0.124853515625, "step": 62785 }, { "epoch": 0.5429265635403067, "grad_norm": 1.0970668284348721, "learning_rate": 4.973569808346581e-06, "loss": 0.15101852416992187, "step": 62790 }, { "epoch": 0.5429697970618499, "grad_norm": 6.651087713996622, "learning_rate": 4.97341636354234e-06, "loss": 0.2827606201171875, "step": 62795 }, { "epoch": 0.5430130305833931, "grad_norm": 9.55816968862363, "learning_rate": 4.973262909636876e-06, "loss": 0.064727783203125, "step": 62800 }, { "epoch": 0.5430562641049363, "grad_norm": 18.588107179664725, "learning_rate": 4.973109446630898e-06, "loss": 0.0790802001953125, "step": 62805 }, { "epoch": 0.5430994976264797, "grad_norm": 13.897028059314732, "learning_rate": 4.972955974525112e-06, "loss": 0.07901725769042969, "step": 62810 }, { "epoch": 0.5431427311480229, "grad_norm": 1.339210744487122, "learning_rate": 4.972802493320229e-06, "loss": 0.3228515625, "step": 62815 }, { "epoch": 0.5431859646695661, "grad_norm": 12.854785450308496, "learning_rate": 4.972649003016955e-06, "loss": 0.424346923828125, "step": 62820 }, { "epoch": 0.5432291981911095, "grad_norm": 1.0948498968386324, "learning_rate": 4.9724955036159985e-06, "loss": 0.06905784606933593, "step": 62825 }, { "epoch": 0.5432724317126527, "grad_norm": 17.029147048386793, "learning_rate": 4.972341995118066e-06, "loss": 0.2300628662109375, "step": 62830 }, { "epoch": 0.5433156652341959, "grad_norm": 12.08278132636943, "learning_rate": 4.972188477523867e-06, "loss": 0.15459060668945312, "step": 62835 }, { "epoch": 0.5433588987557393, "grad_norm": 2.080132733335746, "learning_rate": 4.972034950834109e-06, "loss": 0.2243663787841797, "step": 62840 }, { "epoch": 0.5434021322772825, "grad_norm": 0.777726594909214, "learning_rate": 4.9718814150495e-06, "loss": 0.19114990234375, "step": 62845 }, { "epoch": 0.5434453657988257, "grad_norm": 26.852374116722654, "learning_rate": 4.971727870170748e-06, "loss": 0.136517333984375, "step": 62850 }, { "epoch": 0.5434885993203691, "grad_norm": 14.563141796436213, "learning_rate": 4.971574316198562e-06, "loss": 0.19165191650390626, "step": 62855 }, { "epoch": 0.5435318328419123, "grad_norm": 3.706508417975262, "learning_rate": 4.971420753133649e-06, "loss": 0.15220317840576172, "step": 62860 }, { "epoch": 0.5435750663634555, "grad_norm": 1.0540191084624133, "learning_rate": 4.971267180976718e-06, "loss": 0.33673553466796874, "step": 62865 }, { "epoch": 0.5436182998849989, "grad_norm": 15.537050655903926, "learning_rate": 4.971113599728476e-06, "loss": 0.07860755920410156, "step": 62870 }, { "epoch": 0.5436615334065421, "grad_norm": 1.0078527279037324, "learning_rate": 4.970960009389633e-06, "loss": 0.017123031616210937, "step": 62875 }, { "epoch": 0.5437047669280853, "grad_norm": 14.281112765111832, "learning_rate": 4.970806409960897e-06, "loss": 0.13095169067382811, "step": 62880 }, { "epoch": 0.5437480004496287, "grad_norm": 2.212725556010016, "learning_rate": 4.970652801442975e-06, "loss": 0.1513427734375, "step": 62885 }, { "epoch": 0.5437912339711719, "grad_norm": 31.976433185282517, "learning_rate": 4.9704991838365765e-06, "loss": 0.08205795288085938, "step": 62890 }, { "epoch": 0.5438344674927151, "grad_norm": 16.675135547161695, "learning_rate": 4.9703455571424104e-06, "loss": 0.1482513427734375, "step": 62895 }, { "epoch": 0.5438777010142584, "grad_norm": 1.222012765173669, "learning_rate": 4.970191921361185e-06, "loss": 0.1058013916015625, "step": 62900 }, { "epoch": 0.5439209345358017, "grad_norm": 1.5383243729848168, "learning_rate": 4.970038276493608e-06, "loss": 0.08751068115234376, "step": 62905 }, { "epoch": 0.5439641680573449, "grad_norm": 20.03863043181459, "learning_rate": 4.969884622540388e-06, "loss": 0.08498764038085938, "step": 62910 }, { "epoch": 0.5440074015788882, "grad_norm": 13.16820474045554, "learning_rate": 4.969730959502235e-06, "loss": 0.212872314453125, "step": 62915 }, { "epoch": 0.5440506351004315, "grad_norm": 2.064220772461373, "learning_rate": 4.969577287379857e-06, "loss": 0.15272178649902343, "step": 62920 }, { "epoch": 0.5440938686219747, "grad_norm": 10.283944517607702, "learning_rate": 4.969423606173962e-06, "loss": 0.1422698974609375, "step": 62925 }, { "epoch": 0.544137102143518, "grad_norm": 17.121529467820043, "learning_rate": 4.96926991588526e-06, "loss": 0.07326507568359375, "step": 62930 }, { "epoch": 0.5441803356650613, "grad_norm": 3.558341730305562, "learning_rate": 4.969116216514458e-06, "loss": 0.20645751953125, "step": 62935 }, { "epoch": 0.5442235691866045, "grad_norm": 17.506013671024856, "learning_rate": 4.968962508062267e-06, "loss": 0.26924762725830076, "step": 62940 }, { "epoch": 0.5442668027081478, "grad_norm": 15.580620706846597, "learning_rate": 4.968808790529395e-06, "loss": 0.30260009765625, "step": 62945 }, { "epoch": 0.5443100362296911, "grad_norm": 23.700905968523347, "learning_rate": 4.96865506391655e-06, "loss": 0.4227436065673828, "step": 62950 }, { "epoch": 0.5443532697512343, "grad_norm": 12.186694182877169, "learning_rate": 4.968501328224442e-06, "loss": 0.1295745849609375, "step": 62955 }, { "epoch": 0.5443965032727776, "grad_norm": 6.570533165481122, "learning_rate": 4.968347583453781e-06, "loss": 0.19963302612304687, "step": 62960 }, { "epoch": 0.5444397367943209, "grad_norm": 2.2212678321591346, "learning_rate": 4.968193829605273e-06, "loss": 0.6379318237304688, "step": 62965 }, { "epoch": 0.5444829703158641, "grad_norm": 2.4448882819922093, "learning_rate": 4.96804006667963e-06, "loss": 0.17780532836914062, "step": 62970 }, { "epoch": 0.5445262038374074, "grad_norm": 3.7968296269107835, "learning_rate": 4.96788629467756e-06, "loss": 0.3319831848144531, "step": 62975 }, { "epoch": 0.5445694373589506, "grad_norm": 0.12269359827834837, "learning_rate": 4.9677325135997734e-06, "loss": 0.4581935882568359, "step": 62980 }, { "epoch": 0.5446126708804939, "grad_norm": 1.8695321468803219, "learning_rate": 4.967578723446976e-06, "loss": 0.031878662109375, "step": 62985 }, { "epoch": 0.5446559044020371, "grad_norm": 0.16125871271157616, "learning_rate": 4.9674249242198816e-06, "loss": 0.17078704833984376, "step": 62990 }, { "epoch": 0.5446991379235804, "grad_norm": 26.772631612554346, "learning_rate": 4.967271115919196e-06, "loss": 0.5803943634033203, "step": 62995 }, { "epoch": 0.5447423714451237, "grad_norm": 3.371554083618623, "learning_rate": 4.967117298545631e-06, "loss": 0.6954376220703125, "step": 63000 }, { "epoch": 0.544785604966667, "grad_norm": 7.5034035845048805, "learning_rate": 4.966963472099894e-06, "loss": 0.03372650146484375, "step": 63005 }, { "epoch": 0.5448288384882102, "grad_norm": 1.3750943068292518, "learning_rate": 4.9668096365826946e-06, "loss": 0.028191375732421874, "step": 63010 }, { "epoch": 0.5448720720097535, "grad_norm": 5.022053276142194, "learning_rate": 4.966655791994743e-06, "loss": 0.07970962524414063, "step": 63015 }, { "epoch": 0.5449153055312967, "grad_norm": 22.317208208900254, "learning_rate": 4.96650193833675e-06, "loss": 0.18458404541015624, "step": 63020 }, { "epoch": 0.54495853905284, "grad_norm": 11.337836003100257, "learning_rate": 4.966348075609423e-06, "loss": 0.07074127197265626, "step": 63025 }, { "epoch": 0.5450017725743833, "grad_norm": 3.92913197890516, "learning_rate": 4.966194203813473e-06, "loss": 0.09093780517578125, "step": 63030 }, { "epoch": 0.5450450060959265, "grad_norm": 1.1121895839690186, "learning_rate": 4.966040322949608e-06, "loss": 0.12630386352539064, "step": 63035 }, { "epoch": 0.5450882396174698, "grad_norm": 2.3278494337968434, "learning_rate": 4.965886433018539e-06, "loss": 0.09703826904296875, "step": 63040 }, { "epoch": 0.5451314731390131, "grad_norm": 13.366857782839384, "learning_rate": 4.965732534020976e-06, "loss": 0.11288604736328126, "step": 63045 }, { "epoch": 0.5451747066605563, "grad_norm": 18.44766301749058, "learning_rate": 4.965578625957628e-06, "loss": 0.257696533203125, "step": 63050 }, { "epoch": 0.5452179401820996, "grad_norm": 36.01099913960632, "learning_rate": 4.965424708829206e-06, "loss": 0.19802703857421874, "step": 63055 }, { "epoch": 0.5452611737036429, "grad_norm": 18.779852881812126, "learning_rate": 4.965270782636417e-06, "loss": 0.22333889007568358, "step": 63060 }, { "epoch": 0.5453044072251861, "grad_norm": 23.767879878584186, "learning_rate": 4.965116847379974e-06, "loss": 0.14580230712890624, "step": 63065 }, { "epoch": 0.5453476407467294, "grad_norm": 7.427650574163851, "learning_rate": 4.964962903060586e-06, "loss": 0.124365234375, "step": 63070 }, { "epoch": 0.5453908742682726, "grad_norm": 1.046489921110981, "learning_rate": 4.964808949678963e-06, "loss": 0.06816635131835938, "step": 63075 }, { "epoch": 0.5454341077898159, "grad_norm": 2.7904716736816604, "learning_rate": 4.9646549872358135e-06, "loss": 0.17053680419921874, "step": 63080 }, { "epoch": 0.5454773413113592, "grad_norm": 3.0579281145977397, "learning_rate": 4.96450101573185e-06, "loss": 0.13739662170410155, "step": 63085 }, { "epoch": 0.5455205748329024, "grad_norm": 20.6735220077465, "learning_rate": 4.96434703516778e-06, "loss": 0.2129974365234375, "step": 63090 }, { "epoch": 0.5455638083544457, "grad_norm": 0.9248116335343461, "learning_rate": 4.964193045544316e-06, "loss": 0.13956985473632813, "step": 63095 }, { "epoch": 0.545607041875989, "grad_norm": 11.248193441391976, "learning_rate": 4.9640390468621675e-06, "loss": 0.16613922119140626, "step": 63100 }, { "epoch": 0.5456502753975322, "grad_norm": 3.5726986003332684, "learning_rate": 4.9638850391220435e-06, "loss": 0.04984893798828125, "step": 63105 }, { "epoch": 0.5456935089190755, "grad_norm": 7.394254802713638, "learning_rate": 4.963731022324655e-06, "loss": 0.17826690673828124, "step": 63110 }, { "epoch": 0.5457367424406188, "grad_norm": 1.0908276291066958, "learning_rate": 4.963576996470714e-06, "loss": 0.1369293212890625, "step": 63115 }, { "epoch": 0.545779975962162, "grad_norm": 68.31552870774941, "learning_rate": 4.9634229615609285e-06, "loss": 0.2800506591796875, "step": 63120 }, { "epoch": 0.5458232094837053, "grad_norm": 1.068199042451712, "learning_rate": 4.96326891759601e-06, "loss": 0.110894775390625, "step": 63125 }, { "epoch": 0.5458664430052486, "grad_norm": 19.252224130777574, "learning_rate": 4.963114864576669e-06, "loss": 0.3202392578125, "step": 63130 }, { "epoch": 0.5459096765267918, "grad_norm": 1.065053080832001, "learning_rate": 4.962960802503615e-06, "loss": 0.2922523498535156, "step": 63135 }, { "epoch": 0.5459529100483351, "grad_norm": 49.26276379545063, "learning_rate": 4.962806731377559e-06, "loss": 0.45194091796875, "step": 63140 }, { "epoch": 0.5459961435698784, "grad_norm": 7.724206353202386, "learning_rate": 4.962652651199213e-06, "loss": 0.19860992431640626, "step": 63145 }, { "epoch": 0.5460393770914216, "grad_norm": 0.13134435649788284, "learning_rate": 4.962498561969286e-06, "loss": 0.40167884826660155, "step": 63150 }, { "epoch": 0.5460826106129648, "grad_norm": 6.58187911466336, "learning_rate": 4.962344463688489e-06, "loss": 0.097503662109375, "step": 63155 }, { "epoch": 0.5461258441345082, "grad_norm": 15.9417641127438, "learning_rate": 4.9621903563575325e-06, "loss": 0.24794387817382812, "step": 63160 }, { "epoch": 0.5461690776560514, "grad_norm": 17.218714745892665, "learning_rate": 4.962036239977127e-06, "loss": 0.26620635986328123, "step": 63165 }, { "epoch": 0.5462123111775946, "grad_norm": 2.777823154153115, "learning_rate": 4.9618821145479854e-06, "loss": 0.21563796997070311, "step": 63170 }, { "epoch": 0.546255544699138, "grad_norm": 29.064597587666114, "learning_rate": 4.961727980070815e-06, "loss": 0.26450119018554685, "step": 63175 }, { "epoch": 0.5462987782206812, "grad_norm": 24.258673707855003, "learning_rate": 4.961573836546329e-06, "loss": 0.14971160888671875, "step": 63180 }, { "epoch": 0.5463420117422244, "grad_norm": 11.401562668703566, "learning_rate": 4.961419683975238e-06, "loss": 0.1674053192138672, "step": 63185 }, { "epoch": 0.5463852452637677, "grad_norm": 20.946065700536817, "learning_rate": 4.961265522358252e-06, "loss": 0.2120682716369629, "step": 63190 }, { "epoch": 0.546428478785311, "grad_norm": 3.049950675638369, "learning_rate": 4.961111351696084e-06, "loss": 0.40660858154296875, "step": 63195 }, { "epoch": 0.5464717123068542, "grad_norm": 94.18869148037636, "learning_rate": 4.960957171989443e-06, "loss": 0.27032470703125, "step": 63200 }, { "epoch": 0.5465149458283975, "grad_norm": 0.264652142510099, "learning_rate": 4.9608029832390415e-06, "loss": 0.07553977966308593, "step": 63205 }, { "epoch": 0.5465581793499408, "grad_norm": 5.17141678324712, "learning_rate": 4.960648785445589e-06, "loss": 0.233538818359375, "step": 63210 }, { "epoch": 0.546601412871484, "grad_norm": 2.1725218164626106, "learning_rate": 4.960494578609798e-06, "loss": 0.018574905395507813, "step": 63215 }, { "epoch": 0.5466446463930273, "grad_norm": 4.5078544068496145, "learning_rate": 4.960340362732379e-06, "loss": 0.026215362548828124, "step": 63220 }, { "epoch": 0.5466878799145706, "grad_norm": 3.457225668153734, "learning_rate": 4.960186137814044e-06, "loss": 0.42821807861328126, "step": 63225 }, { "epoch": 0.5467311134361138, "grad_norm": 18.103125107989268, "learning_rate": 4.9600319038555034e-06, "loss": 0.312567138671875, "step": 63230 }, { "epoch": 0.5467743469576571, "grad_norm": 27.431534554158475, "learning_rate": 4.959877660857468e-06, "loss": 0.3121063232421875, "step": 63235 }, { "epoch": 0.5468175804792004, "grad_norm": 5.553461252319264, "learning_rate": 4.959723408820652e-06, "loss": 0.6870689392089844, "step": 63240 }, { "epoch": 0.5468608140007436, "grad_norm": 0.8569401579172933, "learning_rate": 4.959569147745763e-06, "loss": 0.04816131591796875, "step": 63245 }, { "epoch": 0.5469040475222868, "grad_norm": 1.2442955000271985, "learning_rate": 4.9594148776335145e-06, "loss": 0.1670379638671875, "step": 63250 }, { "epoch": 0.5469472810438302, "grad_norm": 3.927716129798256, "learning_rate": 4.9592605984846186e-06, "loss": 0.08089599609375, "step": 63255 }, { "epoch": 0.5469905145653734, "grad_norm": 7.624051527973244, "learning_rate": 4.959106310299785e-06, "loss": 0.10861968994140625, "step": 63260 }, { "epoch": 0.5470337480869166, "grad_norm": 51.791260573953394, "learning_rate": 4.958952013079727e-06, "loss": 0.47835693359375, "step": 63265 }, { "epoch": 0.54707698160846, "grad_norm": 6.086982789184343, "learning_rate": 4.958797706825155e-06, "loss": 0.09106216430664063, "step": 63270 }, { "epoch": 0.5471202151300032, "grad_norm": 8.220710287445163, "learning_rate": 4.9586433915367815e-06, "loss": 0.184710693359375, "step": 63275 }, { "epoch": 0.5471634486515464, "grad_norm": 24.433085187154397, "learning_rate": 4.958489067215317e-06, "loss": 0.38686065673828124, "step": 63280 }, { "epoch": 0.5472066821730898, "grad_norm": 1.6188595810327373, "learning_rate": 4.958334733861474e-06, "loss": 0.11289482116699219, "step": 63285 }, { "epoch": 0.547249915694633, "grad_norm": 3.0643296813064804, "learning_rate": 4.9581803914759655e-06, "loss": 0.3055908203125, "step": 63290 }, { "epoch": 0.5472931492161762, "grad_norm": 2.5961250096755872, "learning_rate": 4.958026040059501e-06, "loss": 0.08429794311523438, "step": 63295 }, { "epoch": 0.5473363827377196, "grad_norm": 0.2883121377758635, "learning_rate": 4.957871679612793e-06, "loss": 0.0773895263671875, "step": 63300 }, { "epoch": 0.5473796162592628, "grad_norm": 0.9388272165382071, "learning_rate": 4.957717310136555e-06, "loss": 0.0712127685546875, "step": 63305 }, { "epoch": 0.547422849780806, "grad_norm": 2.315243185026376, "learning_rate": 4.957562931631496e-06, "loss": 0.050336456298828124, "step": 63310 }, { "epoch": 0.5474660833023494, "grad_norm": 17.16038957582936, "learning_rate": 4.957408544098331e-06, "loss": 0.2815132141113281, "step": 63315 }, { "epoch": 0.5475093168238926, "grad_norm": 4.278576217840107, "learning_rate": 4.957254147537771e-06, "loss": 0.060104751586914064, "step": 63320 }, { "epoch": 0.5475525503454358, "grad_norm": 12.53738860021597, "learning_rate": 4.957099741950527e-06, "loss": 0.1420360565185547, "step": 63325 }, { "epoch": 0.547595783866979, "grad_norm": 21.202487096582736, "learning_rate": 4.956945327337313e-06, "loss": 0.1857666015625, "step": 63330 }, { "epoch": 0.5476390173885224, "grad_norm": 1.1183184724995079, "learning_rate": 4.956790903698839e-06, "loss": 0.2114501953125, "step": 63335 }, { "epoch": 0.5476822509100656, "grad_norm": 19.98915068395306, "learning_rate": 4.9566364710358184e-06, "loss": 0.28203277587890624, "step": 63340 }, { "epoch": 0.5477254844316088, "grad_norm": 7.889919933173626, "learning_rate": 4.956482029348964e-06, "loss": 0.0799591064453125, "step": 63345 }, { "epoch": 0.5477687179531522, "grad_norm": 3.110441039532483, "learning_rate": 4.956327578638987e-06, "loss": 0.20758056640625, "step": 63350 }, { "epoch": 0.5478119514746954, "grad_norm": 1.3808310000330046, "learning_rate": 4.9561731189066e-06, "loss": 0.14508514404296874, "step": 63355 }, { "epoch": 0.5478551849962386, "grad_norm": 5.600060086665769, "learning_rate": 4.956018650152515e-06, "loss": 0.43604736328125, "step": 63360 }, { "epoch": 0.547898418517782, "grad_norm": 65.1623864968369, "learning_rate": 4.955864172377445e-06, "loss": 0.47533378601074217, "step": 63365 }, { "epoch": 0.5479416520393252, "grad_norm": 5.460006639066618, "learning_rate": 4.955709685582103e-06, "loss": 0.10802764892578125, "step": 63370 }, { "epoch": 0.5479848855608684, "grad_norm": 18.412203011584687, "learning_rate": 4.9555551897672005e-06, "loss": 0.16691131591796876, "step": 63375 }, { "epoch": 0.5480281190824118, "grad_norm": 1.9141872867501513, "learning_rate": 4.955400684933449e-06, "loss": 0.17135772705078126, "step": 63380 }, { "epoch": 0.548071352603955, "grad_norm": 4.35585715856134, "learning_rate": 4.955246171081564e-06, "loss": 0.05980224609375, "step": 63385 }, { "epoch": 0.5481145861254982, "grad_norm": 9.370758710674039, "learning_rate": 4.955091648212256e-06, "loss": 0.15159912109375, "step": 63390 }, { "epoch": 0.5481578196470416, "grad_norm": 21.17023860383011, "learning_rate": 4.9549371163262374e-06, "loss": 0.3137481689453125, "step": 63395 }, { "epoch": 0.5482010531685848, "grad_norm": 12.935164013942188, "learning_rate": 4.9547825754242225e-06, "loss": 0.10931625366210937, "step": 63400 }, { "epoch": 0.548244286690128, "grad_norm": 14.935819912909675, "learning_rate": 4.954628025506922e-06, "loss": 0.15094146728515626, "step": 63405 }, { "epoch": 0.5482875202116714, "grad_norm": 61.58682410930661, "learning_rate": 4.9544734665750505e-06, "loss": 0.6516983032226562, "step": 63410 }, { "epoch": 0.5483307537332146, "grad_norm": 10.036110663925413, "learning_rate": 4.95431889862932e-06, "loss": 0.13997802734375, "step": 63415 }, { "epoch": 0.5483739872547578, "grad_norm": 15.516773415304492, "learning_rate": 4.954164321670443e-06, "loss": 0.2723114013671875, "step": 63420 }, { "epoch": 0.5484172207763011, "grad_norm": 6.6407157753950194, "learning_rate": 4.954009735699133e-06, "loss": 0.04305610656738281, "step": 63425 }, { "epoch": 0.5484604542978444, "grad_norm": 0.3128899603065627, "learning_rate": 4.953855140716103e-06, "loss": 0.06319198608398438, "step": 63430 }, { "epoch": 0.5485036878193876, "grad_norm": 39.95366565970146, "learning_rate": 4.953700536722066e-06, "loss": 0.3909912109375, "step": 63435 }, { "epoch": 0.5485469213409309, "grad_norm": 5.0752311606700165, "learning_rate": 4.953545923717734e-06, "loss": 0.26591796875, "step": 63440 }, { "epoch": 0.5485901548624742, "grad_norm": 9.638799085717068, "learning_rate": 4.953391301703821e-06, "loss": 0.309295654296875, "step": 63445 }, { "epoch": 0.5486333883840174, "grad_norm": 26.762492322743864, "learning_rate": 4.9532366706810404e-06, "loss": 0.0995880126953125, "step": 63450 }, { "epoch": 0.5486766219055607, "grad_norm": 1.398800371487656, "learning_rate": 4.953082030650105e-06, "loss": 0.0487762451171875, "step": 63455 }, { "epoch": 0.548719855427104, "grad_norm": 1.444621189914032, "learning_rate": 4.952927381611727e-06, "loss": 0.364569091796875, "step": 63460 }, { "epoch": 0.5487630889486472, "grad_norm": 3.2461455806796065, "learning_rate": 4.952772723566621e-06, "loss": 0.2274566650390625, "step": 63465 }, { "epoch": 0.5488063224701905, "grad_norm": 0.415895396971579, "learning_rate": 4.9526180565155e-06, "loss": 0.22926559448242187, "step": 63470 }, { "epoch": 0.5488495559917338, "grad_norm": 0.9838433068638673, "learning_rate": 4.952463380459076e-06, "loss": 0.03140830993652344, "step": 63475 }, { "epoch": 0.548892789513277, "grad_norm": 2.94252763304863, "learning_rate": 4.952308695398065e-06, "loss": 0.2010162353515625, "step": 63480 }, { "epoch": 0.5489360230348203, "grad_norm": 16.6278961450564, "learning_rate": 4.952154001333177e-06, "loss": 0.228717041015625, "step": 63485 }, { "epoch": 0.5489792565563636, "grad_norm": 0.7967045894467377, "learning_rate": 4.951999298265128e-06, "loss": 0.05481719970703125, "step": 63490 }, { "epoch": 0.5490224900779068, "grad_norm": 7.537613446215508, "learning_rate": 4.9518445861946316e-06, "loss": 0.087744140625, "step": 63495 }, { "epoch": 0.54906572359945, "grad_norm": 128.56736432123128, "learning_rate": 4.951689865122399e-06, "loss": 0.2836029052734375, "step": 63500 }, { "epoch": 0.5491089571209933, "grad_norm": 12.360524106152337, "learning_rate": 4.9515351350491466e-06, "loss": 0.3676250457763672, "step": 63505 }, { "epoch": 0.5491521906425366, "grad_norm": 0.3556085575561065, "learning_rate": 4.951380395975586e-06, "loss": 0.09072418212890625, "step": 63510 }, { "epoch": 0.5491954241640798, "grad_norm": 31.916105973567937, "learning_rate": 4.951225647902431e-06, "loss": 0.3564777374267578, "step": 63515 }, { "epoch": 0.5492386576856231, "grad_norm": 9.904315791472262, "learning_rate": 4.951070890830396e-06, "loss": 0.10263519287109375, "step": 63520 }, { "epoch": 0.5492818912071664, "grad_norm": 25.179845456398468, "learning_rate": 4.950916124760195e-06, "loss": 0.18747482299804688, "step": 63525 }, { "epoch": 0.5493251247287096, "grad_norm": 31.74900965584002, "learning_rate": 4.9507613496925405e-06, "loss": 0.147222900390625, "step": 63530 }, { "epoch": 0.5493683582502529, "grad_norm": 4.069981955502241, "learning_rate": 4.9506065656281474e-06, "loss": 0.09751644134521484, "step": 63535 }, { "epoch": 0.5494115917717962, "grad_norm": 17.694474133022332, "learning_rate": 4.950451772567728e-06, "loss": 0.0994293212890625, "step": 63540 }, { "epoch": 0.5494548252933394, "grad_norm": 6.999660452132123, "learning_rate": 4.950296970512e-06, "loss": 0.205126953125, "step": 63545 }, { "epoch": 0.5494980588148827, "grad_norm": 3.669380949256623, "learning_rate": 4.950142159461673e-06, "loss": 0.21876220703125, "step": 63550 }, { "epoch": 0.549541292336426, "grad_norm": 11.219492644812762, "learning_rate": 4.949987339417463e-06, "loss": 0.1431488037109375, "step": 63555 }, { "epoch": 0.5495845258579692, "grad_norm": 32.52073323734785, "learning_rate": 4.949832510380083e-06, "loss": 0.33892669677734377, "step": 63560 }, { "epoch": 0.5496277593795125, "grad_norm": 26.086671244200872, "learning_rate": 4.949677672350249e-06, "loss": 0.477825927734375, "step": 63565 }, { "epoch": 0.5496709929010558, "grad_norm": 28.06310063089184, "learning_rate": 4.949522825328672e-06, "loss": 0.236663818359375, "step": 63570 }, { "epoch": 0.549714226422599, "grad_norm": 0.14725433290788828, "learning_rate": 4.949367969316069e-06, "loss": 0.2099334716796875, "step": 63575 }, { "epoch": 0.5497574599441423, "grad_norm": 19.164975844917173, "learning_rate": 4.949213104313155e-06, "loss": 0.466571044921875, "step": 63580 }, { "epoch": 0.5498006934656856, "grad_norm": 2.4115539161200354, "learning_rate": 4.94905823032064e-06, "loss": 0.12330322265625, "step": 63585 }, { "epoch": 0.5498439269872288, "grad_norm": 2.529484403264169, "learning_rate": 4.9489033473392415e-06, "loss": 0.12895278930664061, "step": 63590 }, { "epoch": 0.5498871605087721, "grad_norm": 0.5160447114126417, "learning_rate": 4.948748455369672e-06, "loss": 0.3008598327636719, "step": 63595 }, { "epoch": 0.5499303940303153, "grad_norm": 16.759303585769132, "learning_rate": 4.948593554412648e-06, "loss": 0.14520111083984374, "step": 63600 }, { "epoch": 0.5499736275518586, "grad_norm": 29.243856154782243, "learning_rate": 4.948438644468883e-06, "loss": 0.19669113159179688, "step": 63605 }, { "epoch": 0.5500168610734019, "grad_norm": 12.414883626576232, "learning_rate": 4.94828372553909e-06, "loss": 0.4201618194580078, "step": 63610 }, { "epoch": 0.5500600945949451, "grad_norm": 26.308541509661804, "learning_rate": 4.948128797623985e-06, "loss": 0.3399782180786133, "step": 63615 }, { "epoch": 0.5501033281164884, "grad_norm": 20.02303057288452, "learning_rate": 4.947973860724282e-06, "loss": 0.535748291015625, "step": 63620 }, { "epoch": 0.5501465616380317, "grad_norm": 25.24745829339774, "learning_rate": 4.947818914840696e-06, "loss": 0.30631179809570314, "step": 63625 }, { "epoch": 0.5501897951595749, "grad_norm": 1.1939318212912842, "learning_rate": 4.947663959973942e-06, "loss": 0.14481658935546876, "step": 63630 }, { "epoch": 0.5502330286811182, "grad_norm": 6.150816890652966, "learning_rate": 4.947508996124733e-06, "loss": 0.05680084228515625, "step": 63635 }, { "epoch": 0.5502762622026615, "grad_norm": 31.739121574540345, "learning_rate": 4.947354023293784e-06, "loss": 0.2346435546875, "step": 63640 }, { "epoch": 0.5503194957242047, "grad_norm": 25.519976159694522, "learning_rate": 4.947199041481811e-06, "loss": 0.1926300048828125, "step": 63645 }, { "epoch": 0.550362729245748, "grad_norm": 8.12450464406765, "learning_rate": 4.947044050689529e-06, "loss": 0.21979217529296874, "step": 63650 }, { "epoch": 0.5504059627672913, "grad_norm": 17.644623930981968, "learning_rate": 4.94688905091765e-06, "loss": 0.1301513671875, "step": 63655 }, { "epoch": 0.5504491962888345, "grad_norm": 12.335802778024275, "learning_rate": 4.946734042166892e-06, "loss": 0.07162399291992187, "step": 63660 }, { "epoch": 0.5504924298103778, "grad_norm": 14.319945000819164, "learning_rate": 4.946579024437967e-06, "loss": 0.06774444580078125, "step": 63665 }, { "epoch": 0.550535663331921, "grad_norm": 0.8112386425908578, "learning_rate": 4.9464239977315945e-06, "loss": 0.04023284912109375, "step": 63670 }, { "epoch": 0.5505788968534643, "grad_norm": 7.225776576972544, "learning_rate": 4.946268962048484e-06, "loss": 0.37596817016601564, "step": 63675 }, { "epoch": 0.5506221303750075, "grad_norm": 28.39172120712706, "learning_rate": 4.946113917389354e-06, "loss": 0.187188720703125, "step": 63680 }, { "epoch": 0.5506653638965509, "grad_norm": 24.101233336255365, "learning_rate": 4.945958863754918e-06, "loss": 0.3891582489013672, "step": 63685 }, { "epoch": 0.5507085974180941, "grad_norm": 11.367685149710654, "learning_rate": 4.945803801145893e-06, "loss": 0.0878488540649414, "step": 63690 }, { "epoch": 0.5507518309396373, "grad_norm": 16.26594172536211, "learning_rate": 4.945648729562992e-06, "loss": 0.278076171875, "step": 63695 }, { "epoch": 0.5507950644611807, "grad_norm": 13.528794908764198, "learning_rate": 4.94549364900693e-06, "loss": 0.234295654296875, "step": 63700 }, { "epoch": 0.5508382979827239, "grad_norm": 1.586281391283713, "learning_rate": 4.945338559478424e-06, "loss": 0.08461456298828125, "step": 63705 }, { "epoch": 0.5508815315042671, "grad_norm": 0.2522536982859769, "learning_rate": 4.9451834609781895e-06, "loss": 0.11059112548828125, "step": 63710 }, { "epoch": 0.5509247650258104, "grad_norm": 46.65384695946864, "learning_rate": 4.945028353506939e-06, "loss": 0.3195507049560547, "step": 63715 }, { "epoch": 0.5509679985473537, "grad_norm": 4.6823922559432445, "learning_rate": 4.944873237065391e-06, "loss": 0.16708755493164062, "step": 63720 }, { "epoch": 0.5510112320688969, "grad_norm": 2.7848822540610394, "learning_rate": 4.944718111654259e-06, "loss": 0.2027618408203125, "step": 63725 }, { "epoch": 0.5510544655904402, "grad_norm": 1.921754399834776, "learning_rate": 4.944562977274258e-06, "loss": 0.316796875, "step": 63730 }, { "epoch": 0.5510976991119835, "grad_norm": 32.26734591702538, "learning_rate": 4.9444078339261056e-06, "loss": 0.21163330078125, "step": 63735 }, { "epoch": 0.5511409326335267, "grad_norm": 18.74450871987047, "learning_rate": 4.944252681610516e-06, "loss": 0.22316207885742187, "step": 63740 }, { "epoch": 0.55118416615507, "grad_norm": 10.513905412195633, "learning_rate": 4.944097520328204e-06, "loss": 0.371246337890625, "step": 63745 }, { "epoch": 0.5512273996766133, "grad_norm": 4.6408457417154265, "learning_rate": 4.943942350079887e-06, "loss": 0.10367431640625, "step": 63750 }, { "epoch": 0.5512706331981565, "grad_norm": 31.699498410456297, "learning_rate": 4.943787170866278e-06, "loss": 0.2211700439453125, "step": 63755 }, { "epoch": 0.5513138667196998, "grad_norm": 1.557020285318394, "learning_rate": 4.9436319826880954e-06, "loss": 0.2316356658935547, "step": 63760 }, { "epoch": 0.5513571002412431, "grad_norm": 1.7962088944394952, "learning_rate": 4.943476785546054e-06, "loss": 0.23180999755859374, "step": 63765 }, { "epoch": 0.5514003337627863, "grad_norm": 0.14645829258494966, "learning_rate": 4.94332157944087e-06, "loss": 0.32658977508544923, "step": 63770 }, { "epoch": 0.5514435672843295, "grad_norm": 38.481991069366934, "learning_rate": 4.943166364373258e-06, "loss": 0.1485198974609375, "step": 63775 }, { "epoch": 0.5514868008058729, "grad_norm": 0.15652356238380485, "learning_rate": 4.9430111403439345e-06, "loss": 0.06547660827636718, "step": 63780 }, { "epoch": 0.5515300343274161, "grad_norm": 7.874660224239283, "learning_rate": 4.942855907353615e-06, "loss": 0.05215187072753906, "step": 63785 }, { "epoch": 0.5515732678489593, "grad_norm": 7.772140127896184, "learning_rate": 4.942700665403017e-06, "loss": 0.35374298095703127, "step": 63790 }, { "epoch": 0.5516165013705027, "grad_norm": 21.27286904673641, "learning_rate": 4.942545414492853e-06, "loss": 0.3637298583984375, "step": 63795 }, { "epoch": 0.5516597348920459, "grad_norm": 4.029474597149719, "learning_rate": 4.942390154623842e-06, "loss": 0.09335556030273437, "step": 63800 }, { "epoch": 0.5517029684135891, "grad_norm": 16.37501202646185, "learning_rate": 4.9422348857967e-06, "loss": 0.24947662353515626, "step": 63805 }, { "epoch": 0.5517462019351325, "grad_norm": 39.43733947690524, "learning_rate": 4.942079608012142e-06, "loss": 0.35012969970703123, "step": 63810 }, { "epoch": 0.5517894354566757, "grad_norm": 17.357932165053782, "learning_rate": 4.941924321270884e-06, "loss": 0.19313507080078124, "step": 63815 }, { "epoch": 0.5518326689782189, "grad_norm": 11.890895624093494, "learning_rate": 4.941769025573643e-06, "loss": 0.136041259765625, "step": 63820 }, { "epoch": 0.5518759024997623, "grad_norm": 15.48527733507027, "learning_rate": 4.941613720921135e-06, "loss": 0.11638031005859376, "step": 63825 }, { "epoch": 0.5519191360213055, "grad_norm": 4.370270282160991, "learning_rate": 4.941458407314075e-06, "loss": 0.13992538452148437, "step": 63830 }, { "epoch": 0.5519623695428487, "grad_norm": 1.1868906991932162, "learning_rate": 4.941303084753182e-06, "loss": 0.05508270263671875, "step": 63835 }, { "epoch": 0.5520056030643921, "grad_norm": 9.077468677211119, "learning_rate": 4.941147753239169e-06, "loss": 0.0527679443359375, "step": 63840 }, { "epoch": 0.5520488365859353, "grad_norm": 2.2623753356372442, "learning_rate": 4.940992412772755e-06, "loss": 0.051399612426757814, "step": 63845 }, { "epoch": 0.5520920701074785, "grad_norm": 0.7785723961820038, "learning_rate": 4.940837063354656e-06, "loss": 0.03383941650390625, "step": 63850 }, { "epoch": 0.5521353036290217, "grad_norm": 5.798970780973608, "learning_rate": 4.940681704985588e-06, "loss": 0.22915802001953126, "step": 63855 }, { "epoch": 0.5521785371505651, "grad_norm": 10.906785881786899, "learning_rate": 4.940526337666266e-06, "loss": 0.2466033935546875, "step": 63860 }, { "epoch": 0.5522217706721083, "grad_norm": 1.4331884500881111, "learning_rate": 4.940370961397409e-06, "loss": 0.1528900146484375, "step": 63865 }, { "epoch": 0.5522650041936515, "grad_norm": 10.190105161976184, "learning_rate": 4.940215576179732e-06, "loss": 0.03507537841796875, "step": 63870 }, { "epoch": 0.5523082377151949, "grad_norm": 10.590053233156532, "learning_rate": 4.940060182013953e-06, "loss": 0.09295921325683594, "step": 63875 }, { "epoch": 0.5523514712367381, "grad_norm": 8.169281706653896, "learning_rate": 4.9399047789007874e-06, "loss": 0.12366409301757812, "step": 63880 }, { "epoch": 0.5523947047582813, "grad_norm": 10.103165862201907, "learning_rate": 4.939749366840953e-06, "loss": 0.4764259338378906, "step": 63885 }, { "epoch": 0.5524379382798247, "grad_norm": 33.6749657768642, "learning_rate": 4.939593945835165e-06, "loss": 0.14141159057617186, "step": 63890 }, { "epoch": 0.5524811718013679, "grad_norm": 87.5076978767378, "learning_rate": 4.939438515884142e-06, "loss": 0.18324203491210939, "step": 63895 }, { "epoch": 0.5525244053229111, "grad_norm": 15.579327701605107, "learning_rate": 4.939283076988599e-06, "loss": 0.10371780395507812, "step": 63900 }, { "epoch": 0.5525676388444545, "grad_norm": 31.90300065166613, "learning_rate": 4.939127629149254e-06, "loss": 0.3205596923828125, "step": 63905 }, { "epoch": 0.5526108723659977, "grad_norm": 3.1147757292577167, "learning_rate": 4.9389721723668245e-06, "loss": 0.15931930541992187, "step": 63910 }, { "epoch": 0.5526541058875409, "grad_norm": 0.8072037645157075, "learning_rate": 4.938816706642026e-06, "loss": 0.6019241333007812, "step": 63915 }, { "epoch": 0.5526973394090843, "grad_norm": 6.816731715414492, "learning_rate": 4.9386612319755765e-06, "loss": 0.10760498046875, "step": 63920 }, { "epoch": 0.5527405729306275, "grad_norm": 5.890834706447313, "learning_rate": 4.938505748368193e-06, "loss": 0.1958587646484375, "step": 63925 }, { "epoch": 0.5527838064521707, "grad_norm": 20.928900618855973, "learning_rate": 4.938350255820592e-06, "loss": 0.12377548217773438, "step": 63930 }, { "epoch": 0.5528270399737141, "grad_norm": 3.5344601051229048, "learning_rate": 4.93819475433349e-06, "loss": 0.26632843017578123, "step": 63935 }, { "epoch": 0.5528702734952573, "grad_norm": 40.69401657529324, "learning_rate": 4.938039243907606e-06, "loss": 0.20706787109375, "step": 63940 }, { "epoch": 0.5529135070168005, "grad_norm": 9.236393914590721, "learning_rate": 4.937883724543656e-06, "loss": 0.26380462646484376, "step": 63945 }, { "epoch": 0.5529567405383438, "grad_norm": 24.93233039527676, "learning_rate": 4.937728196242358e-06, "loss": 0.21067962646484376, "step": 63950 }, { "epoch": 0.5529999740598871, "grad_norm": 30.744890303250223, "learning_rate": 4.9375726590044274e-06, "loss": 0.1840179443359375, "step": 63955 }, { "epoch": 0.5530432075814303, "grad_norm": 5.256422844708782, "learning_rate": 4.937417112830584e-06, "loss": 0.052679443359375, "step": 63960 }, { "epoch": 0.5530864411029736, "grad_norm": 0.7624719886561172, "learning_rate": 4.937261557721544e-06, "loss": 0.11634979248046876, "step": 63965 }, { "epoch": 0.5531296746245169, "grad_norm": 3.2980217068651183, "learning_rate": 4.937105993678024e-06, "loss": 0.145355224609375, "step": 63970 }, { "epoch": 0.5531729081460601, "grad_norm": 2.382817165348892, "learning_rate": 4.936950420700744e-06, "loss": 0.05747299194335938, "step": 63975 }, { "epoch": 0.5532161416676034, "grad_norm": 9.941040929889843, "learning_rate": 4.936794838790419e-06, "loss": 0.194866943359375, "step": 63980 }, { "epoch": 0.5532593751891467, "grad_norm": 6.758349941608321, "learning_rate": 4.936639247947766e-06, "loss": 0.07703857421875, "step": 63985 }, { "epoch": 0.5533026087106899, "grad_norm": 4.446979377313447, "learning_rate": 4.9364836481735054e-06, "loss": 0.066937255859375, "step": 63990 }, { "epoch": 0.5533458422322332, "grad_norm": 18.146891663909166, "learning_rate": 4.9363280394683536e-06, "loss": 0.28404998779296875, "step": 63995 }, { "epoch": 0.5533890757537765, "grad_norm": 18.296163690175522, "learning_rate": 4.936172421833027e-06, "loss": 0.06783733367919922, "step": 64000 }, { "epoch": 0.5534323092753197, "grad_norm": 6.110610641542058, "learning_rate": 4.936016795268246e-06, "loss": 0.3171363830566406, "step": 64005 }, { "epoch": 0.553475542796863, "grad_norm": 3.3358320834222375, "learning_rate": 4.935861159774725e-06, "loss": 0.11667633056640625, "step": 64010 }, { "epoch": 0.5535187763184063, "grad_norm": 6.2066540829405, "learning_rate": 4.935705515353183e-06, "loss": 0.15128402709960936, "step": 64015 }, { "epoch": 0.5535620098399495, "grad_norm": 2.105792323856726, "learning_rate": 4.935549862004339e-06, "loss": 0.7000968933105469, "step": 64020 }, { "epoch": 0.5536052433614927, "grad_norm": 14.507273454949644, "learning_rate": 4.93539419972891e-06, "loss": 0.237060546875, "step": 64025 }, { "epoch": 0.553648476883036, "grad_norm": 2.0277048706836176, "learning_rate": 4.935238528527614e-06, "loss": 0.02520599365234375, "step": 64030 }, { "epoch": 0.5536917104045793, "grad_norm": 67.77672854991621, "learning_rate": 4.935082848401169e-06, "loss": 0.35106697082519533, "step": 64035 }, { "epoch": 0.5537349439261225, "grad_norm": 2.5139052154573553, "learning_rate": 4.934927159350292e-06, "loss": 0.2444427490234375, "step": 64040 }, { "epoch": 0.5537781774476658, "grad_norm": 15.336783008475393, "learning_rate": 4.9347714613757035e-06, "loss": 0.12431106567382813, "step": 64045 }, { "epoch": 0.5538214109692091, "grad_norm": 20.498819544392695, "learning_rate": 4.934615754478118e-06, "loss": 0.21707763671875, "step": 64050 }, { "epoch": 0.5538646444907523, "grad_norm": 7.048506828562369, "learning_rate": 4.934460038658257e-06, "loss": 0.396380615234375, "step": 64055 }, { "epoch": 0.5539078780122956, "grad_norm": 38.58616292529129, "learning_rate": 4.934304313916838e-06, "loss": 0.2073577880859375, "step": 64060 }, { "epoch": 0.5539511115338389, "grad_norm": 6.873947686310068, "learning_rate": 4.934148580254577e-06, "loss": 0.13981170654296876, "step": 64065 }, { "epoch": 0.5539943450553821, "grad_norm": 3.194979367152315, "learning_rate": 4.933992837672193e-06, "loss": 0.12966766357421874, "step": 64070 }, { "epoch": 0.5540375785769254, "grad_norm": 34.925159285552816, "learning_rate": 4.933837086170407e-06, "loss": 0.23170166015625, "step": 64075 }, { "epoch": 0.5540808120984687, "grad_norm": 7.676256129710436, "learning_rate": 4.933681325749933e-06, "loss": 0.10100631713867188, "step": 64080 }, { "epoch": 0.5541240456200119, "grad_norm": 2.0794474200782873, "learning_rate": 4.933525556411493e-06, "loss": 0.10705299377441406, "step": 64085 }, { "epoch": 0.5541672791415552, "grad_norm": 3.4138505234631373, "learning_rate": 4.9333697781558044e-06, "loss": 0.03908729553222656, "step": 64090 }, { "epoch": 0.5542105126630985, "grad_norm": 1.7604080541919365, "learning_rate": 4.933213990983584e-06, "loss": 0.12395858764648438, "step": 64095 }, { "epoch": 0.5542537461846417, "grad_norm": 4.854071982413523, "learning_rate": 4.933058194895552e-06, "loss": 0.4145225524902344, "step": 64100 }, { "epoch": 0.554296979706185, "grad_norm": 16.393350428832203, "learning_rate": 4.932902389892427e-06, "loss": 0.16568984985351562, "step": 64105 }, { "epoch": 0.5543402132277283, "grad_norm": 8.01428080934992, "learning_rate": 4.932746575974926e-06, "loss": 0.2429412841796875, "step": 64110 }, { "epoch": 0.5543834467492715, "grad_norm": 8.872677949695674, "learning_rate": 4.932590753143769e-06, "loss": 0.053537940979003905, "step": 64115 }, { "epoch": 0.5544266802708148, "grad_norm": 17.39033484129503, "learning_rate": 4.932434921399675e-06, "loss": 0.1573486328125, "step": 64120 }, { "epoch": 0.554469913792358, "grad_norm": 45.75689735217389, "learning_rate": 4.932279080743361e-06, "loss": 0.11469268798828125, "step": 64125 }, { "epoch": 0.5545131473139013, "grad_norm": 20.871353427621383, "learning_rate": 4.9321232311755474e-06, "loss": 0.122906494140625, "step": 64130 }, { "epoch": 0.5545563808354446, "grad_norm": 0.8579279131775625, "learning_rate": 4.931967372696951e-06, "loss": 0.11465301513671874, "step": 64135 }, { "epoch": 0.5545996143569878, "grad_norm": 12.406435031491114, "learning_rate": 4.931811505308292e-06, "loss": 0.23943710327148438, "step": 64140 }, { "epoch": 0.5546428478785311, "grad_norm": 2.301356710814878, "learning_rate": 4.931655629010289e-06, "loss": 0.056545448303222653, "step": 64145 }, { "epoch": 0.5546860814000744, "grad_norm": 3.753128141229569, "learning_rate": 4.931499743803662e-06, "loss": 0.171337890625, "step": 64150 }, { "epoch": 0.5547293149216176, "grad_norm": 0.1294315063029083, "learning_rate": 4.931343849689128e-06, "loss": 0.44498291015625, "step": 64155 }, { "epoch": 0.5547725484431609, "grad_norm": 2.956180624390118, "learning_rate": 4.9311879466674065e-06, "loss": 0.03000640869140625, "step": 64160 }, { "epoch": 0.5548157819647042, "grad_norm": 0.49377903608951373, "learning_rate": 4.931032034739218e-06, "loss": 0.08708953857421875, "step": 64165 }, { "epoch": 0.5548590154862474, "grad_norm": 147.93019243127685, "learning_rate": 4.930876113905279e-06, "loss": 0.29728240966796876, "step": 64170 }, { "epoch": 0.5549022490077907, "grad_norm": 0.5670919668140447, "learning_rate": 4.93072018416631e-06, "loss": 0.010934829711914062, "step": 64175 }, { "epoch": 0.554945482529334, "grad_norm": 39.44717269681122, "learning_rate": 4.9305642455230315e-06, "loss": 0.131561279296875, "step": 64180 }, { "epoch": 0.5549887160508772, "grad_norm": 21.919374221425034, "learning_rate": 4.930408297976161e-06, "loss": 0.36069526672363283, "step": 64185 }, { "epoch": 0.5550319495724205, "grad_norm": 0.640779232096657, "learning_rate": 4.930252341526417e-06, "loss": 0.16117210388183595, "step": 64190 }, { "epoch": 0.5550751830939638, "grad_norm": 0.696868035294177, "learning_rate": 4.93009637617452e-06, "loss": 0.0375244140625, "step": 64195 }, { "epoch": 0.555118416615507, "grad_norm": 1.2522346502381414, "learning_rate": 4.92994040192119e-06, "loss": 0.2446308135986328, "step": 64200 }, { "epoch": 0.5551616501370502, "grad_norm": 10.846781873401083, "learning_rate": 4.929784418767145e-06, "loss": 0.25307159423828124, "step": 64205 }, { "epoch": 0.5552048836585936, "grad_norm": 0.35045918670836684, "learning_rate": 4.929628426713104e-06, "loss": 0.23640823364257812, "step": 64210 }, { "epoch": 0.5552481171801368, "grad_norm": 1.4679911948609756, "learning_rate": 4.929472425759788e-06, "loss": 0.0466888427734375, "step": 64215 }, { "epoch": 0.55529135070168, "grad_norm": 5.711846466164028, "learning_rate": 4.9293164159079155e-06, "loss": 0.048223876953125, "step": 64220 }, { "epoch": 0.5553345842232233, "grad_norm": 5.269590029290739, "learning_rate": 4.929160397158206e-06, "loss": 0.1429290771484375, "step": 64225 }, { "epoch": 0.5553778177447666, "grad_norm": 14.220751088355653, "learning_rate": 4.929004369511379e-06, "loss": 0.09864501953125, "step": 64230 }, { "epoch": 0.5554210512663098, "grad_norm": 6.611764627540555, "learning_rate": 4.928848332968155e-06, "loss": 0.0929656982421875, "step": 64235 }, { "epoch": 0.5554642847878531, "grad_norm": 18.626325152642675, "learning_rate": 4.928692287529252e-06, "loss": 0.16351356506347656, "step": 64240 }, { "epoch": 0.5555075183093964, "grad_norm": 7.03149984397072, "learning_rate": 4.928536233195391e-06, "loss": 0.12046051025390625, "step": 64245 }, { "epoch": 0.5555507518309396, "grad_norm": 0.08163197750529336, "learning_rate": 4.928380169967292e-06, "loss": 0.08212509155273437, "step": 64250 }, { "epoch": 0.5555939853524829, "grad_norm": 10.799514596569514, "learning_rate": 4.928224097845673e-06, "loss": 0.442236328125, "step": 64255 }, { "epoch": 0.5556372188740262, "grad_norm": 28.757925726923112, "learning_rate": 4.928068016831256e-06, "loss": 0.10123538970947266, "step": 64260 }, { "epoch": 0.5556804523955694, "grad_norm": 5.613253188797798, "learning_rate": 4.9279119269247585e-06, "loss": 0.0802581787109375, "step": 64265 }, { "epoch": 0.5557236859171127, "grad_norm": 1.1748026293178813, "learning_rate": 4.9277558281269016e-06, "loss": 0.1518157958984375, "step": 64270 }, { "epoch": 0.555766919438656, "grad_norm": 7.58502952959344, "learning_rate": 4.9275997204384055e-06, "loss": 0.20487403869628906, "step": 64275 }, { "epoch": 0.5558101529601992, "grad_norm": 0.19825528279384413, "learning_rate": 4.92744360385999e-06, "loss": 0.06813430786132812, "step": 64280 }, { "epoch": 0.5558533864817424, "grad_norm": 0.42412027233671573, "learning_rate": 4.927287478392375e-06, "loss": 0.2793975830078125, "step": 64285 }, { "epoch": 0.5558966200032858, "grad_norm": 0.9244281609983381, "learning_rate": 4.92713134403628e-06, "loss": 0.20822372436523437, "step": 64290 }, { "epoch": 0.555939853524829, "grad_norm": 4.840767290641476, "learning_rate": 4.926975200792426e-06, "loss": 0.0297454833984375, "step": 64295 }, { "epoch": 0.5559830870463722, "grad_norm": 25.218712832286542, "learning_rate": 4.926819048661533e-06, "loss": 0.2789794921875, "step": 64300 }, { "epoch": 0.5560263205679156, "grad_norm": 2.518343290817628, "learning_rate": 4.92666288764432e-06, "loss": 0.15789260864257812, "step": 64305 }, { "epoch": 0.5560695540894588, "grad_norm": 22.408631694736602, "learning_rate": 4.926506717741508e-06, "loss": 0.12556991577148438, "step": 64310 }, { "epoch": 0.556112787611002, "grad_norm": 1.5756670112958426, "learning_rate": 4.9263505389538174e-06, "loss": 0.09760284423828125, "step": 64315 }, { "epoch": 0.5561560211325454, "grad_norm": 0.24823281493900806, "learning_rate": 4.926194351281968e-06, "loss": 0.09253482818603516, "step": 64320 }, { "epoch": 0.5561992546540886, "grad_norm": 14.188743968674359, "learning_rate": 4.9260381547266815e-06, "loss": 0.30385818481445315, "step": 64325 }, { "epoch": 0.5562424881756318, "grad_norm": 6.306334771870855, "learning_rate": 4.925881949288676e-06, "loss": 0.1425994873046875, "step": 64330 }, { "epoch": 0.5562857216971752, "grad_norm": 0.2788664015566465, "learning_rate": 4.925725734968674e-06, "loss": 0.14137115478515624, "step": 64335 }, { "epoch": 0.5563289552187184, "grad_norm": 4.671271504199601, "learning_rate": 4.925569511767395e-06, "loss": 0.08037567138671875, "step": 64340 }, { "epoch": 0.5563721887402616, "grad_norm": 0.9003387383241723, "learning_rate": 4.925413279685559e-06, "loss": 0.0423828125, "step": 64345 }, { "epoch": 0.556415422261805, "grad_norm": 28.15166843122734, "learning_rate": 4.925257038723887e-06, "loss": 0.19541015625, "step": 64350 }, { "epoch": 0.5564586557833482, "grad_norm": 5.845714093272009, "learning_rate": 4.925100788883101e-06, "loss": 0.15661582946777344, "step": 64355 }, { "epoch": 0.5565018893048914, "grad_norm": 2.5028627093436895, "learning_rate": 4.924944530163919e-06, "loss": 0.2341278076171875, "step": 64360 }, { "epoch": 0.5565451228264348, "grad_norm": 8.707951012668586, "learning_rate": 4.924788262567062e-06, "loss": 0.10905590057373046, "step": 64365 }, { "epoch": 0.556588356347978, "grad_norm": 14.018566415055236, "learning_rate": 4.924631986093254e-06, "loss": 0.4510833740234375, "step": 64370 }, { "epoch": 0.5566315898695212, "grad_norm": 7.886753180541905, "learning_rate": 4.924475700743212e-06, "loss": 0.18632354736328124, "step": 64375 }, { "epoch": 0.5566748233910644, "grad_norm": 20.396954806615643, "learning_rate": 4.924319406517659e-06, "loss": 0.20503387451171876, "step": 64380 }, { "epoch": 0.5567180569126078, "grad_norm": 12.76318268796108, "learning_rate": 4.9241631034173135e-06, "loss": 0.2260578155517578, "step": 64385 }, { "epoch": 0.556761290434151, "grad_norm": 6.901045510081799, "learning_rate": 4.924006791442899e-06, "loss": 0.14240570068359376, "step": 64390 }, { "epoch": 0.5568045239556942, "grad_norm": 1.7783112990036352, "learning_rate": 4.923850470595134e-06, "loss": 0.06625518798828126, "step": 64395 }, { "epoch": 0.5568477574772376, "grad_norm": 3.4733228524679163, "learning_rate": 4.923694140874741e-06, "loss": 0.134979248046875, "step": 64400 }, { "epoch": 0.5568909909987808, "grad_norm": 1.549436866482796, "learning_rate": 4.9235378022824415e-06, "loss": 0.16241455078125, "step": 64405 }, { "epoch": 0.556934224520324, "grad_norm": 1.615592705225635, "learning_rate": 4.9233814548189555e-06, "loss": 0.10533676147460938, "step": 64410 }, { "epoch": 0.5569774580418674, "grad_norm": 2.5011364563265768, "learning_rate": 4.923225098485004e-06, "loss": 0.1165863037109375, "step": 64415 }, { "epoch": 0.5570206915634106, "grad_norm": 7.79653688836088, "learning_rate": 4.923068733281308e-06, "loss": 0.04331817626953125, "step": 64420 }, { "epoch": 0.5570639250849538, "grad_norm": 12.21014481972256, "learning_rate": 4.922912359208589e-06, "loss": 0.13551025390625, "step": 64425 }, { "epoch": 0.5571071586064972, "grad_norm": 9.272346179963044, "learning_rate": 4.922755976267568e-06, "loss": 0.12784271240234374, "step": 64430 }, { "epoch": 0.5571503921280404, "grad_norm": 1.2205740651539394, "learning_rate": 4.922599584458966e-06, "loss": 0.04429779052734375, "step": 64435 }, { "epoch": 0.5571936256495836, "grad_norm": 4.93743862160503, "learning_rate": 4.922443183783506e-06, "loss": 0.295343017578125, "step": 64440 }, { "epoch": 0.557236859171127, "grad_norm": 40.238993919385464, "learning_rate": 4.922286774241906e-06, "loss": 0.336871337890625, "step": 64445 }, { "epoch": 0.5572800926926702, "grad_norm": 5.4578279745747595, "learning_rate": 4.92213035583489e-06, "loss": 0.1510498046875, "step": 64450 }, { "epoch": 0.5573233262142134, "grad_norm": 5.260166040458014, "learning_rate": 4.921973928563179e-06, "loss": 0.20890579223632813, "step": 64455 }, { "epoch": 0.5573665597357567, "grad_norm": 5.99792893296789, "learning_rate": 4.921817492427494e-06, "loss": 0.14068527221679689, "step": 64460 }, { "epoch": 0.5574097932573, "grad_norm": 3.1437137580842207, "learning_rate": 4.921661047428556e-06, "loss": 0.50489501953125, "step": 64465 }, { "epoch": 0.5574530267788432, "grad_norm": 0.8770256385777191, "learning_rate": 4.921504593567088e-06, "loss": 0.07677268981933594, "step": 64470 }, { "epoch": 0.5574962603003865, "grad_norm": 4.878732105692007, "learning_rate": 4.9213481308438105e-06, "loss": 0.15565872192382812, "step": 64475 }, { "epoch": 0.5575394938219298, "grad_norm": 9.870506304233443, "learning_rate": 4.9211916592594436e-06, "loss": 0.04000091552734375, "step": 64480 }, { "epoch": 0.557582727343473, "grad_norm": 3.5326485777558037, "learning_rate": 4.921035178814712e-06, "loss": 0.0588165283203125, "step": 64485 }, { "epoch": 0.5576259608650163, "grad_norm": 13.692582374628842, "learning_rate": 4.920878689510336e-06, "loss": 0.1697509765625, "step": 64490 }, { "epoch": 0.5576691943865596, "grad_norm": 0.49255156996525273, "learning_rate": 4.920722191347036e-06, "loss": 0.0357879638671875, "step": 64495 }, { "epoch": 0.5577124279081028, "grad_norm": 36.11418866208971, "learning_rate": 4.920565684325536e-06, "loss": 0.358843994140625, "step": 64500 }, { "epoch": 0.557755661429646, "grad_norm": 10.37626993898693, "learning_rate": 4.920409168446557e-06, "loss": 0.11043243408203125, "step": 64505 }, { "epoch": 0.5577988949511894, "grad_norm": 1.8519700110685724, "learning_rate": 4.9202526437108194e-06, "loss": 0.0633636474609375, "step": 64510 }, { "epoch": 0.5578421284727326, "grad_norm": 0.703147899164198, "learning_rate": 4.920096110119048e-06, "loss": 0.11213226318359375, "step": 64515 }, { "epoch": 0.5578853619942759, "grad_norm": 4.724798372472479, "learning_rate": 4.9199395676719616e-06, "loss": 0.02426490783691406, "step": 64520 }, { "epoch": 0.5579285955158192, "grad_norm": 2.7619008230333355, "learning_rate": 4.919783016370284e-06, "loss": 0.2710662841796875, "step": 64525 }, { "epoch": 0.5579718290373624, "grad_norm": 24.768113491853043, "learning_rate": 4.919626456214737e-06, "loss": 0.2301025390625, "step": 64530 }, { "epoch": 0.5580150625589057, "grad_norm": 4.311646640652846, "learning_rate": 4.919469887206041e-06, "loss": 0.1370098114013672, "step": 64535 }, { "epoch": 0.558058296080449, "grad_norm": 0.5259803193025437, "learning_rate": 4.9193133093449215e-06, "loss": 0.14666061401367186, "step": 64540 }, { "epoch": 0.5581015296019922, "grad_norm": 7.282578774225484, "learning_rate": 4.919156722632098e-06, "loss": 0.10312833786010742, "step": 64545 }, { "epoch": 0.5581447631235354, "grad_norm": 11.509823697442858, "learning_rate": 4.919000127068293e-06, "loss": 0.2929370880126953, "step": 64550 }, { "epoch": 0.5581879966450787, "grad_norm": 5.064927800065358, "learning_rate": 4.918843522654229e-06, "loss": 0.12768783569335937, "step": 64555 }, { "epoch": 0.558231230166622, "grad_norm": 31.999717970599523, "learning_rate": 4.918686909390628e-06, "loss": 0.1400848388671875, "step": 64560 }, { "epoch": 0.5582744636881652, "grad_norm": 0.7191299244804392, "learning_rate": 4.918530287278214e-06, "loss": 0.03527488708496094, "step": 64565 }, { "epoch": 0.5583176972097085, "grad_norm": 5.218899869263055, "learning_rate": 4.9183736563177055e-06, "loss": 0.099755859375, "step": 64570 }, { "epoch": 0.5583609307312518, "grad_norm": 16.79108538279851, "learning_rate": 4.918217016509828e-06, "loss": 0.32051239013671873, "step": 64575 }, { "epoch": 0.558404164252795, "grad_norm": 4.873846569236935, "learning_rate": 4.918060367855304e-06, "loss": 0.046665191650390625, "step": 64580 }, { "epoch": 0.5584473977743383, "grad_norm": 10.732624676671142, "learning_rate": 4.917903710354854e-06, "loss": 0.17674674987792968, "step": 64585 }, { "epoch": 0.5584906312958816, "grad_norm": 3.8826927422183455, "learning_rate": 4.917747044009202e-06, "loss": 0.088311767578125, "step": 64590 }, { "epoch": 0.5585338648174248, "grad_norm": 5.370266309133408, "learning_rate": 4.917590368819071e-06, "loss": 0.16388874053955077, "step": 64595 }, { "epoch": 0.5585770983389681, "grad_norm": 13.528125236445067, "learning_rate": 4.917433684785181e-06, "loss": 0.21986083984375, "step": 64600 }, { "epoch": 0.5586203318605114, "grad_norm": 10.855341540102106, "learning_rate": 4.917276991908257e-06, "loss": 0.02635345458984375, "step": 64605 }, { "epoch": 0.5586635653820546, "grad_norm": 6.416954270057677, "learning_rate": 4.917120290189021e-06, "loss": 0.0838958740234375, "step": 64610 }, { "epoch": 0.5587067989035979, "grad_norm": 2.5658951226272366, "learning_rate": 4.916963579628196e-06, "loss": 0.0888153076171875, "step": 64615 }, { "epoch": 0.5587500324251412, "grad_norm": 41.47540464420656, "learning_rate": 4.916806860226503e-06, "loss": 0.1702972412109375, "step": 64620 }, { "epoch": 0.5587932659466844, "grad_norm": 9.160592609556131, "learning_rate": 4.916650131984668e-06, "loss": 0.2317626953125, "step": 64625 }, { "epoch": 0.5588364994682277, "grad_norm": 14.740252751133427, "learning_rate": 4.9164933949034104e-06, "loss": 0.1726715087890625, "step": 64630 }, { "epoch": 0.5588797329897709, "grad_norm": 7.860668142982011, "learning_rate": 4.916336648983456e-06, "loss": 0.11837615966796874, "step": 64635 }, { "epoch": 0.5589229665113142, "grad_norm": 5.884380198223803, "learning_rate": 4.916179894225525e-06, "loss": 0.06550788879394531, "step": 64640 }, { "epoch": 0.5589662000328575, "grad_norm": 0.7892302616222288, "learning_rate": 4.916023130630342e-06, "loss": 0.12815704345703124, "step": 64645 }, { "epoch": 0.5590094335544007, "grad_norm": 26.294632873036292, "learning_rate": 4.91586635819863e-06, "loss": 0.2384552001953125, "step": 64650 }, { "epoch": 0.559052667075944, "grad_norm": 1.0384698855818981, "learning_rate": 4.915709576931112e-06, "loss": 0.30055809020996094, "step": 64655 }, { "epoch": 0.5590959005974873, "grad_norm": 22.28880984074767, "learning_rate": 4.91555278682851e-06, "loss": 0.13173370361328124, "step": 64660 }, { "epoch": 0.5591391341190305, "grad_norm": 49.445566739009664, "learning_rate": 4.915395987891548e-06, "loss": 0.104296875, "step": 64665 }, { "epoch": 0.5591823676405738, "grad_norm": 5.801549652788616, "learning_rate": 4.915239180120949e-06, "loss": 0.4674842834472656, "step": 64670 }, { "epoch": 0.5592256011621171, "grad_norm": 14.049487863765131, "learning_rate": 4.915082363517436e-06, "loss": 0.38155364990234375, "step": 64675 }, { "epoch": 0.5592688346836603, "grad_norm": 3.2669377807926625, "learning_rate": 4.914925538081732e-06, "loss": 0.0567352294921875, "step": 64680 }, { "epoch": 0.5593120682052036, "grad_norm": 4.7442096238145, "learning_rate": 4.9147687038145615e-06, "loss": 0.09154815673828125, "step": 64685 }, { "epoch": 0.5593553017267469, "grad_norm": 1.5437206446323384, "learning_rate": 4.9146118607166455e-06, "loss": 0.21395111083984375, "step": 64690 }, { "epoch": 0.5593985352482901, "grad_norm": 0.9038638994162909, "learning_rate": 4.914455008788711e-06, "loss": 0.15476417541503906, "step": 64695 }, { "epoch": 0.5594417687698334, "grad_norm": 25.683631602201274, "learning_rate": 4.914298148031478e-06, "loss": 0.25954132080078124, "step": 64700 }, { "epoch": 0.5594850022913767, "grad_norm": 12.704070300697621, "learning_rate": 4.91414127844567e-06, "loss": 0.12939224243164063, "step": 64705 }, { "epoch": 0.5595282358129199, "grad_norm": 11.423033476528644, "learning_rate": 4.913984400032013e-06, "loss": 0.3798736572265625, "step": 64710 }, { "epoch": 0.5595714693344632, "grad_norm": 5.048588269965394, "learning_rate": 4.913827512791229e-06, "loss": 0.02824859619140625, "step": 64715 }, { "epoch": 0.5596147028560065, "grad_norm": 12.706501292787703, "learning_rate": 4.913670616724041e-06, "loss": 0.036777496337890625, "step": 64720 }, { "epoch": 0.5596579363775497, "grad_norm": 7.754624984285275, "learning_rate": 4.913513711831174e-06, "loss": 0.10718231201171875, "step": 64725 }, { "epoch": 0.5597011698990929, "grad_norm": 2.43168970691101, "learning_rate": 4.91335679811335e-06, "loss": 0.30088958740234373, "step": 64730 }, { "epoch": 0.5597444034206362, "grad_norm": 5.8722248011890885, "learning_rate": 4.9131998755712935e-06, "loss": 0.16454925537109374, "step": 64735 }, { "epoch": 0.5597876369421795, "grad_norm": 2.2283867474328716, "learning_rate": 4.913042944205728e-06, "loss": 0.06299705505371093, "step": 64740 }, { "epoch": 0.5598308704637227, "grad_norm": 1.2140271173598147, "learning_rate": 4.912886004017378e-06, "loss": 0.07529335021972657, "step": 64745 }, { "epoch": 0.559874103985266, "grad_norm": 1.052640941474728, "learning_rate": 4.912729055006967e-06, "loss": 0.0468292236328125, "step": 64750 }, { "epoch": 0.5599173375068093, "grad_norm": 4.489833934524629, "learning_rate": 4.912572097175219e-06, "loss": 0.30818023681640627, "step": 64755 }, { "epoch": 0.5599605710283525, "grad_norm": 0.6743971423183068, "learning_rate": 4.9124151305228576e-06, "loss": 0.0958404541015625, "step": 64760 }, { "epoch": 0.5600038045498958, "grad_norm": 0.31749982761040185, "learning_rate": 4.9122581550506055e-06, "loss": 0.14597625732421876, "step": 64765 }, { "epoch": 0.5600470380714391, "grad_norm": 0.24804884947747116, "learning_rate": 4.912101170759188e-06, "loss": 0.18132858276367186, "step": 64770 }, { "epoch": 0.5600902715929823, "grad_norm": 7.192879648976281, "learning_rate": 4.911944177649329e-06, "loss": 0.2100677490234375, "step": 64775 }, { "epoch": 0.5601335051145256, "grad_norm": 1.1789109771791901, "learning_rate": 4.911787175721752e-06, "loss": 0.0942901611328125, "step": 64780 }, { "epoch": 0.5601767386360689, "grad_norm": 0.47854177298109646, "learning_rate": 4.9116301649771815e-06, "loss": 0.201885986328125, "step": 64785 }, { "epoch": 0.5602199721576121, "grad_norm": 14.493528917539901, "learning_rate": 4.911473145416343e-06, "loss": 0.18787155151367188, "step": 64790 }, { "epoch": 0.5602632056791554, "grad_norm": 25.568315055273615, "learning_rate": 4.911316117039958e-06, "loss": 0.526837158203125, "step": 64795 }, { "epoch": 0.5603064392006987, "grad_norm": 0.4005421727036246, "learning_rate": 4.911159079848752e-06, "loss": 0.13326568603515626, "step": 64800 }, { "epoch": 0.5603496727222419, "grad_norm": 3.7178136530168264, "learning_rate": 4.91100203384345e-06, "loss": 0.04969215393066406, "step": 64805 }, { "epoch": 0.5603929062437851, "grad_norm": 23.797828584160147, "learning_rate": 4.910844979024774e-06, "loss": 0.31846160888671876, "step": 64810 }, { "epoch": 0.5604361397653285, "grad_norm": 0.8479882325049576, "learning_rate": 4.910687915393451e-06, "loss": 0.1334247589111328, "step": 64815 }, { "epoch": 0.5604793732868717, "grad_norm": 1.0598993945320159, "learning_rate": 4.910530842950204e-06, "loss": 0.1898040771484375, "step": 64820 }, { "epoch": 0.5605226068084149, "grad_norm": 0.2947698795029786, "learning_rate": 4.9103737616957576e-06, "loss": 0.08523101806640625, "step": 64825 }, { "epoch": 0.5605658403299583, "grad_norm": 0.37400470363673405, "learning_rate": 4.9102166716308354e-06, "loss": 0.06418590545654297, "step": 64830 }, { "epoch": 0.5606090738515015, "grad_norm": 14.388573481614436, "learning_rate": 4.910059572756163e-06, "loss": 0.169384765625, "step": 64835 }, { "epoch": 0.5606523073730447, "grad_norm": 30.714183077538955, "learning_rate": 4.9099024650724655e-06, "loss": 0.067230224609375, "step": 64840 }, { "epoch": 0.5606955408945881, "grad_norm": 0.09261983418702734, "learning_rate": 4.909745348580466e-06, "loss": 0.16261138916015624, "step": 64845 }, { "epoch": 0.5607387744161313, "grad_norm": 8.535537769690235, "learning_rate": 4.909588223280889e-06, "loss": 0.1169952392578125, "step": 64850 }, { "epoch": 0.5607820079376745, "grad_norm": 1.9407923125133222, "learning_rate": 4.909431089174462e-06, "loss": 0.3448169708251953, "step": 64855 }, { "epoch": 0.5608252414592179, "grad_norm": 4.755103760576633, "learning_rate": 4.909273946261905e-06, "loss": 0.24154434204101563, "step": 64860 }, { "epoch": 0.5608684749807611, "grad_norm": 3.1897134810515, "learning_rate": 4.9091167945439475e-06, "loss": 0.233221435546875, "step": 64865 }, { "epoch": 0.5609117085023043, "grad_norm": 0.7499869056349351, "learning_rate": 4.90895963402131e-06, "loss": 0.1243438720703125, "step": 64870 }, { "epoch": 0.5609549420238477, "grad_norm": 0.21404206181244403, "learning_rate": 4.908802464694721e-06, "loss": 0.042928314208984374, "step": 64875 }, { "epoch": 0.5609981755453909, "grad_norm": 44.47125140731723, "learning_rate": 4.908645286564902e-06, "loss": 0.17690620422363282, "step": 64880 }, { "epoch": 0.5610414090669341, "grad_norm": 26.64898274687512, "learning_rate": 4.908488099632581e-06, "loss": 0.229974365234375, "step": 64885 }, { "epoch": 0.5610846425884775, "grad_norm": 20.75557206742936, "learning_rate": 4.908330903898482e-06, "loss": 0.165313720703125, "step": 64890 }, { "epoch": 0.5611278761100207, "grad_norm": 0.6406266164167935, "learning_rate": 4.908173699363328e-06, "loss": 0.09054031372070312, "step": 64895 }, { "epoch": 0.5611711096315639, "grad_norm": 0.7722413615329127, "learning_rate": 4.908016486027846e-06, "loss": 0.06290817260742188, "step": 64900 }, { "epoch": 0.5612143431531071, "grad_norm": 5.08899456914983, "learning_rate": 4.90785926389276e-06, "loss": 0.13249244689941406, "step": 64905 }, { "epoch": 0.5612575766746505, "grad_norm": 15.242114319785053, "learning_rate": 4.9077020329587975e-06, "loss": 0.18124542236328126, "step": 64910 }, { "epoch": 0.5613008101961937, "grad_norm": 1.119687396080866, "learning_rate": 4.90754479322668e-06, "loss": 0.0504241943359375, "step": 64915 }, { "epoch": 0.5613440437177369, "grad_norm": 9.128844764651433, "learning_rate": 4.907387544697136e-06, "loss": 0.0566925048828125, "step": 64920 }, { "epoch": 0.5613872772392803, "grad_norm": 20.722235667314507, "learning_rate": 4.907230287370887e-06, "loss": 0.1716156005859375, "step": 64925 }, { "epoch": 0.5614305107608235, "grad_norm": 4.263905764167684, "learning_rate": 4.907073021248662e-06, "loss": 0.07347984313964843, "step": 64930 }, { "epoch": 0.5614737442823667, "grad_norm": 8.64656133983265, "learning_rate": 4.9069157463311845e-06, "loss": 0.08393707275390624, "step": 64935 }, { "epoch": 0.5615169778039101, "grad_norm": 19.18237928074101, "learning_rate": 4.906758462619179e-06, "loss": 0.1560612678527832, "step": 64940 }, { "epoch": 0.5615602113254533, "grad_norm": 3.671984520616383, "learning_rate": 4.906601170113373e-06, "loss": 0.11699676513671875, "step": 64945 }, { "epoch": 0.5616034448469965, "grad_norm": 12.585116255586318, "learning_rate": 4.906443868814491e-06, "loss": 0.10054550170898438, "step": 64950 }, { "epoch": 0.5616466783685399, "grad_norm": 2.8736726231940213, "learning_rate": 4.906286558723258e-06, "loss": 0.051602935791015624, "step": 64955 }, { "epoch": 0.5616899118900831, "grad_norm": 11.912782499118968, "learning_rate": 4.906129239840401e-06, "loss": 0.20882339477539064, "step": 64960 }, { "epoch": 0.5617331454116263, "grad_norm": 52.00016392606325, "learning_rate": 4.905971912166643e-06, "loss": 0.3606201171875, "step": 64965 }, { "epoch": 0.5617763789331697, "grad_norm": 74.46875426429017, "learning_rate": 4.905814575702711e-06, "loss": 0.32291336059570314, "step": 64970 }, { "epoch": 0.5618196124547129, "grad_norm": 0.16375920151714263, "learning_rate": 4.905657230449331e-06, "loss": 0.060174560546875, "step": 64975 }, { "epoch": 0.5618628459762561, "grad_norm": 7.845312790500728, "learning_rate": 4.905499876407228e-06, "loss": 0.09848613739013672, "step": 64980 }, { "epoch": 0.5619060794977994, "grad_norm": 1.6499504904936126, "learning_rate": 4.905342513577128e-06, "loss": 0.3321226119995117, "step": 64985 }, { "epoch": 0.5619493130193427, "grad_norm": 3.7797583144135216, "learning_rate": 4.905185141959757e-06, "loss": 0.30327301025390624, "step": 64990 }, { "epoch": 0.5619925465408859, "grad_norm": 4.445143816718667, "learning_rate": 4.905027761555841e-06, "loss": 0.2752349853515625, "step": 64995 }, { "epoch": 0.5620357800624292, "grad_norm": 30.44504209235308, "learning_rate": 4.904870372366104e-06, "loss": 0.26395187377929685, "step": 65000 }, { "epoch": 0.5620790135839725, "grad_norm": 1.1689877862069264, "learning_rate": 4.904712974391274e-06, "loss": 0.05289878845214844, "step": 65005 }, { "epoch": 0.5621222471055157, "grad_norm": 1.6020619674692453, "learning_rate": 4.904555567632075e-06, "loss": 0.1529693603515625, "step": 65010 }, { "epoch": 0.562165480627059, "grad_norm": 0.7021000562083642, "learning_rate": 4.904398152089235e-06, "loss": 0.11346817016601562, "step": 65015 }, { "epoch": 0.5622087141486023, "grad_norm": 3.4898369209183153, "learning_rate": 4.904240727763479e-06, "loss": 0.059326171875, "step": 65020 }, { "epoch": 0.5622519476701455, "grad_norm": 12.794748588651608, "learning_rate": 4.904083294655532e-06, "loss": 0.14284820556640626, "step": 65025 }, { "epoch": 0.5622951811916888, "grad_norm": 12.730408863670114, "learning_rate": 4.903925852766123e-06, "loss": 0.20186920166015626, "step": 65030 }, { "epoch": 0.5623384147132321, "grad_norm": 31.775513960698806, "learning_rate": 4.903768402095975e-06, "loss": 0.09557723999023438, "step": 65035 }, { "epoch": 0.5623816482347753, "grad_norm": 6.712814306258559, "learning_rate": 4.903610942645814e-06, "loss": 0.30420379638671874, "step": 65040 }, { "epoch": 0.5624248817563186, "grad_norm": 11.722760377309767, "learning_rate": 4.90345347441637e-06, "loss": 0.17526702880859374, "step": 65045 }, { "epoch": 0.5624681152778619, "grad_norm": 8.593165703048141, "learning_rate": 4.9032959974083655e-06, "loss": 0.182763671875, "step": 65050 }, { "epoch": 0.5625113487994051, "grad_norm": 19.562762650292665, "learning_rate": 4.903138511622529e-06, "loss": 0.13987159729003906, "step": 65055 }, { "epoch": 0.5625545823209483, "grad_norm": 6.840107570993744, "learning_rate": 4.902981017059584e-06, "loss": 0.1765056610107422, "step": 65060 }, { "epoch": 0.5625978158424917, "grad_norm": 1.2479945936857144, "learning_rate": 4.90282351372026e-06, "loss": 0.063629150390625, "step": 65065 }, { "epoch": 0.5626410493640349, "grad_norm": 13.582534668587623, "learning_rate": 4.9026660016052824e-06, "loss": 0.2254425048828125, "step": 65070 }, { "epoch": 0.5626842828855781, "grad_norm": 0.4007205060318491, "learning_rate": 4.902508480715377e-06, "loss": 0.029329299926757812, "step": 65075 }, { "epoch": 0.5627275164071214, "grad_norm": 19.216635083411198, "learning_rate": 4.90235095105127e-06, "loss": 0.4155517578125, "step": 65080 }, { "epoch": 0.5627707499286647, "grad_norm": 1.6351181178302319, "learning_rate": 4.902193412613689e-06, "loss": 0.10004768371582032, "step": 65085 }, { "epoch": 0.5628139834502079, "grad_norm": 1.9317993683150916, "learning_rate": 4.90203586540336e-06, "loss": 0.523126220703125, "step": 65090 }, { "epoch": 0.5628572169717512, "grad_norm": 20.816687448395157, "learning_rate": 4.90187830942101e-06, "loss": 0.10073394775390625, "step": 65095 }, { "epoch": 0.5629004504932945, "grad_norm": 3.9871977609644413, "learning_rate": 4.9017207446673644e-06, "loss": 0.20686492919921876, "step": 65100 }, { "epoch": 0.5629436840148377, "grad_norm": 24.334899399962275, "learning_rate": 4.901563171143152e-06, "loss": 0.2813507080078125, "step": 65105 }, { "epoch": 0.562986917536381, "grad_norm": 5.5670888532540665, "learning_rate": 4.901405588849097e-06, "loss": 0.1900177001953125, "step": 65110 }, { "epoch": 0.5630301510579243, "grad_norm": 2.595485586351629, "learning_rate": 4.901247997785928e-06, "loss": 0.0992767333984375, "step": 65115 }, { "epoch": 0.5630733845794675, "grad_norm": 12.69826806302756, "learning_rate": 4.901090397954372e-06, "loss": 0.12848358154296874, "step": 65120 }, { "epoch": 0.5631166181010108, "grad_norm": 25.221158520773663, "learning_rate": 4.9009327893551535e-06, "loss": 0.0678131103515625, "step": 65125 }, { "epoch": 0.5631598516225541, "grad_norm": 1.3456482111086836, "learning_rate": 4.900775171989002e-06, "loss": 0.20782012939453126, "step": 65130 }, { "epoch": 0.5632030851440973, "grad_norm": 1.0780596955669428, "learning_rate": 4.900617545856644e-06, "loss": 0.180230712890625, "step": 65135 }, { "epoch": 0.5632463186656406, "grad_norm": 43.48433318296306, "learning_rate": 4.900459910958804e-06, "loss": 0.3644195556640625, "step": 65140 }, { "epoch": 0.5632895521871839, "grad_norm": 0.6300274495648889, "learning_rate": 4.900302267296212e-06, "loss": 0.024904251098632812, "step": 65145 }, { "epoch": 0.5633327857087271, "grad_norm": 5.660897144132779, "learning_rate": 4.900144614869593e-06, "loss": 0.059869384765625, "step": 65150 }, { "epoch": 0.5633760192302704, "grad_norm": 36.540181141528066, "learning_rate": 4.899986953679676e-06, "loss": 0.08835296630859375, "step": 65155 }, { "epoch": 0.5634192527518136, "grad_norm": 1.1278350473678111, "learning_rate": 4.899829283727187e-06, "loss": 0.3002288818359375, "step": 65160 }, { "epoch": 0.5634624862733569, "grad_norm": 33.74230553927536, "learning_rate": 4.899671605012852e-06, "loss": 0.2177276611328125, "step": 65165 }, { "epoch": 0.5635057197949002, "grad_norm": 0.791300001488574, "learning_rate": 4.899513917537401e-06, "loss": 0.18719406127929689, "step": 65170 }, { "epoch": 0.5635489533164434, "grad_norm": 0.1385600049805472, "learning_rate": 4.8993562213015585e-06, "loss": 0.05164413452148438, "step": 65175 }, { "epoch": 0.5635921868379867, "grad_norm": 5.4636670708327735, "learning_rate": 4.899198516306053e-06, "loss": 0.48396453857421873, "step": 65180 }, { "epoch": 0.56363542035953, "grad_norm": 1.0360998722406494, "learning_rate": 4.899040802551612e-06, "loss": 0.439306640625, "step": 65185 }, { "epoch": 0.5636786538810732, "grad_norm": 0.11877576921563492, "learning_rate": 4.898883080038963e-06, "loss": 0.2372974395751953, "step": 65190 }, { "epoch": 0.5637218874026165, "grad_norm": 30.84107842208073, "learning_rate": 4.898725348768833e-06, "loss": 0.5297821044921875, "step": 65195 }, { "epoch": 0.5637651209241598, "grad_norm": 4.021436676387727, "learning_rate": 4.898567608741949e-06, "loss": 0.15638961791992187, "step": 65200 }, { "epoch": 0.563808354445703, "grad_norm": 5.948736134746515, "learning_rate": 4.8984098599590385e-06, "loss": 0.08878555297851562, "step": 65205 }, { "epoch": 0.5638515879672463, "grad_norm": 37.28343424722485, "learning_rate": 4.89825210242083e-06, "loss": 0.262738037109375, "step": 65210 }, { "epoch": 0.5638948214887896, "grad_norm": 1.9516856742463555, "learning_rate": 4.8980943361280514e-06, "loss": 0.05612678527832031, "step": 65215 }, { "epoch": 0.5639380550103328, "grad_norm": 3.9549836253506334, "learning_rate": 4.897936561081429e-06, "loss": 0.1465576171875, "step": 65220 }, { "epoch": 0.5639812885318761, "grad_norm": 35.63527846061976, "learning_rate": 4.89777877728169e-06, "loss": 0.13986778259277344, "step": 65225 }, { "epoch": 0.5640245220534194, "grad_norm": 2.1433665345138997, "learning_rate": 4.897620984729563e-06, "loss": 0.1541778564453125, "step": 65230 }, { "epoch": 0.5640677555749626, "grad_norm": 20.015980573971586, "learning_rate": 4.897463183425777e-06, "loss": 0.3974945068359375, "step": 65235 }, { "epoch": 0.5641109890965059, "grad_norm": 63.01540938732532, "learning_rate": 4.897305373371057e-06, "loss": 0.33819122314453126, "step": 65240 }, { "epoch": 0.5641542226180492, "grad_norm": 20.010280966165475, "learning_rate": 4.8971475545661335e-06, "loss": 0.30675811767578126, "step": 65245 }, { "epoch": 0.5641974561395924, "grad_norm": 14.782583582871407, "learning_rate": 4.896989727011733e-06, "loss": 0.15394363403320313, "step": 65250 }, { "epoch": 0.5642406896611356, "grad_norm": 0.2631253147022526, "learning_rate": 4.896831890708583e-06, "loss": 0.17311019897460939, "step": 65255 }, { "epoch": 0.564283923182679, "grad_norm": 0.3679858563014709, "learning_rate": 4.896674045657412e-06, "loss": 0.17109375, "step": 65260 }, { "epoch": 0.5643271567042222, "grad_norm": 0.7936314063684343, "learning_rate": 4.896516191858948e-06, "loss": 0.13899917602539064, "step": 65265 }, { "epoch": 0.5643703902257654, "grad_norm": 3.7389525999781035, "learning_rate": 4.896358329313919e-06, "loss": 0.26709136962890623, "step": 65270 }, { "epoch": 0.5644136237473087, "grad_norm": 0.08906093191377683, "learning_rate": 4.8962004580230516e-06, "loss": 0.14929542541503907, "step": 65275 }, { "epoch": 0.564456857268852, "grad_norm": 1.9382455401169414, "learning_rate": 4.896042577987077e-06, "loss": 0.228118896484375, "step": 65280 }, { "epoch": 0.5645000907903952, "grad_norm": 17.33006823099776, "learning_rate": 4.895884689206722e-06, "loss": 0.16098442077636718, "step": 65285 }, { "epoch": 0.5645433243119385, "grad_norm": 17.0489929508274, "learning_rate": 4.895726791682712e-06, "loss": 0.1827850341796875, "step": 65290 }, { "epoch": 0.5645865578334818, "grad_norm": 7.339063426610509, "learning_rate": 4.8955688854157795e-06, "loss": 0.4391048431396484, "step": 65295 }, { "epoch": 0.564629791355025, "grad_norm": 0.4835587262448179, "learning_rate": 4.895410970406651e-06, "loss": 0.02783355712890625, "step": 65300 }, { "epoch": 0.5646730248765683, "grad_norm": 13.696841529337153, "learning_rate": 4.895253046656052e-06, "loss": 0.324371337890625, "step": 65305 }, { "epoch": 0.5647162583981116, "grad_norm": 6.278394564521722, "learning_rate": 4.8950951141647155e-06, "loss": 0.2373382568359375, "step": 65310 }, { "epoch": 0.5647594919196548, "grad_norm": 23.475424674244252, "learning_rate": 4.894937172933368e-06, "loss": 0.09224090576171876, "step": 65315 }, { "epoch": 0.5648027254411981, "grad_norm": 0.10112710236452278, "learning_rate": 4.894779222962736e-06, "loss": 0.060749053955078125, "step": 65320 }, { "epoch": 0.5648459589627414, "grad_norm": 0.5507085516087162, "learning_rate": 4.894621264253552e-06, "loss": 0.15070343017578125, "step": 65325 }, { "epoch": 0.5648891924842846, "grad_norm": 16.147766514822692, "learning_rate": 4.894463296806541e-06, "loss": 0.46512374877929685, "step": 65330 }, { "epoch": 0.5649324260058278, "grad_norm": 3.1383811216620985, "learning_rate": 4.894305320622432e-06, "loss": 0.39209728240966796, "step": 65335 }, { "epoch": 0.5649756595273712, "grad_norm": 3.673261659534606, "learning_rate": 4.8941473357019546e-06, "loss": 0.226300048828125, "step": 65340 }, { "epoch": 0.5650188930489144, "grad_norm": 49.95404656439289, "learning_rate": 4.893989342045838e-06, "loss": 0.2024566650390625, "step": 65345 }, { "epoch": 0.5650621265704576, "grad_norm": 1.7563964405905177, "learning_rate": 4.89383133965481e-06, "loss": 0.14505615234375, "step": 65350 }, { "epoch": 0.565105360092001, "grad_norm": 0.7718491791178953, "learning_rate": 4.893673328529597e-06, "loss": 0.58919677734375, "step": 65355 }, { "epoch": 0.5651485936135442, "grad_norm": 28.05439820539546, "learning_rate": 4.893515308670931e-06, "loss": 0.11379852294921874, "step": 65360 }, { "epoch": 0.5651918271350874, "grad_norm": 3.4861006326994004, "learning_rate": 4.893357280079541e-06, "loss": 0.06575927734375, "step": 65365 }, { "epoch": 0.5652350606566308, "grad_norm": 0.8167253367765609, "learning_rate": 4.8931992427561535e-06, "loss": 0.3474334716796875, "step": 65370 }, { "epoch": 0.565278294178174, "grad_norm": 32.847965267736754, "learning_rate": 4.893041196701498e-06, "loss": 0.416021728515625, "step": 65375 }, { "epoch": 0.5653215276997172, "grad_norm": 27.768928057550056, "learning_rate": 4.892883141916304e-06, "loss": 0.39298553466796876, "step": 65380 }, { "epoch": 0.5653647612212606, "grad_norm": 13.421925751987677, "learning_rate": 4.892725078401301e-06, "loss": 0.1255462646484375, "step": 65385 }, { "epoch": 0.5654079947428038, "grad_norm": 1.7549928704998976, "learning_rate": 4.8925670061572164e-06, "loss": 0.07931747436523437, "step": 65390 }, { "epoch": 0.565451228264347, "grad_norm": 0.2169849082951757, "learning_rate": 4.89240892518478e-06, "loss": 0.20577621459960938, "step": 65395 }, { "epoch": 0.5654944617858904, "grad_norm": 5.420870936313204, "learning_rate": 4.892250835484722e-06, "loss": 0.08486289978027343, "step": 65400 }, { "epoch": 0.5655376953074336, "grad_norm": 4.125114606853287, "learning_rate": 4.892092737057769e-06, "loss": 0.09609375, "step": 65405 }, { "epoch": 0.5655809288289768, "grad_norm": 14.884182049440183, "learning_rate": 4.891934629904652e-06, "loss": 0.14915695190429687, "step": 65410 }, { "epoch": 0.5656241623505202, "grad_norm": 20.8553041884599, "learning_rate": 4.891776514026099e-06, "loss": 0.35513916015625, "step": 65415 }, { "epoch": 0.5656673958720634, "grad_norm": 0.750896070146707, "learning_rate": 4.89161838942284e-06, "loss": 0.03958206176757813, "step": 65420 }, { "epoch": 0.5657106293936066, "grad_norm": 24.316910775774392, "learning_rate": 4.891460256095605e-06, "loss": 0.12277679443359375, "step": 65425 }, { "epoch": 0.5657538629151498, "grad_norm": 5.126506678267677, "learning_rate": 4.891302114045122e-06, "loss": 0.10013446807861329, "step": 65430 }, { "epoch": 0.5657970964366932, "grad_norm": 0.3115423729039779, "learning_rate": 4.89114396327212e-06, "loss": 0.20799560546875, "step": 65435 }, { "epoch": 0.5658403299582364, "grad_norm": 0.6358011224144735, "learning_rate": 4.890985803777331e-06, "loss": 0.13975372314453124, "step": 65440 }, { "epoch": 0.5658835634797796, "grad_norm": 0.16667935797373654, "learning_rate": 4.8908276355614814e-06, "loss": 0.046558380126953125, "step": 65445 }, { "epoch": 0.565926797001323, "grad_norm": 26.10148565346131, "learning_rate": 4.890669458625302e-06, "loss": 0.428271484375, "step": 65450 }, { "epoch": 0.5659700305228662, "grad_norm": 7.858499857338581, "learning_rate": 4.890511272969523e-06, "loss": 0.06304855346679687, "step": 65455 }, { "epoch": 0.5660132640444094, "grad_norm": 3.929803010316092, "learning_rate": 4.8903530785948704e-06, "loss": 0.06257972717285157, "step": 65460 }, { "epoch": 0.5660564975659528, "grad_norm": 51.4752755803223, "learning_rate": 4.890194875502078e-06, "loss": 0.47146759033203123, "step": 65465 }, { "epoch": 0.566099731087496, "grad_norm": 2.4061758889776423, "learning_rate": 4.890036663691875e-06, "loss": 0.3352535247802734, "step": 65470 }, { "epoch": 0.5661429646090392, "grad_norm": 35.09528587592999, "learning_rate": 4.889878443164989e-06, "loss": 0.30382537841796875, "step": 65475 }, { "epoch": 0.5661861981305826, "grad_norm": 0.7407204555596795, "learning_rate": 4.88972021392215e-06, "loss": 0.06801605224609375, "step": 65480 }, { "epoch": 0.5662294316521258, "grad_norm": 15.412695754537738, "learning_rate": 4.889561975964088e-06, "loss": 0.2881317138671875, "step": 65485 }, { "epoch": 0.566272665173669, "grad_norm": 14.908542864303755, "learning_rate": 4.889403729291535e-06, "loss": 0.29915542602539064, "step": 65490 }, { "epoch": 0.5663158986952124, "grad_norm": 0.4712448308330638, "learning_rate": 4.889245473905217e-06, "loss": 0.1977874755859375, "step": 65495 }, { "epoch": 0.5663591322167556, "grad_norm": 14.751740192286345, "learning_rate": 4.889087209805867e-06, "loss": 0.1466583251953125, "step": 65500 }, { "epoch": 0.5664023657382988, "grad_norm": 19.80823707989805, "learning_rate": 4.888928936994213e-06, "loss": 0.0924835205078125, "step": 65505 }, { "epoch": 0.5664455992598421, "grad_norm": 31.990734489376162, "learning_rate": 4.888770655470987e-06, "loss": 0.3321699142456055, "step": 65510 }, { "epoch": 0.5664888327813854, "grad_norm": 5.60712768258993, "learning_rate": 4.888612365236917e-06, "loss": 0.33980712890625, "step": 65515 }, { "epoch": 0.5665320663029286, "grad_norm": 6.818396498969793, "learning_rate": 4.888454066292734e-06, "loss": 0.2175159454345703, "step": 65520 }, { "epoch": 0.5665752998244719, "grad_norm": 2.978206087735023, "learning_rate": 4.888295758639166e-06, "loss": 0.03640956878662109, "step": 65525 }, { "epoch": 0.5666185333460152, "grad_norm": 0.6456176383078736, "learning_rate": 4.888137442276947e-06, "loss": 0.0818939208984375, "step": 65530 }, { "epoch": 0.5666617668675584, "grad_norm": 27.678768536821945, "learning_rate": 4.887979117206803e-06, "loss": 0.12258377075195312, "step": 65535 }, { "epoch": 0.5667050003891017, "grad_norm": 33.99308694185566, "learning_rate": 4.887820783429467e-06, "loss": 0.0470916748046875, "step": 65540 }, { "epoch": 0.566748233910645, "grad_norm": 40.27441570659988, "learning_rate": 4.887662440945668e-06, "loss": 0.65430908203125, "step": 65545 }, { "epoch": 0.5667914674321882, "grad_norm": 11.326213047348299, "learning_rate": 4.887504089756137e-06, "loss": 0.24276123046875, "step": 65550 }, { "epoch": 0.5668347009537315, "grad_norm": 0.2755478026025756, "learning_rate": 4.887345729861604e-06, "loss": 0.07221908569335937, "step": 65555 }, { "epoch": 0.5668779344752748, "grad_norm": 0.08037029737392515, "learning_rate": 4.887187361262799e-06, "loss": 0.058075904846191406, "step": 65560 }, { "epoch": 0.566921167996818, "grad_norm": 2.083547962556401, "learning_rate": 4.887028983960453e-06, "loss": 0.321258544921875, "step": 65565 }, { "epoch": 0.5669644015183612, "grad_norm": 0.383931878848138, "learning_rate": 4.886870597955296e-06, "loss": 0.03707809448242187, "step": 65570 }, { "epoch": 0.5670076350399046, "grad_norm": 7.182559710564235, "learning_rate": 4.886712203248058e-06, "loss": 0.18217048645019532, "step": 65575 }, { "epoch": 0.5670508685614478, "grad_norm": 2.2248965662888103, "learning_rate": 4.886553799839471e-06, "loss": 0.11397247314453125, "step": 65580 }, { "epoch": 0.567094102082991, "grad_norm": 1.0029251544460185, "learning_rate": 4.886395387730263e-06, "loss": 0.14185333251953125, "step": 65585 }, { "epoch": 0.5671373356045344, "grad_norm": 109.7865859349594, "learning_rate": 4.886236966921167e-06, "loss": 0.07536773681640625, "step": 65590 }, { "epoch": 0.5671805691260776, "grad_norm": 12.130369198895167, "learning_rate": 4.886078537412913e-06, "loss": 0.06112594604492187, "step": 65595 }, { "epoch": 0.5672238026476208, "grad_norm": 2.2105000622560276, "learning_rate": 4.8859200992062325e-06, "loss": 0.080230712890625, "step": 65600 }, { "epoch": 0.5672670361691641, "grad_norm": 29.581744998677266, "learning_rate": 4.885761652301854e-06, "loss": 0.23456993103027343, "step": 65605 }, { "epoch": 0.5673102696907074, "grad_norm": 5.039317796020561, "learning_rate": 4.8856031967005085e-06, "loss": 0.247088623046875, "step": 65610 }, { "epoch": 0.5673535032122506, "grad_norm": 26.16007516582612, "learning_rate": 4.885444732402928e-06, "loss": 0.135791015625, "step": 65615 }, { "epoch": 0.5673967367337939, "grad_norm": 4.3152488637153805, "learning_rate": 4.885286259409844e-06, "loss": 0.15570068359375, "step": 65620 }, { "epoch": 0.5674399702553372, "grad_norm": 13.17104501398107, "learning_rate": 4.885127777721987e-06, "loss": 0.12635498046875, "step": 65625 }, { "epoch": 0.5674832037768804, "grad_norm": 31.15736490823683, "learning_rate": 4.884969287340086e-06, "loss": 0.3069366455078125, "step": 65630 }, { "epoch": 0.5675264372984237, "grad_norm": 2.9316327682606693, "learning_rate": 4.884810788264874e-06, "loss": 0.1988811492919922, "step": 65635 }, { "epoch": 0.567569670819967, "grad_norm": 5.1515384387997, "learning_rate": 4.88465228049708e-06, "loss": 0.1013275146484375, "step": 65640 }, { "epoch": 0.5676129043415102, "grad_norm": 1.938775997386321, "learning_rate": 4.884493764037436e-06, "loss": 0.35029296875, "step": 65645 }, { "epoch": 0.5676561378630535, "grad_norm": 4.99550800104695, "learning_rate": 4.884335238886675e-06, "loss": 0.13166732788085939, "step": 65650 }, { "epoch": 0.5676993713845968, "grad_norm": 17.945461919116205, "learning_rate": 4.884176705045525e-06, "loss": 0.33098297119140624, "step": 65655 }, { "epoch": 0.56774260490614, "grad_norm": 7.869992337423117, "learning_rate": 4.88401816251472e-06, "loss": 0.2227783203125, "step": 65660 }, { "epoch": 0.5677858384276833, "grad_norm": 0.3841278650947174, "learning_rate": 4.883859611294988e-06, "loss": 0.041900634765625, "step": 65665 }, { "epoch": 0.5678290719492266, "grad_norm": 5.115718502712095, "learning_rate": 4.883701051387063e-06, "loss": 0.5284698486328125, "step": 65670 }, { "epoch": 0.5678723054707698, "grad_norm": 32.21176388071177, "learning_rate": 4.883542482791675e-06, "loss": 0.09288330078125, "step": 65675 }, { "epoch": 0.5679155389923131, "grad_norm": 0.5274973441316291, "learning_rate": 4.883383905509555e-06, "loss": 0.17181777954101562, "step": 65680 }, { "epoch": 0.5679587725138563, "grad_norm": 30.982575844420776, "learning_rate": 4.883225319541435e-06, "loss": 0.4179473876953125, "step": 65685 }, { "epoch": 0.5680020060353996, "grad_norm": 22.189145495196446, "learning_rate": 4.883066724888046e-06, "loss": 0.234344482421875, "step": 65690 }, { "epoch": 0.5680452395569429, "grad_norm": 4.712422900131775, "learning_rate": 4.88290812155012e-06, "loss": 0.09375152587890626, "step": 65695 }, { "epoch": 0.5680884730784861, "grad_norm": 17.87422662370857, "learning_rate": 4.882749509528389e-06, "loss": 0.2577659606933594, "step": 65700 }, { "epoch": 0.5681317066000294, "grad_norm": 9.252703483382396, "learning_rate": 4.882590888823582e-06, "loss": 0.052044677734375, "step": 65705 }, { "epoch": 0.5681749401215727, "grad_norm": 0.5592255135219172, "learning_rate": 4.882432259436433e-06, "loss": 0.41710205078125, "step": 65710 }, { "epoch": 0.5682181736431159, "grad_norm": 0.10166811361641845, "learning_rate": 4.882273621367674e-06, "loss": 0.23425140380859374, "step": 65715 }, { "epoch": 0.5682614071646592, "grad_norm": 20.71122336380128, "learning_rate": 4.882114974618033e-06, "loss": 0.2999359130859375, "step": 65720 }, { "epoch": 0.5683046406862025, "grad_norm": 15.90079496727666, "learning_rate": 4.881956319188245e-06, "loss": 0.17846221923828126, "step": 65725 }, { "epoch": 0.5683478742077457, "grad_norm": 24.97996925931549, "learning_rate": 4.881797655079041e-06, "loss": 0.1216339111328125, "step": 65730 }, { "epoch": 0.568391107729289, "grad_norm": 17.525994969686156, "learning_rate": 4.8816389822911525e-06, "loss": 0.17980728149414063, "step": 65735 }, { "epoch": 0.5684343412508323, "grad_norm": 17.67886328360771, "learning_rate": 4.8814803008253115e-06, "loss": 0.17362289428710936, "step": 65740 }, { "epoch": 0.5684775747723755, "grad_norm": 0.4012688885870515, "learning_rate": 4.881321610682249e-06, "loss": 0.04201812744140625, "step": 65745 }, { "epoch": 0.5685208082939188, "grad_norm": 9.00319120448008, "learning_rate": 4.881162911862698e-06, "loss": 0.040106582641601565, "step": 65750 }, { "epoch": 0.568564041815462, "grad_norm": 4.149319119431506, "learning_rate": 4.8810042043673895e-06, "loss": 0.2330913543701172, "step": 65755 }, { "epoch": 0.5686072753370053, "grad_norm": 0.5284486473837496, "learning_rate": 4.880845488197056e-06, "loss": 0.11422119140625, "step": 65760 }, { "epoch": 0.5686505088585485, "grad_norm": 33.9696685304325, "learning_rate": 4.8806867633524285e-06, "loss": 0.18281021118164062, "step": 65765 }, { "epoch": 0.5686937423800918, "grad_norm": 32.01385491532755, "learning_rate": 4.880528029834241e-06, "loss": 0.2577301025390625, "step": 65770 }, { "epoch": 0.5687369759016351, "grad_norm": 2.8372289983190897, "learning_rate": 4.880369287643224e-06, "loss": 0.3134674072265625, "step": 65775 }, { "epoch": 0.5687802094231783, "grad_norm": 9.620802978868802, "learning_rate": 4.880210536780111e-06, "loss": 0.4912841796875, "step": 65780 }, { "epoch": 0.5688234429447216, "grad_norm": 0.13428946269696942, "learning_rate": 4.88005177724563e-06, "loss": 0.07977447509765626, "step": 65785 }, { "epoch": 0.5688666764662649, "grad_norm": 2.758390566471936, "learning_rate": 4.8798930090405195e-06, "loss": 0.14024658203125, "step": 65790 }, { "epoch": 0.5689099099878081, "grad_norm": 2.4094396327747574, "learning_rate": 4.879734232165508e-06, "loss": 0.20911026000976562, "step": 65795 }, { "epoch": 0.5689531435093514, "grad_norm": 6.618528577391952, "learning_rate": 4.8795754466213265e-06, "loss": 0.13583831787109374, "step": 65800 }, { "epoch": 0.5689963770308947, "grad_norm": 1.3549770066764422, "learning_rate": 4.879416652408711e-06, "loss": 0.24957275390625, "step": 65805 }, { "epoch": 0.5690396105524379, "grad_norm": 39.236991282081384, "learning_rate": 4.879257849528391e-06, "loss": 0.26181640625, "step": 65810 }, { "epoch": 0.5690828440739812, "grad_norm": 7.232956087906325, "learning_rate": 4.879099037981101e-06, "loss": 0.45601806640625, "step": 65815 }, { "epoch": 0.5691260775955245, "grad_norm": 6.479759847738085, "learning_rate": 4.878940217767572e-06, "loss": 0.06396484375, "step": 65820 }, { "epoch": 0.5691693111170677, "grad_norm": 9.21071414247146, "learning_rate": 4.8787813888885355e-06, "loss": 0.17162704467773438, "step": 65825 }, { "epoch": 0.569212544638611, "grad_norm": 1.502694772561975, "learning_rate": 4.878622551344726e-06, "loss": 0.1648193359375, "step": 65830 }, { "epoch": 0.5692557781601543, "grad_norm": 36.532915727663884, "learning_rate": 4.8784637051368745e-06, "loss": 0.1527557373046875, "step": 65835 }, { "epoch": 0.5692990116816975, "grad_norm": 18.95799423752386, "learning_rate": 4.878304850265716e-06, "loss": 0.08082504272460937, "step": 65840 }, { "epoch": 0.5693422452032408, "grad_norm": 1.867356616153351, "learning_rate": 4.87814598673198e-06, "loss": 0.0555633544921875, "step": 65845 }, { "epoch": 0.5693854787247841, "grad_norm": 3.4402064024058103, "learning_rate": 4.877987114536402e-06, "loss": 0.15961761474609376, "step": 65850 }, { "epoch": 0.5694287122463273, "grad_norm": 3.061201819676881, "learning_rate": 4.877828233679713e-06, "loss": 0.36481552124023436, "step": 65855 }, { "epoch": 0.5694719457678705, "grad_norm": 2.4171600955884927, "learning_rate": 4.877669344162645e-06, "loss": 0.13906784057617189, "step": 65860 }, { "epoch": 0.5695151792894139, "grad_norm": 10.698236222502368, "learning_rate": 4.877510445985933e-06, "loss": 0.12930908203125, "step": 65865 }, { "epoch": 0.5695584128109571, "grad_norm": 31.663340371282334, "learning_rate": 4.877351539150308e-06, "loss": 0.30041961669921874, "step": 65870 }, { "epoch": 0.5696016463325003, "grad_norm": 2.2410625695543285, "learning_rate": 4.8771926236565044e-06, "loss": 0.16346435546875, "step": 65875 }, { "epoch": 0.5696448798540437, "grad_norm": 0.5639292618033422, "learning_rate": 4.877033699505254e-06, "loss": 0.1267563819885254, "step": 65880 }, { "epoch": 0.5696881133755869, "grad_norm": 23.448312717665242, "learning_rate": 4.87687476669729e-06, "loss": 0.10360565185546874, "step": 65885 }, { "epoch": 0.5697313468971301, "grad_norm": 0.5780493698748592, "learning_rate": 4.876715825233346e-06, "loss": 0.024161529541015626, "step": 65890 }, { "epoch": 0.5697745804186735, "grad_norm": 0.14950289356627502, "learning_rate": 4.876556875114153e-06, "loss": 0.23624267578125, "step": 65895 }, { "epoch": 0.5698178139402167, "grad_norm": 12.487733838783512, "learning_rate": 4.876397916340446e-06, "loss": 0.05966339111328125, "step": 65900 }, { "epoch": 0.5698610474617599, "grad_norm": 0.27195995760967917, "learning_rate": 4.876238948912959e-06, "loss": 0.41686038970947265, "step": 65905 }, { "epoch": 0.5699042809833033, "grad_norm": 5.805051779350772, "learning_rate": 4.876079972832422e-06, "loss": 0.11847915649414062, "step": 65910 }, { "epoch": 0.5699475145048465, "grad_norm": 10.636215760124534, "learning_rate": 4.87592098809957e-06, "loss": 0.130322265625, "step": 65915 }, { "epoch": 0.5699907480263897, "grad_norm": 5.25961628618465, "learning_rate": 4.875761994715137e-06, "loss": 0.14060325622558595, "step": 65920 }, { "epoch": 0.570033981547933, "grad_norm": 27.71560300406579, "learning_rate": 4.875602992679855e-06, "loss": 0.23859405517578125, "step": 65925 }, { "epoch": 0.5700772150694763, "grad_norm": 5.151145117703025, "learning_rate": 4.875443981994458e-06, "loss": 0.022119903564453126, "step": 65930 }, { "epoch": 0.5701204485910195, "grad_norm": 4.420430173166472, "learning_rate": 4.8752849626596794e-06, "loss": 0.0277130126953125, "step": 65935 }, { "epoch": 0.5701636821125627, "grad_norm": 12.451468347532023, "learning_rate": 4.875125934676252e-06, "loss": 0.1098297119140625, "step": 65940 }, { "epoch": 0.5702069156341061, "grad_norm": 10.923428123575263, "learning_rate": 4.874966898044909e-06, "loss": 0.17710342407226562, "step": 65945 }, { "epoch": 0.5702501491556493, "grad_norm": 1.2761200188425808, "learning_rate": 4.874807852766386e-06, "loss": 0.0459259033203125, "step": 65950 }, { "epoch": 0.5702933826771925, "grad_norm": 1.0316195447120386, "learning_rate": 4.874648798841413e-06, "loss": 0.07979621887207031, "step": 65955 }, { "epoch": 0.5703366161987359, "grad_norm": 20.72562872983377, "learning_rate": 4.874489736270725e-06, "loss": 0.2018585205078125, "step": 65960 }, { "epoch": 0.5703798497202791, "grad_norm": 20.439419949493878, "learning_rate": 4.874330665055058e-06, "loss": 0.6409591674804688, "step": 65965 }, { "epoch": 0.5704230832418223, "grad_norm": 21.284373688845466, "learning_rate": 4.874171585195142e-06, "loss": 0.1267822265625, "step": 65970 }, { "epoch": 0.5704663167633657, "grad_norm": 29.768679908969755, "learning_rate": 4.874012496691712e-06, "loss": 0.27596607208251955, "step": 65975 }, { "epoch": 0.5705095502849089, "grad_norm": 1.8621014182042923, "learning_rate": 4.873853399545503e-06, "loss": 0.020701217651367187, "step": 65980 }, { "epoch": 0.5705527838064521, "grad_norm": 26.46730021167508, "learning_rate": 4.873694293757248e-06, "loss": 0.2354339599609375, "step": 65985 }, { "epoch": 0.5705960173279955, "grad_norm": 18.206544497081595, "learning_rate": 4.87353517932768e-06, "loss": 0.303106689453125, "step": 65990 }, { "epoch": 0.5706392508495387, "grad_norm": 21.628817214277984, "learning_rate": 4.873376056257532e-06, "loss": 0.15836944580078124, "step": 65995 }, { "epoch": 0.5706824843710819, "grad_norm": 2.5208813200297997, "learning_rate": 4.873216924547541e-06, "loss": 0.2788816452026367, "step": 66000 }, { "epoch": 0.5707257178926253, "grad_norm": 0.035357050781247894, "learning_rate": 4.873057784198438e-06, "loss": 0.10513687133789062, "step": 66005 }, { "epoch": 0.5707689514141685, "grad_norm": 5.41515264458363, "learning_rate": 4.8728986352109576e-06, "loss": 0.03526382446289063, "step": 66010 }, { "epoch": 0.5708121849357117, "grad_norm": 4.576187826361284, "learning_rate": 4.872739477585836e-06, "loss": 0.06002388000488281, "step": 66015 }, { "epoch": 0.5708554184572551, "grad_norm": 23.037831629982943, "learning_rate": 4.872580311323805e-06, "loss": 0.4678318023681641, "step": 66020 }, { "epoch": 0.5708986519787983, "grad_norm": 6.801393665244901, "learning_rate": 4.872421136425597e-06, "loss": 0.1608306884765625, "step": 66025 }, { "epoch": 0.5709418855003415, "grad_norm": 1.9413567780688294, "learning_rate": 4.87226195289195e-06, "loss": 0.11425018310546875, "step": 66030 }, { "epoch": 0.5709851190218848, "grad_norm": 9.489776288009514, "learning_rate": 4.872102760723596e-06, "loss": 0.1061309814453125, "step": 66035 }, { "epoch": 0.5710283525434281, "grad_norm": 7.084205282193627, "learning_rate": 4.8719435599212695e-06, "loss": 0.07563056945800781, "step": 66040 }, { "epoch": 0.5710715860649713, "grad_norm": 33.25340148322494, "learning_rate": 4.871784350485705e-06, "loss": 0.158197021484375, "step": 66045 }, { "epoch": 0.5711148195865146, "grad_norm": 8.254818181985176, "learning_rate": 4.871625132417636e-06, "loss": 0.06467742919921875, "step": 66050 }, { "epoch": 0.5711580531080579, "grad_norm": 18.089199677453095, "learning_rate": 4.871465905717797e-06, "loss": 0.13300018310546874, "step": 66055 }, { "epoch": 0.5712012866296011, "grad_norm": 41.99093025774287, "learning_rate": 4.871306670386923e-06, "loss": 0.3260154724121094, "step": 66060 }, { "epoch": 0.5712445201511444, "grad_norm": 6.3770026213983835, "learning_rate": 4.871147426425748e-06, "loss": 0.0874176025390625, "step": 66065 }, { "epoch": 0.5712877536726877, "grad_norm": 39.84884415960908, "learning_rate": 4.8709881738350066e-06, "loss": 0.301348876953125, "step": 66070 }, { "epoch": 0.5713309871942309, "grad_norm": 3.4378395030356668, "learning_rate": 4.870828912615433e-06, "loss": 0.1115234375, "step": 66075 }, { "epoch": 0.5713742207157742, "grad_norm": 53.4703273984332, "learning_rate": 4.870669642767761e-06, "loss": 0.16097030639648438, "step": 66080 }, { "epoch": 0.5714174542373175, "grad_norm": 6.78630524922212, "learning_rate": 4.870510364292727e-06, "loss": 0.16769866943359374, "step": 66085 }, { "epoch": 0.5714606877588607, "grad_norm": 1.7997520671733311, "learning_rate": 4.870351077191063e-06, "loss": 0.3275413513183594, "step": 66090 }, { "epoch": 0.571503921280404, "grad_norm": 0.6632381798777259, "learning_rate": 4.870191781463507e-06, "loss": 0.09729232788085937, "step": 66095 }, { "epoch": 0.5715471548019473, "grad_norm": 2.543193701918116, "learning_rate": 4.870032477110791e-06, "loss": 0.19478073120117187, "step": 66100 }, { "epoch": 0.5715903883234905, "grad_norm": 10.81225755758137, "learning_rate": 4.869873164133649e-06, "loss": 0.25399150848388674, "step": 66105 }, { "epoch": 0.5716336218450337, "grad_norm": 1.9059372342351573, "learning_rate": 4.8697138425328195e-06, "loss": 0.09694175720214844, "step": 66110 }, { "epoch": 0.571676855366577, "grad_norm": 9.568926108062984, "learning_rate": 4.869554512309034e-06, "loss": 0.256341552734375, "step": 66115 }, { "epoch": 0.5717200888881203, "grad_norm": 0.5419045108533991, "learning_rate": 4.869395173463028e-06, "loss": 0.11040401458740234, "step": 66120 }, { "epoch": 0.5717633224096635, "grad_norm": 2.1564679406766882, "learning_rate": 4.869235825995537e-06, "loss": 0.134075927734375, "step": 66125 }, { "epoch": 0.5718065559312068, "grad_norm": 0.6565119337116949, "learning_rate": 4.869076469907295e-06, "loss": 0.1679210662841797, "step": 66130 }, { "epoch": 0.5718497894527501, "grad_norm": 12.364294722580691, "learning_rate": 4.868917105199039e-06, "loss": 0.2413177490234375, "step": 66135 }, { "epoch": 0.5718930229742933, "grad_norm": 0.2474279122752451, "learning_rate": 4.868757731871501e-06, "loss": 0.1324859619140625, "step": 66140 }, { "epoch": 0.5719362564958366, "grad_norm": 4.240569578324095, "learning_rate": 4.8685983499254175e-06, "loss": 0.18542327880859374, "step": 66145 }, { "epoch": 0.5719794900173799, "grad_norm": 6.7757122880900695, "learning_rate": 4.868438959361524e-06, "loss": 0.18175086975097657, "step": 66150 }, { "epoch": 0.5720227235389231, "grad_norm": 2.4216126039206136, "learning_rate": 4.868279560180554e-06, "loss": 0.1360870361328125, "step": 66155 }, { "epoch": 0.5720659570604664, "grad_norm": 0.2975811655558153, "learning_rate": 4.8681201523832454e-06, "loss": 0.08255081176757813, "step": 66160 }, { "epoch": 0.5721091905820097, "grad_norm": 0.8091974497218765, "learning_rate": 4.867960735970331e-06, "loss": 0.2941619873046875, "step": 66165 }, { "epoch": 0.5721524241035529, "grad_norm": 24.65970731456708, "learning_rate": 4.867801310942547e-06, "loss": 0.2967254638671875, "step": 66170 }, { "epoch": 0.5721956576250962, "grad_norm": 3.0167964226302013, "learning_rate": 4.867641877300628e-06, "loss": 0.27906646728515627, "step": 66175 }, { "epoch": 0.5722388911466395, "grad_norm": 4.852337021949996, "learning_rate": 4.86748243504531e-06, "loss": 0.30352249145507815, "step": 66180 }, { "epoch": 0.5722821246681827, "grad_norm": 30.601237087541683, "learning_rate": 4.8673229841773275e-06, "loss": 0.4820648193359375, "step": 66185 }, { "epoch": 0.572325358189726, "grad_norm": 1.34587823108161, "learning_rate": 4.867163524697417e-06, "loss": 0.10732154846191407, "step": 66190 }, { "epoch": 0.5723685917112693, "grad_norm": 27.42733250335868, "learning_rate": 4.867004056606313e-06, "loss": 0.221832275390625, "step": 66195 }, { "epoch": 0.5724118252328125, "grad_norm": 21.43088758242358, "learning_rate": 4.866844579904751e-06, "loss": 0.33946075439453127, "step": 66200 }, { "epoch": 0.5724550587543558, "grad_norm": 0.1457453977431617, "learning_rate": 4.8666850945934676e-06, "loss": 0.056101226806640626, "step": 66205 }, { "epoch": 0.572498292275899, "grad_norm": 0.8956812270333125, "learning_rate": 4.866525600673197e-06, "loss": 0.04538726806640625, "step": 66210 }, { "epoch": 0.5725415257974423, "grad_norm": 14.52017898035653, "learning_rate": 4.866366098144675e-06, "loss": 0.23856143951416015, "step": 66215 }, { "epoch": 0.5725847593189856, "grad_norm": 1.0432266941207378, "learning_rate": 4.8662065870086376e-06, "loss": 0.12390365600585937, "step": 66220 }, { "epoch": 0.5726279928405288, "grad_norm": 10.39680077461756, "learning_rate": 4.86604706726582e-06, "loss": 0.133880615234375, "step": 66225 }, { "epoch": 0.5726712263620721, "grad_norm": 21.168114820331567, "learning_rate": 4.865887538916959e-06, "loss": 0.26340255737304685, "step": 66230 }, { "epoch": 0.5727144598836154, "grad_norm": 1.5016723675457988, "learning_rate": 4.865728001962789e-06, "loss": 0.09731597900390625, "step": 66235 }, { "epoch": 0.5727576934051586, "grad_norm": 4.807794023738379, "learning_rate": 4.865568456404047e-06, "loss": 0.103546142578125, "step": 66240 }, { "epoch": 0.5728009269267019, "grad_norm": 0.6592125173065908, "learning_rate": 4.8654089022414674e-06, "loss": 0.19545021057128906, "step": 66245 }, { "epoch": 0.5728441604482452, "grad_norm": 12.228522794467189, "learning_rate": 4.865249339475787e-06, "loss": 0.15462989807128907, "step": 66250 }, { "epoch": 0.5728873939697884, "grad_norm": 6.836094392012096, "learning_rate": 4.865089768107742e-06, "loss": 0.2413482666015625, "step": 66255 }, { "epoch": 0.5729306274913317, "grad_norm": 55.027409146393914, "learning_rate": 4.864930188138067e-06, "loss": 0.1686248779296875, "step": 66260 }, { "epoch": 0.572973861012875, "grad_norm": 15.673785640152165, "learning_rate": 4.864770599567499e-06, "loss": 0.1217041015625, "step": 66265 }, { "epoch": 0.5730170945344182, "grad_norm": 2.353534243782813, "learning_rate": 4.864611002396774e-06, "loss": 0.37728271484375, "step": 66270 }, { "epoch": 0.5730603280559615, "grad_norm": 2.4311625204089995, "learning_rate": 4.8644513966266266e-06, "loss": 0.099798583984375, "step": 66275 }, { "epoch": 0.5731035615775047, "grad_norm": 5.830623764269659, "learning_rate": 4.8642917822577955e-06, "loss": 0.06820068359375, "step": 66280 }, { "epoch": 0.573146795099048, "grad_norm": 31.95886616548637, "learning_rate": 4.864132159291015e-06, "loss": 0.2577507019042969, "step": 66285 }, { "epoch": 0.5731900286205912, "grad_norm": 0.3414861735185852, "learning_rate": 4.863972527727022e-06, "loss": 0.03541374206542969, "step": 66290 }, { "epoch": 0.5732332621421345, "grad_norm": 8.256492126237074, "learning_rate": 4.863812887566552e-06, "loss": 0.39189834594726564, "step": 66295 }, { "epoch": 0.5732764956636778, "grad_norm": 6.000742455220808, "learning_rate": 4.863653238810342e-06, "loss": 0.03899192810058594, "step": 66300 }, { "epoch": 0.573319729185221, "grad_norm": 23.44683296782039, "learning_rate": 4.863493581459127e-06, "loss": 0.22678756713867188, "step": 66305 }, { "epoch": 0.5733629627067643, "grad_norm": 28.318465679411645, "learning_rate": 4.863333915513645e-06, "loss": 0.13802490234375, "step": 66310 }, { "epoch": 0.5734061962283076, "grad_norm": 10.283999075864362, "learning_rate": 4.863174240974632e-06, "loss": 0.12643585205078126, "step": 66315 }, { "epoch": 0.5734494297498508, "grad_norm": 1.1986014577360935, "learning_rate": 4.863014557842823e-06, "loss": 0.26478729248046873, "step": 66320 }, { "epoch": 0.5734926632713941, "grad_norm": 2.0454335450881698, "learning_rate": 4.8628548661189565e-06, "loss": 0.15404052734375, "step": 66325 }, { "epoch": 0.5735358967929374, "grad_norm": 12.369772388933288, "learning_rate": 4.862695165803768e-06, "loss": 0.23100738525390624, "step": 66330 }, { "epoch": 0.5735791303144806, "grad_norm": 1.4927524481219265, "learning_rate": 4.862535456897993e-06, "loss": 0.04600963592529297, "step": 66335 }, { "epoch": 0.5736223638360239, "grad_norm": 11.991262382985006, "learning_rate": 4.86237573940237e-06, "loss": 0.247705078125, "step": 66340 }, { "epoch": 0.5736655973575672, "grad_norm": 39.78505898524652, "learning_rate": 4.862216013317635e-06, "loss": 0.6302230834960938, "step": 66345 }, { "epoch": 0.5737088308791104, "grad_norm": 24.963409424546082, "learning_rate": 4.862056278644524e-06, "loss": 0.2971435546875, "step": 66350 }, { "epoch": 0.5737520644006537, "grad_norm": 40.609081743008716, "learning_rate": 4.861896535383773e-06, "loss": 0.498480224609375, "step": 66355 }, { "epoch": 0.573795297922197, "grad_norm": 8.08166349606824, "learning_rate": 4.86173678353612e-06, "loss": 0.05720672607421875, "step": 66360 }, { "epoch": 0.5738385314437402, "grad_norm": 36.68479517234999, "learning_rate": 4.861577023102301e-06, "loss": 0.31283111572265626, "step": 66365 }, { "epoch": 0.5738817649652835, "grad_norm": 2.1550138461151707, "learning_rate": 4.861417254083055e-06, "loss": 0.19157867431640624, "step": 66370 }, { "epoch": 0.5739249984868268, "grad_norm": 0.5944534170016158, "learning_rate": 4.861257476479115e-06, "loss": 0.26298828125, "step": 66375 }, { "epoch": 0.57396823200837, "grad_norm": 1.1663717938504894, "learning_rate": 4.861097690291222e-06, "loss": 0.08441429138183594, "step": 66380 }, { "epoch": 0.5740114655299132, "grad_norm": 5.395919358165921, "learning_rate": 4.86093789552011e-06, "loss": 0.05845489501953125, "step": 66385 }, { "epoch": 0.5740546990514566, "grad_norm": 14.169721675431898, "learning_rate": 4.860778092166516e-06, "loss": 0.15943679809570313, "step": 66390 }, { "epoch": 0.5740979325729998, "grad_norm": 0.5958668746583793, "learning_rate": 4.860618280231178e-06, "loss": 0.10190277099609375, "step": 66395 }, { "epoch": 0.574141166094543, "grad_norm": 38.2057406638617, "learning_rate": 4.8604584597148345e-06, "loss": 0.23291015625, "step": 66400 }, { "epoch": 0.5741843996160864, "grad_norm": 29.9117313196762, "learning_rate": 4.86029863061822e-06, "loss": 0.19216423034667968, "step": 66405 }, { "epoch": 0.5742276331376296, "grad_norm": 4.1074132663205, "learning_rate": 4.860138792942073e-06, "loss": 0.09814300537109374, "step": 66410 }, { "epoch": 0.5742708666591728, "grad_norm": 7.653803581637909, "learning_rate": 4.85997894668713e-06, "loss": 0.4917510986328125, "step": 66415 }, { "epoch": 0.5743141001807162, "grad_norm": 13.52977004209655, "learning_rate": 4.859819091854127e-06, "loss": 0.130279541015625, "step": 66420 }, { "epoch": 0.5743573337022594, "grad_norm": 1.5556027607242409, "learning_rate": 4.859659228443805e-06, "loss": 0.25293731689453125, "step": 66425 }, { "epoch": 0.5744005672238026, "grad_norm": 0.8752976058345289, "learning_rate": 4.8594993564568985e-06, "loss": 0.14078369140625, "step": 66430 }, { "epoch": 0.574443800745346, "grad_norm": 3.383285608956857, "learning_rate": 4.8593394758941444e-06, "loss": 0.09734420776367188, "step": 66435 }, { "epoch": 0.5744870342668892, "grad_norm": 1.104268732289973, "learning_rate": 4.859179586756281e-06, "loss": 0.16138916015625, "step": 66440 }, { "epoch": 0.5745302677884324, "grad_norm": 9.559029204108912, "learning_rate": 4.859019689044047e-06, "loss": 0.38560791015625, "step": 66445 }, { "epoch": 0.5745735013099758, "grad_norm": 26.247715437372943, "learning_rate": 4.858859782758177e-06, "loss": 0.11858062744140625, "step": 66450 }, { "epoch": 0.574616734831519, "grad_norm": 19.45134028639503, "learning_rate": 4.8586998678994094e-06, "loss": 0.136456298828125, "step": 66455 }, { "epoch": 0.5746599683530622, "grad_norm": 23.773454731380237, "learning_rate": 4.858539944468483e-06, "loss": 0.12910842895507812, "step": 66460 }, { "epoch": 0.5747032018746054, "grad_norm": 67.7795967279608, "learning_rate": 4.858380012466136e-06, "loss": 0.2047607421875, "step": 66465 }, { "epoch": 0.5747464353961488, "grad_norm": 0.15441810303728312, "learning_rate": 4.858220071893103e-06, "loss": 0.04077072143554687, "step": 66470 }, { "epoch": 0.574789668917692, "grad_norm": 16.30544310915518, "learning_rate": 4.858060122750123e-06, "loss": 0.08554229736328126, "step": 66475 }, { "epoch": 0.5748329024392352, "grad_norm": 3.057004894443389, "learning_rate": 4.8579001650379345e-06, "loss": 0.15498771667480468, "step": 66480 }, { "epoch": 0.5748761359607786, "grad_norm": 11.106569615189516, "learning_rate": 4.857740198757275e-06, "loss": 0.09385910034179687, "step": 66485 }, { "epoch": 0.5749193694823218, "grad_norm": 15.171101524993354, "learning_rate": 4.857580223908881e-06, "loss": 0.1446746826171875, "step": 66490 }, { "epoch": 0.574962603003865, "grad_norm": 1.7049291018522634, "learning_rate": 4.8574202404934916e-06, "loss": 0.024407958984375, "step": 66495 }, { "epoch": 0.5750058365254084, "grad_norm": 10.41508524492608, "learning_rate": 4.857260248511844e-06, "loss": 0.19467391967773437, "step": 66500 }, { "epoch": 0.5750490700469516, "grad_norm": 3.522403558668042, "learning_rate": 4.857100247964677e-06, "loss": 0.0760080337524414, "step": 66505 }, { "epoch": 0.5750923035684948, "grad_norm": 9.586502945749686, "learning_rate": 4.856940238852727e-06, "loss": 0.096063232421875, "step": 66510 }, { "epoch": 0.5751355370900382, "grad_norm": 16.89802120927867, "learning_rate": 4.8567802211767325e-06, "loss": 0.1983062744140625, "step": 66515 }, { "epoch": 0.5751787706115814, "grad_norm": 15.1472890295204, "learning_rate": 4.856620194937432e-06, "loss": 0.06826286315917969, "step": 66520 }, { "epoch": 0.5752220041331246, "grad_norm": 4.523254281067961, "learning_rate": 4.856460160135564e-06, "loss": 0.4527679443359375, "step": 66525 }, { "epoch": 0.575265237654668, "grad_norm": 64.4589247156301, "learning_rate": 4.856300116771864e-06, "loss": 0.4653942108154297, "step": 66530 }, { "epoch": 0.5753084711762112, "grad_norm": 5.69098080152511, "learning_rate": 4.856140064847073e-06, "loss": 0.020179939270019532, "step": 66535 }, { "epoch": 0.5753517046977544, "grad_norm": 3.0811131880030835, "learning_rate": 4.855980004361928e-06, "loss": 0.04701080322265625, "step": 66540 }, { "epoch": 0.5753949382192978, "grad_norm": 33.04077615076834, "learning_rate": 4.855819935317167e-06, "loss": 0.126019287109375, "step": 66545 }, { "epoch": 0.575438171740841, "grad_norm": 25.81473982672866, "learning_rate": 4.855659857713529e-06, "loss": 0.0937225341796875, "step": 66550 }, { "epoch": 0.5754814052623842, "grad_norm": 4.187460853929747, "learning_rate": 4.855499771551751e-06, "loss": 0.26153411865234377, "step": 66555 }, { "epoch": 0.5755246387839275, "grad_norm": 62.609924889264526, "learning_rate": 4.855339676832573e-06, "loss": 0.5876068115234375, "step": 66560 }, { "epoch": 0.5755678723054708, "grad_norm": 0.8138088868740971, "learning_rate": 4.855179573556731e-06, "loss": 0.36439857482910154, "step": 66565 }, { "epoch": 0.575611105827014, "grad_norm": 3.084345343141221, "learning_rate": 4.855019461724966e-06, "loss": 0.373883056640625, "step": 66570 }, { "epoch": 0.5756543393485573, "grad_norm": 7.671536767071792, "learning_rate": 4.854859341338014e-06, "loss": 0.25308380126953123, "step": 66575 }, { "epoch": 0.5756975728701006, "grad_norm": 2.814745896041502, "learning_rate": 4.854699212396615e-06, "loss": 0.09287872314453124, "step": 66580 }, { "epoch": 0.5757408063916438, "grad_norm": 0.6338703931152421, "learning_rate": 4.854539074901508e-06, "loss": 0.21243133544921874, "step": 66585 }, { "epoch": 0.575784039913187, "grad_norm": 9.194320037176833, "learning_rate": 4.854378928853429e-06, "loss": 0.1239898681640625, "step": 66590 }, { "epoch": 0.5758272734347304, "grad_norm": 6.798403841774779, "learning_rate": 4.85421877425312e-06, "loss": 0.12111892700195312, "step": 66595 }, { "epoch": 0.5758705069562736, "grad_norm": 4.4149733315515185, "learning_rate": 4.854058611101317e-06, "loss": 0.26103363037109373, "step": 66600 }, { "epoch": 0.5759137404778168, "grad_norm": 13.56314893141936, "learning_rate": 4.8538984393987596e-06, "loss": 0.12509841918945314, "step": 66605 }, { "epoch": 0.5759569739993602, "grad_norm": 68.36527188833462, "learning_rate": 4.853738259146187e-06, "loss": 0.3091888427734375, "step": 66610 }, { "epoch": 0.5760002075209034, "grad_norm": 0.404492759181855, "learning_rate": 4.853578070344336e-06, "loss": 0.1392120361328125, "step": 66615 }, { "epoch": 0.5760434410424466, "grad_norm": 0.047019204689688066, "learning_rate": 4.853417872993947e-06, "loss": 0.027142143249511717, "step": 66620 }, { "epoch": 0.57608667456399, "grad_norm": 6.041500825866051, "learning_rate": 4.853257667095759e-06, "loss": 0.16309967041015624, "step": 66625 }, { "epoch": 0.5761299080855332, "grad_norm": 33.56779872154489, "learning_rate": 4.85309745265051e-06, "loss": 0.13006439208984374, "step": 66630 }, { "epoch": 0.5761731416070764, "grad_norm": 0.6378246312161797, "learning_rate": 4.85293722965894e-06, "loss": 0.11570405960083008, "step": 66635 }, { "epoch": 0.5762163751286197, "grad_norm": 1.379811519322165, "learning_rate": 4.852776998121787e-06, "loss": 0.06495285034179688, "step": 66640 }, { "epoch": 0.576259608650163, "grad_norm": 49.26486618708413, "learning_rate": 4.85261675803979e-06, "loss": 0.3981952667236328, "step": 66645 }, { "epoch": 0.5763028421717062, "grad_norm": 4.089632473637785, "learning_rate": 4.852456509413689e-06, "loss": 0.0758270263671875, "step": 66650 }, { "epoch": 0.5763460756932495, "grad_norm": 6.91851264463163, "learning_rate": 4.852296252244221e-06, "loss": 0.07196884155273438, "step": 66655 }, { "epoch": 0.5763893092147928, "grad_norm": 1.8884805754638343, "learning_rate": 4.852135986532127e-06, "loss": 0.0769683837890625, "step": 66660 }, { "epoch": 0.576432542736336, "grad_norm": 6.555748993524218, "learning_rate": 4.851975712278145e-06, "loss": 0.150518798828125, "step": 66665 }, { "epoch": 0.5764757762578793, "grad_norm": 4.084005194201709, "learning_rate": 4.851815429483015e-06, "loss": 0.07235908508300781, "step": 66670 }, { "epoch": 0.5765190097794226, "grad_norm": 8.87640780578882, "learning_rate": 4.851655138147476e-06, "loss": 0.3318046569824219, "step": 66675 }, { "epoch": 0.5765622433009658, "grad_norm": 41.712240729904586, "learning_rate": 4.851494838272267e-06, "loss": 0.25406036376953123, "step": 66680 }, { "epoch": 0.5766054768225091, "grad_norm": 12.253798346669601, "learning_rate": 4.851334529858128e-06, "loss": 0.2076202392578125, "step": 66685 }, { "epoch": 0.5766487103440524, "grad_norm": 3.6676010704078914, "learning_rate": 4.851174212905797e-06, "loss": 0.019304656982421876, "step": 66690 }, { "epoch": 0.5766919438655956, "grad_norm": 1.8195349872463276, "learning_rate": 4.851013887416014e-06, "loss": 0.22443046569824218, "step": 66695 }, { "epoch": 0.5767351773871389, "grad_norm": 3.051466953663614, "learning_rate": 4.850853553389519e-06, "loss": 0.057642555236816405, "step": 66700 }, { "epoch": 0.5767784109086822, "grad_norm": 13.807596276486864, "learning_rate": 4.85069321082705e-06, "loss": 0.09557075500488281, "step": 66705 }, { "epoch": 0.5768216444302254, "grad_norm": 26.85707201337166, "learning_rate": 4.850532859729349e-06, "loss": 0.3605663299560547, "step": 66710 }, { "epoch": 0.5768648779517687, "grad_norm": 21.750337110471904, "learning_rate": 4.850372500097153e-06, "loss": 0.1145782470703125, "step": 66715 }, { "epoch": 0.576908111473312, "grad_norm": 2.4235283045238263, "learning_rate": 4.850212131931202e-06, "loss": 0.08171844482421875, "step": 66720 }, { "epoch": 0.5769513449948552, "grad_norm": 51.9302083141871, "learning_rate": 4.850051755232238e-06, "loss": 0.6972930908203125, "step": 66725 }, { "epoch": 0.5769945785163985, "grad_norm": 0.7999198242228254, "learning_rate": 4.8498913700009976e-06, "loss": 0.026352310180664064, "step": 66730 }, { "epoch": 0.5770378120379417, "grad_norm": 15.246945140834047, "learning_rate": 4.849730976238221e-06, "loss": 0.61893310546875, "step": 66735 }, { "epoch": 0.577081045559485, "grad_norm": 13.360738525811344, "learning_rate": 4.849570573944649e-06, "loss": 0.08382720947265625, "step": 66740 }, { "epoch": 0.5771242790810283, "grad_norm": 30.819418790091227, "learning_rate": 4.849410163121022e-06, "loss": 0.26650238037109375, "step": 66745 }, { "epoch": 0.5771675126025715, "grad_norm": 0.3613194623258989, "learning_rate": 4.849249743768078e-06, "loss": 0.08194160461425781, "step": 66750 }, { "epoch": 0.5772107461241148, "grad_norm": 21.614606176610703, "learning_rate": 4.849089315886557e-06, "loss": 0.2651634216308594, "step": 66755 }, { "epoch": 0.577253979645658, "grad_norm": 1.8361899986918047, "learning_rate": 4.8489288794772e-06, "loss": 0.10917396545410156, "step": 66760 }, { "epoch": 0.5772972131672013, "grad_norm": 41.551758582047476, "learning_rate": 4.848768434540746e-06, "loss": 0.7755441665649414, "step": 66765 }, { "epoch": 0.5773404466887446, "grad_norm": 2.1871463102765847, "learning_rate": 4.848607981077936e-06, "loss": 0.1332866668701172, "step": 66770 }, { "epoch": 0.5773836802102879, "grad_norm": 4.317579006255888, "learning_rate": 4.848447519089509e-06, "loss": 0.4091644287109375, "step": 66775 }, { "epoch": 0.5774269137318311, "grad_norm": 27.256711422126966, "learning_rate": 4.8482870485762054e-06, "loss": 0.08588829040527343, "step": 66780 }, { "epoch": 0.5774701472533744, "grad_norm": 18.00695793886408, "learning_rate": 4.848126569538766e-06, "loss": 0.24847564697265626, "step": 66785 }, { "epoch": 0.5775133807749177, "grad_norm": 18.900860357833576, "learning_rate": 4.8479660819779284e-06, "loss": 0.43438262939453126, "step": 66790 }, { "epoch": 0.5775566142964609, "grad_norm": 5.947909116398094, "learning_rate": 4.847805585894436e-06, "loss": 0.178607177734375, "step": 66795 }, { "epoch": 0.5775998478180042, "grad_norm": 9.17888556839383, "learning_rate": 4.847645081289027e-06, "loss": 0.11300773620605468, "step": 66800 }, { "epoch": 0.5776430813395474, "grad_norm": 20.40380294797158, "learning_rate": 4.847484568162443e-06, "loss": 0.1802978515625, "step": 66805 }, { "epoch": 0.5776863148610907, "grad_norm": 8.635927684857762, "learning_rate": 4.8473240465154224e-06, "loss": 0.205682373046875, "step": 66810 }, { "epoch": 0.5777295483826339, "grad_norm": 7.964839521503184, "learning_rate": 4.847163516348707e-06, "loss": 0.1458465576171875, "step": 66815 }, { "epoch": 0.5777727819041772, "grad_norm": 0.7764505746567536, "learning_rate": 4.847002977663035e-06, "loss": 0.1788177490234375, "step": 66820 }, { "epoch": 0.5778160154257205, "grad_norm": 12.145584824094808, "learning_rate": 4.846842430459151e-06, "loss": 0.24563751220703126, "step": 66825 }, { "epoch": 0.5778592489472637, "grad_norm": 1.751949297153846, "learning_rate": 4.846681874737792e-06, "loss": 0.16043930053710936, "step": 66830 }, { "epoch": 0.577902482468807, "grad_norm": 24.30511420274073, "learning_rate": 4.846521310499698e-06, "loss": 0.1560272216796875, "step": 66835 }, { "epoch": 0.5779457159903503, "grad_norm": 13.400646672830385, "learning_rate": 4.846360737745613e-06, "loss": 0.18732986450195313, "step": 66840 }, { "epoch": 0.5779889495118935, "grad_norm": 8.51976736211836, "learning_rate": 4.8462001564762735e-06, "loss": 0.049003219604492186, "step": 66845 }, { "epoch": 0.5780321830334368, "grad_norm": 4.615527248846457, "learning_rate": 4.846039566692423e-06, "loss": 0.4658496856689453, "step": 66850 }, { "epoch": 0.5780754165549801, "grad_norm": 2.1389291236508003, "learning_rate": 4.845878968394802e-06, "loss": 0.10320510864257812, "step": 66855 }, { "epoch": 0.5781186500765233, "grad_norm": 0.9352824466558246, "learning_rate": 4.8457183615841495e-06, "loss": 0.10180282592773438, "step": 66860 }, { "epoch": 0.5781618835980666, "grad_norm": 12.466995570322936, "learning_rate": 4.845557746261206e-06, "loss": 0.3115234375, "step": 66865 }, { "epoch": 0.5782051171196099, "grad_norm": 7.560826966830415, "learning_rate": 4.845397122426715e-06, "loss": 0.10204391479492188, "step": 66870 }, { "epoch": 0.5782483506411531, "grad_norm": 8.830894446772842, "learning_rate": 4.845236490081415e-06, "loss": 0.0567108154296875, "step": 66875 }, { "epoch": 0.5782915841626964, "grad_norm": 518.2368480034353, "learning_rate": 4.845075849226046e-06, "loss": 0.6011566162109375, "step": 66880 }, { "epoch": 0.5783348176842397, "grad_norm": 6.786363540092992, "learning_rate": 4.844915199861352e-06, "loss": 0.22809410095214844, "step": 66885 }, { "epoch": 0.5783780512057829, "grad_norm": 10.645287162606875, "learning_rate": 4.8447545419880715e-06, "loss": 0.08801727294921875, "step": 66890 }, { "epoch": 0.5784212847273262, "grad_norm": 3.4772303192360354, "learning_rate": 4.844593875606946e-06, "loss": 0.0212493896484375, "step": 66895 }, { "epoch": 0.5784645182488695, "grad_norm": 17.808132014659638, "learning_rate": 4.844433200718716e-06, "loss": 0.1953125, "step": 66900 }, { "epoch": 0.5785077517704127, "grad_norm": 0.03846833253661281, "learning_rate": 4.844272517324124e-06, "loss": 0.4445489883422852, "step": 66905 }, { "epoch": 0.5785509852919559, "grad_norm": 3.1650334454475173, "learning_rate": 4.8441118254239094e-06, "loss": 0.0227813720703125, "step": 66910 }, { "epoch": 0.5785942188134993, "grad_norm": 24.13346302240496, "learning_rate": 4.843951125018814e-06, "loss": 0.18605194091796876, "step": 66915 }, { "epoch": 0.5786374523350425, "grad_norm": 12.742940458219685, "learning_rate": 4.84379041610958e-06, "loss": 0.267034912109375, "step": 66920 }, { "epoch": 0.5786806858565857, "grad_norm": 16.375483118842766, "learning_rate": 4.843629698696947e-06, "loss": 0.26515045166015627, "step": 66925 }, { "epoch": 0.5787239193781291, "grad_norm": 2.170802565589702, "learning_rate": 4.843468972781655e-06, "loss": 0.10019378662109375, "step": 66930 }, { "epoch": 0.5787671528996723, "grad_norm": 22.666267236597452, "learning_rate": 4.84330823836445e-06, "loss": 0.250299072265625, "step": 66935 }, { "epoch": 0.5788103864212155, "grad_norm": 41.30869719735503, "learning_rate": 4.843147495446067e-06, "loss": 0.338055419921875, "step": 66940 }, { "epoch": 0.5788536199427589, "grad_norm": 34.20656342208562, "learning_rate": 4.842986744027253e-06, "loss": 0.07806243896484374, "step": 66945 }, { "epoch": 0.5788968534643021, "grad_norm": 5.704500231086579, "learning_rate": 4.842825984108747e-06, "loss": 0.036226654052734376, "step": 66950 }, { "epoch": 0.5789400869858453, "grad_norm": 1.680637273958884, "learning_rate": 4.84266521569129e-06, "loss": 0.1847991943359375, "step": 66955 }, { "epoch": 0.5789833205073887, "grad_norm": 12.836413045378853, "learning_rate": 4.842504438775622e-06, "loss": 0.03267974853515625, "step": 66960 }, { "epoch": 0.5790265540289319, "grad_norm": 2.5979658568977118, "learning_rate": 4.8423436533624895e-06, "loss": 0.145556640625, "step": 66965 }, { "epoch": 0.5790697875504751, "grad_norm": 9.937148909319594, "learning_rate": 4.842182859452629e-06, "loss": 0.06267013549804687, "step": 66970 }, { "epoch": 0.5791130210720185, "grad_norm": 7.839270236167119, "learning_rate": 4.842022057046784e-06, "loss": 0.10478515625, "step": 66975 }, { "epoch": 0.5791562545935617, "grad_norm": 3.3411481072482623, "learning_rate": 4.841861246145696e-06, "loss": 0.2006011962890625, "step": 66980 }, { "epoch": 0.5791994881151049, "grad_norm": 4.0834033014326065, "learning_rate": 4.841700426750107e-06, "loss": 0.299273681640625, "step": 66985 }, { "epoch": 0.5792427216366481, "grad_norm": 14.172772807277067, "learning_rate": 4.841539598860759e-06, "loss": 0.03190193176269531, "step": 66990 }, { "epoch": 0.5792859551581915, "grad_norm": 2.9998034559057936, "learning_rate": 4.841378762478392e-06, "loss": 0.13143157958984375, "step": 66995 }, { "epoch": 0.5793291886797347, "grad_norm": 1.873492530726049, "learning_rate": 4.841217917603748e-06, "loss": 0.09952392578125, "step": 67000 }, { "epoch": 0.5793724222012779, "grad_norm": 20.107824856130215, "learning_rate": 4.841057064237573e-06, "loss": 0.4010658264160156, "step": 67005 }, { "epoch": 0.5794156557228213, "grad_norm": 12.377455970579641, "learning_rate": 4.8408962023806025e-06, "loss": 0.16404647827148439, "step": 67010 }, { "epoch": 0.5794588892443645, "grad_norm": 7.537383651458083, "learning_rate": 4.840735332033582e-06, "loss": 0.0363037109375, "step": 67015 }, { "epoch": 0.5795021227659077, "grad_norm": 9.337777524553337, "learning_rate": 4.840574453197253e-06, "loss": 0.14627838134765625, "step": 67020 }, { "epoch": 0.5795453562874511, "grad_norm": 9.096139498670748, "learning_rate": 4.840413565872358e-06, "loss": 0.11754608154296875, "step": 67025 }, { "epoch": 0.5795885898089943, "grad_norm": 2.703643977986583, "learning_rate": 4.840252670059637e-06, "loss": 0.09030914306640625, "step": 67030 }, { "epoch": 0.5796318233305375, "grad_norm": 17.37716284701815, "learning_rate": 4.840091765759834e-06, "loss": 0.1751007080078125, "step": 67035 }, { "epoch": 0.5796750568520809, "grad_norm": 10.974583210828659, "learning_rate": 4.8399308529736895e-06, "loss": 0.4068267822265625, "step": 67040 }, { "epoch": 0.5797182903736241, "grad_norm": 5.1944978951539245, "learning_rate": 4.839769931701947e-06, "loss": 0.21645421981811525, "step": 67045 }, { "epoch": 0.5797615238951673, "grad_norm": 4.511387835624076, "learning_rate": 4.839609001945349e-06, "loss": 0.4465728759765625, "step": 67050 }, { "epoch": 0.5798047574167107, "grad_norm": 0.8168473594012197, "learning_rate": 4.839448063704637e-06, "loss": 0.051168632507324216, "step": 67055 }, { "epoch": 0.5798479909382539, "grad_norm": 0.2981830276228831, "learning_rate": 4.8392871169805514e-06, "loss": 0.33873672485351564, "step": 67060 }, { "epoch": 0.5798912244597971, "grad_norm": 2.942877694330209, "learning_rate": 4.839126161773838e-06, "loss": 0.10891571044921874, "step": 67065 }, { "epoch": 0.5799344579813404, "grad_norm": 32.579589561709106, "learning_rate": 4.838965198085235e-06, "loss": 0.24508056640625, "step": 67070 }, { "epoch": 0.5799776915028837, "grad_norm": 2.7286587288616473, "learning_rate": 4.838804225915488e-06, "loss": 0.5118896484375, "step": 67075 }, { "epoch": 0.5800209250244269, "grad_norm": 6.6719677934980295, "learning_rate": 4.83864324526534e-06, "loss": 0.17737236022949218, "step": 67080 }, { "epoch": 0.5800641585459702, "grad_norm": 34.1305868405533, "learning_rate": 4.83848225613553e-06, "loss": 0.18742904663085938, "step": 67085 }, { "epoch": 0.5801073920675135, "grad_norm": 15.145407159733349, "learning_rate": 4.8383212585268026e-06, "loss": 0.04588394165039063, "step": 67090 }, { "epoch": 0.5801506255890567, "grad_norm": 8.713509859493666, "learning_rate": 4.8381602524399006e-06, "loss": 0.2888206481933594, "step": 67095 }, { "epoch": 0.5801938591106, "grad_norm": 44.582155543313974, "learning_rate": 4.837999237875565e-06, "loss": 0.20941696166992188, "step": 67100 }, { "epoch": 0.5802370926321433, "grad_norm": 20.81269690536081, "learning_rate": 4.83783821483454e-06, "loss": 0.15538330078125, "step": 67105 }, { "epoch": 0.5802803261536865, "grad_norm": 1.2959654371393734, "learning_rate": 4.837677183317566e-06, "loss": 0.22408599853515626, "step": 67110 }, { "epoch": 0.5803235596752297, "grad_norm": 1.3716638450896605, "learning_rate": 4.83751614332539e-06, "loss": 0.05204010009765625, "step": 67115 }, { "epoch": 0.5803667931967731, "grad_norm": 23.11439632528397, "learning_rate": 4.83735509485875e-06, "loss": 0.26904096603393557, "step": 67120 }, { "epoch": 0.5804100267183163, "grad_norm": 12.933330331393933, "learning_rate": 4.8371940379183916e-06, "loss": 0.13435516357421876, "step": 67125 }, { "epoch": 0.5804532602398595, "grad_norm": 0.30259264326082, "learning_rate": 4.837032972505056e-06, "loss": 0.19851303100585938, "step": 67130 }, { "epoch": 0.5804964937614029, "grad_norm": 26.558659071964694, "learning_rate": 4.836871898619487e-06, "loss": 0.264019775390625, "step": 67135 }, { "epoch": 0.5805397272829461, "grad_norm": 0.38891938777018736, "learning_rate": 4.836710816262427e-06, "loss": 0.18189163208007814, "step": 67140 }, { "epoch": 0.5805829608044893, "grad_norm": 0.3912275921371205, "learning_rate": 4.836549725434619e-06, "loss": 0.11836929321289062, "step": 67145 }, { "epoch": 0.5806261943260327, "grad_norm": 3.466238839894084, "learning_rate": 4.836388626136805e-06, "loss": 0.08263397216796875, "step": 67150 }, { "epoch": 0.5806694278475759, "grad_norm": 4.004240193402089, "learning_rate": 4.83622751836973e-06, "loss": 0.12963790893554689, "step": 67155 }, { "epoch": 0.5807126613691191, "grad_norm": 1.0096114396270572, "learning_rate": 4.836066402134136e-06, "loss": 0.23604888916015626, "step": 67160 }, { "epoch": 0.5807558948906624, "grad_norm": 5.761158137679552, "learning_rate": 4.8359052774307655e-06, "loss": 0.06847152709960938, "step": 67165 }, { "epoch": 0.5807991284122057, "grad_norm": 9.347165123877739, "learning_rate": 4.835744144260363e-06, "loss": 0.1374603271484375, "step": 67170 }, { "epoch": 0.5808423619337489, "grad_norm": 1.1709529344421123, "learning_rate": 4.83558300262367e-06, "loss": 0.06464767456054688, "step": 67175 }, { "epoch": 0.5808855954552922, "grad_norm": 23.35844009025923, "learning_rate": 4.8354218525214305e-06, "loss": 0.16263427734375, "step": 67180 }, { "epoch": 0.5809288289768355, "grad_norm": 19.988562768256365, "learning_rate": 4.8352606939543865e-06, "loss": 0.509637451171875, "step": 67185 }, { "epoch": 0.5809720624983787, "grad_norm": 9.261986348306156, "learning_rate": 4.835099526923284e-06, "loss": 0.15886154174804687, "step": 67190 }, { "epoch": 0.581015296019922, "grad_norm": 0.43467978811812197, "learning_rate": 4.834938351428865e-06, "loss": 0.0436187744140625, "step": 67195 }, { "epoch": 0.5810585295414653, "grad_norm": 10.525876417830375, "learning_rate": 4.834777167471871e-06, "loss": 0.14957351684570314, "step": 67200 }, { "epoch": 0.5811017630630085, "grad_norm": 1.5805321555268796, "learning_rate": 4.834615975053047e-06, "loss": 0.05897445678710937, "step": 67205 }, { "epoch": 0.5811449965845518, "grad_norm": 19.562527199589553, "learning_rate": 4.834454774173137e-06, "loss": 0.1868988037109375, "step": 67210 }, { "epoch": 0.5811882301060951, "grad_norm": 1.3557782860492282, "learning_rate": 4.834293564832882e-06, "loss": 0.4784740447998047, "step": 67215 }, { "epoch": 0.5812314636276383, "grad_norm": 6.014053669948648, "learning_rate": 4.83413234703303e-06, "loss": 0.4773406982421875, "step": 67220 }, { "epoch": 0.5812746971491816, "grad_norm": 6.507990753068167, "learning_rate": 4.8339711207743195e-06, "loss": 0.34479827880859376, "step": 67225 }, { "epoch": 0.5813179306707249, "grad_norm": 21.969559123541472, "learning_rate": 4.833809886057496e-06, "loss": 0.27245254516601564, "step": 67230 }, { "epoch": 0.5813611641922681, "grad_norm": 4.107759420833343, "learning_rate": 4.833648642883304e-06, "loss": 0.1722137451171875, "step": 67235 }, { "epoch": 0.5814043977138114, "grad_norm": 34.20220738362551, "learning_rate": 4.833487391252487e-06, "loss": 0.1392822265625, "step": 67240 }, { "epoch": 0.5814476312353546, "grad_norm": 4.497278285826183, "learning_rate": 4.833326131165788e-06, "loss": 0.11944656372070313, "step": 67245 }, { "epoch": 0.5814908647568979, "grad_norm": 0.5380940495572135, "learning_rate": 4.83316486262395e-06, "loss": 0.03674774169921875, "step": 67250 }, { "epoch": 0.5815340982784412, "grad_norm": 16.772302941470965, "learning_rate": 4.833003585627718e-06, "loss": 0.14784641265869142, "step": 67255 }, { "epoch": 0.5815773317999844, "grad_norm": 0.3674949528373205, "learning_rate": 4.8328423001778365e-06, "loss": 0.1650665283203125, "step": 67260 }, { "epoch": 0.5816205653215277, "grad_norm": 0.4486583108079008, "learning_rate": 4.832681006275047e-06, "loss": 0.095440673828125, "step": 67265 }, { "epoch": 0.581663798843071, "grad_norm": 5.82736481698983, "learning_rate": 4.832519703920095e-06, "loss": 0.06958236694335937, "step": 67270 }, { "epoch": 0.5817070323646142, "grad_norm": 4.534477733308752, "learning_rate": 4.832358393113724e-06, "loss": 0.29764862060546876, "step": 67275 }, { "epoch": 0.5817502658861575, "grad_norm": 1.2149568770144707, "learning_rate": 4.832197073856679e-06, "loss": 0.035797119140625, "step": 67280 }, { "epoch": 0.5817934994077008, "grad_norm": 22.342581634631934, "learning_rate": 4.8320357461497016e-06, "loss": 0.3735504150390625, "step": 67285 }, { "epoch": 0.581836732929244, "grad_norm": 0.19267097909582526, "learning_rate": 4.831874409993538e-06, "loss": 0.20778388977050782, "step": 67290 }, { "epoch": 0.5818799664507873, "grad_norm": 2.2240340100565725, "learning_rate": 4.831713065388931e-06, "loss": 0.12318115234375, "step": 67295 }, { "epoch": 0.5819231999723306, "grad_norm": 1.1008669595867753, "learning_rate": 4.831551712336626e-06, "loss": 0.14797210693359375, "step": 67300 }, { "epoch": 0.5819664334938738, "grad_norm": 7.40068125555138, "learning_rate": 4.831390350837366e-06, "loss": 0.077130126953125, "step": 67305 }, { "epoch": 0.5820096670154171, "grad_norm": 0.9542271902421298, "learning_rate": 4.8312289808918945e-06, "loss": 0.0320068359375, "step": 67310 }, { "epoch": 0.5820529005369603, "grad_norm": 3.725948103018298, "learning_rate": 4.831067602500958e-06, "loss": 0.08316650390625, "step": 67315 }, { "epoch": 0.5820961340585036, "grad_norm": 0.1646289715553298, "learning_rate": 4.830906215665299e-06, "loss": 0.0712982177734375, "step": 67320 }, { "epoch": 0.5821393675800469, "grad_norm": 8.181654763701575, "learning_rate": 4.830744820385662e-06, "loss": 0.09814605712890626, "step": 67325 }, { "epoch": 0.5821826011015901, "grad_norm": 4.5181482441306535, "learning_rate": 4.830583416662792e-06, "loss": 0.6600929260253906, "step": 67330 }, { "epoch": 0.5822258346231334, "grad_norm": 1.8910229806261594, "learning_rate": 4.830422004497433e-06, "loss": 0.053918075561523435, "step": 67335 }, { "epoch": 0.5822690681446766, "grad_norm": 7.843730926819512, "learning_rate": 4.830260583890331e-06, "loss": 0.2451324462890625, "step": 67340 }, { "epoch": 0.58231230166622, "grad_norm": 0.09549510815546255, "learning_rate": 4.830099154842227e-06, "loss": 0.021083831787109375, "step": 67345 }, { "epoch": 0.5823555351877632, "grad_norm": 4.38376069618974, "learning_rate": 4.829937717353867e-06, "loss": 0.21013641357421875, "step": 67350 }, { "epoch": 0.5823987687093064, "grad_norm": 4.726832806454038, "learning_rate": 4.829776271425997e-06, "loss": 0.13478546142578124, "step": 67355 }, { "epoch": 0.5824420022308497, "grad_norm": 2.008906299948712, "learning_rate": 4.82961481705936e-06, "loss": 0.2545978546142578, "step": 67360 }, { "epoch": 0.582485235752393, "grad_norm": 4.017458664849088, "learning_rate": 4.829453354254702e-06, "loss": 0.07281036376953125, "step": 67365 }, { "epoch": 0.5825284692739362, "grad_norm": 0.23678158373196695, "learning_rate": 4.8292918830127664e-06, "loss": 0.11483917236328126, "step": 67370 }, { "epoch": 0.5825717027954795, "grad_norm": 4.314401243851493, "learning_rate": 4.829130403334298e-06, "loss": 0.10200424194335937, "step": 67375 }, { "epoch": 0.5826149363170228, "grad_norm": 1.3704411906660103, "learning_rate": 4.828968915220042e-06, "loss": 0.14821014404296876, "step": 67380 }, { "epoch": 0.582658169838566, "grad_norm": 24.609903991844355, "learning_rate": 4.828807418670743e-06, "loss": 0.09273033142089844, "step": 67385 }, { "epoch": 0.5827014033601093, "grad_norm": 33.834046883822076, "learning_rate": 4.828645913687145e-06, "loss": 0.25982208251953126, "step": 67390 }, { "epoch": 0.5827446368816526, "grad_norm": 18.233111365342918, "learning_rate": 4.828484400269995e-06, "loss": 0.14713554382324218, "step": 67395 }, { "epoch": 0.5827878704031958, "grad_norm": 0.804090104624286, "learning_rate": 4.828322878420035e-06, "loss": 0.024176692962646483, "step": 67400 }, { "epoch": 0.5828311039247391, "grad_norm": 1.7400195785384693, "learning_rate": 4.828161348138013e-06, "loss": 0.10040550231933594, "step": 67405 }, { "epoch": 0.5828743374462824, "grad_norm": 10.536649204643894, "learning_rate": 4.827999809424671e-06, "loss": 0.0961181640625, "step": 67410 }, { "epoch": 0.5829175709678256, "grad_norm": 0.4348451198477376, "learning_rate": 4.827838262280757e-06, "loss": 0.047174835205078126, "step": 67415 }, { "epoch": 0.5829608044893688, "grad_norm": 1.859413699217844, "learning_rate": 4.827676706707013e-06, "loss": 0.3824920654296875, "step": 67420 }, { "epoch": 0.5830040380109122, "grad_norm": 2.316204944403846, "learning_rate": 4.827515142704186e-06, "loss": 0.059304428100585935, "step": 67425 }, { "epoch": 0.5830472715324554, "grad_norm": 2.426139531793118, "learning_rate": 4.827353570273021e-06, "loss": 0.06457366943359374, "step": 67430 }, { "epoch": 0.5830905050539986, "grad_norm": 29.531799223934506, "learning_rate": 4.827191989414262e-06, "loss": 0.15647659301757813, "step": 67435 }, { "epoch": 0.583133738575542, "grad_norm": 16.246236602120913, "learning_rate": 4.827030400128656e-06, "loss": 0.14083046913146974, "step": 67440 }, { "epoch": 0.5831769720970852, "grad_norm": 1.6587085831544837, "learning_rate": 4.8268688024169465e-06, "loss": 0.09737777709960938, "step": 67445 }, { "epoch": 0.5832202056186284, "grad_norm": 2.621785278211735, "learning_rate": 4.826707196279879e-06, "loss": 0.2759521484375, "step": 67450 }, { "epoch": 0.5832634391401718, "grad_norm": 0.40219901915865097, "learning_rate": 4.8265455817182004e-06, "loss": 0.03768463134765625, "step": 67455 }, { "epoch": 0.583306672661715, "grad_norm": 4.732976921382258, "learning_rate": 4.826383958732655e-06, "loss": 0.1219390869140625, "step": 67460 }, { "epoch": 0.5833499061832582, "grad_norm": 11.83331965401863, "learning_rate": 4.826222327323988e-06, "loss": 0.23366851806640626, "step": 67465 }, { "epoch": 0.5833931397048016, "grad_norm": 23.897452652971076, "learning_rate": 4.826060687492945e-06, "loss": 0.32579765319824217, "step": 67470 }, { "epoch": 0.5834363732263448, "grad_norm": 11.289110098705278, "learning_rate": 4.8258990392402705e-06, "loss": 0.2185791015625, "step": 67475 }, { "epoch": 0.583479606747888, "grad_norm": 12.270961531431132, "learning_rate": 4.825737382566712e-06, "loss": 0.177081298828125, "step": 67480 }, { "epoch": 0.5835228402694314, "grad_norm": 310.9508211361292, "learning_rate": 4.825575717473014e-06, "loss": 0.245452880859375, "step": 67485 }, { "epoch": 0.5835660737909746, "grad_norm": 6.932559734037668, "learning_rate": 4.825414043959921e-06, "loss": 0.09331283569335938, "step": 67490 }, { "epoch": 0.5836093073125178, "grad_norm": 0.2492546445016949, "learning_rate": 4.825252362028181e-06, "loss": 0.3196277618408203, "step": 67495 }, { "epoch": 0.5836525408340612, "grad_norm": 5.208659100917217, "learning_rate": 4.825090671678538e-06, "loss": 0.3822929382324219, "step": 67500 }, { "epoch": 0.5836957743556044, "grad_norm": 3.2093022315813937, "learning_rate": 4.824928972911738e-06, "loss": 0.07453765869140624, "step": 67505 }, { "epoch": 0.5837390078771476, "grad_norm": 1.4435443330997264, "learning_rate": 4.824767265728527e-06, "loss": 0.1772380828857422, "step": 67510 }, { "epoch": 0.5837822413986908, "grad_norm": 8.964341185207875, "learning_rate": 4.824605550129651e-06, "loss": 0.0480010986328125, "step": 67515 }, { "epoch": 0.5838254749202342, "grad_norm": 17.568599625339207, "learning_rate": 4.824443826115854e-06, "loss": 0.14099807739257814, "step": 67520 }, { "epoch": 0.5838687084417774, "grad_norm": 17.10271174390467, "learning_rate": 4.824282093687884e-06, "loss": 0.2529388427734375, "step": 67525 }, { "epoch": 0.5839119419633206, "grad_norm": 1.9955735277071343, "learning_rate": 4.824120352846487e-06, "loss": 0.030099105834960938, "step": 67530 }, { "epoch": 0.583955175484864, "grad_norm": 0.3660854487528914, "learning_rate": 4.823958603592407e-06, "loss": 0.1618377685546875, "step": 67535 }, { "epoch": 0.5839984090064072, "grad_norm": 3.813079291849636, "learning_rate": 4.823796845926391e-06, "loss": 0.1060821533203125, "step": 67540 }, { "epoch": 0.5840416425279504, "grad_norm": 8.343060748521129, "learning_rate": 4.823635079849186e-06, "loss": 0.1858306884765625, "step": 67545 }, { "epoch": 0.5840848760494938, "grad_norm": 1.6919261046362786, "learning_rate": 4.823473305361537e-06, "loss": 0.13759918212890626, "step": 67550 }, { "epoch": 0.584128109571037, "grad_norm": 9.211721134147282, "learning_rate": 4.82331152246419e-06, "loss": 0.2184234619140625, "step": 67555 }, { "epoch": 0.5841713430925802, "grad_norm": 5.5154023418424245, "learning_rate": 4.823149731157892e-06, "loss": 0.204766845703125, "step": 67560 }, { "epoch": 0.5842145766141236, "grad_norm": 0.7052790223763659, "learning_rate": 4.822987931443387e-06, "loss": 0.03130645751953125, "step": 67565 }, { "epoch": 0.5842578101356668, "grad_norm": 20.96905349127665, "learning_rate": 4.822826123321424e-06, "loss": 0.14260025024414064, "step": 67570 }, { "epoch": 0.58430104365721, "grad_norm": 5.874125539594912, "learning_rate": 4.822664306792748e-06, "loss": 0.098974609375, "step": 67575 }, { "epoch": 0.5843442771787534, "grad_norm": 6.726451726197113, "learning_rate": 4.8225024818581045e-06, "loss": 0.098388671875, "step": 67580 }, { "epoch": 0.5843875107002966, "grad_norm": 21.280080077677102, "learning_rate": 4.8223406485182415e-06, "loss": 0.23487319946289062, "step": 67585 }, { "epoch": 0.5844307442218398, "grad_norm": 6.563195206813839, "learning_rate": 4.822178806773904e-06, "loss": 0.117755126953125, "step": 67590 }, { "epoch": 0.584473977743383, "grad_norm": 6.830458676172836, "learning_rate": 4.822016956625839e-06, "loss": 0.10036201477050781, "step": 67595 }, { "epoch": 0.5845172112649264, "grad_norm": 0.3932191336503873, "learning_rate": 4.821855098074793e-06, "loss": 0.3391632080078125, "step": 67600 }, { "epoch": 0.5845604447864696, "grad_norm": 16.58953513769415, "learning_rate": 4.821693231121512e-06, "loss": 0.118798828125, "step": 67605 }, { "epoch": 0.5846036783080129, "grad_norm": 0.36137688590092193, "learning_rate": 4.821531355766742e-06, "loss": 0.202398681640625, "step": 67610 }, { "epoch": 0.5846469118295562, "grad_norm": 19.58143063779705, "learning_rate": 4.82136947201123e-06, "loss": 0.14921531677246094, "step": 67615 }, { "epoch": 0.5846901453510994, "grad_norm": 12.992852921473535, "learning_rate": 4.821207579855725e-06, "loss": 0.23222122192382813, "step": 67620 }, { "epoch": 0.5847333788726427, "grad_norm": 0.9053781227066772, "learning_rate": 4.821045679300971e-06, "loss": 0.0148468017578125, "step": 67625 }, { "epoch": 0.584776612394186, "grad_norm": 1.5974407532416444, "learning_rate": 4.820883770347714e-06, "loss": 0.2137054443359375, "step": 67630 }, { "epoch": 0.5848198459157292, "grad_norm": 4.9611936154880665, "learning_rate": 4.820721852996703e-06, "loss": 0.178863525390625, "step": 67635 }, { "epoch": 0.5848630794372724, "grad_norm": 13.746946888425946, "learning_rate": 4.820559927248684e-06, "loss": 0.07693862915039062, "step": 67640 }, { "epoch": 0.5849063129588158, "grad_norm": 2.362749036184085, "learning_rate": 4.820397993104402e-06, "loss": 0.13045806884765626, "step": 67645 }, { "epoch": 0.584949546480359, "grad_norm": 2.8904155000696203, "learning_rate": 4.820236050564606e-06, "loss": 0.27232666015625, "step": 67650 }, { "epoch": 0.5849927800019022, "grad_norm": 22.55131141329689, "learning_rate": 4.820074099630043e-06, "loss": 0.07789535522460937, "step": 67655 }, { "epoch": 0.5850360135234456, "grad_norm": 6.758578139664903, "learning_rate": 4.819912140301458e-06, "loss": 0.187078857421875, "step": 67660 }, { "epoch": 0.5850792470449888, "grad_norm": 14.024061206856695, "learning_rate": 4.819750172579599e-06, "loss": 0.2666473388671875, "step": 67665 }, { "epoch": 0.585122480566532, "grad_norm": 18.615321180268893, "learning_rate": 4.819588196465214e-06, "loss": 0.3438560485839844, "step": 67670 }, { "epoch": 0.5851657140880754, "grad_norm": 3.394872946876965, "learning_rate": 4.819426211959048e-06, "loss": 0.2778228759765625, "step": 67675 }, { "epoch": 0.5852089476096186, "grad_norm": 0.8761667181694263, "learning_rate": 4.819264219061851e-06, "loss": 0.046117401123046874, "step": 67680 }, { "epoch": 0.5852521811311618, "grad_norm": 5.6623007658762425, "learning_rate": 4.819102217774366e-06, "loss": 0.08189468383789063, "step": 67685 }, { "epoch": 0.5852954146527051, "grad_norm": 0.7138511156404687, "learning_rate": 4.8189402080973435e-06, "loss": 0.102557373046875, "step": 67690 }, { "epoch": 0.5853386481742484, "grad_norm": 38.607847888116, "learning_rate": 4.8187781900315285e-06, "loss": 0.16613006591796875, "step": 67695 }, { "epoch": 0.5853818816957916, "grad_norm": 1.0496889255910953, "learning_rate": 4.818616163577669e-06, "loss": 0.23133087158203125, "step": 67700 }, { "epoch": 0.5854251152173349, "grad_norm": 8.283298942304254, "learning_rate": 4.818454128736514e-06, "loss": 0.21907634735107423, "step": 67705 }, { "epoch": 0.5854683487388782, "grad_norm": 17.065691316372227, "learning_rate": 4.8182920855088085e-06, "loss": 0.39522705078125, "step": 67710 }, { "epoch": 0.5855115822604214, "grad_norm": 11.828225167062882, "learning_rate": 4.818130033895301e-06, "loss": 0.111004638671875, "step": 67715 }, { "epoch": 0.5855548157819647, "grad_norm": 5.805682804304965, "learning_rate": 4.817967973896737e-06, "loss": 0.2502784729003906, "step": 67720 }, { "epoch": 0.585598049303508, "grad_norm": 10.89927409310192, "learning_rate": 4.8178059055138665e-06, "loss": 0.07846946716308593, "step": 67725 }, { "epoch": 0.5856412828250512, "grad_norm": 3.9267802480580634, "learning_rate": 4.817643828747435e-06, "loss": 0.1134307861328125, "step": 67730 }, { "epoch": 0.5856845163465945, "grad_norm": 0.33501558289434763, "learning_rate": 4.817481743598192e-06, "loss": 0.07590408325195312, "step": 67735 }, { "epoch": 0.5857277498681378, "grad_norm": 14.982295854861112, "learning_rate": 4.817319650066884e-06, "loss": 0.493292236328125, "step": 67740 }, { "epoch": 0.585770983389681, "grad_norm": 5.323532021258545, "learning_rate": 4.8171575481542565e-06, "loss": 0.13105316162109376, "step": 67745 }, { "epoch": 0.5858142169112243, "grad_norm": 0.9730543001042798, "learning_rate": 4.81699543786106e-06, "loss": 0.02119293212890625, "step": 67750 }, { "epoch": 0.5858574504327676, "grad_norm": 24.778801922474333, "learning_rate": 4.81683331918804e-06, "loss": 0.12314453125, "step": 67755 }, { "epoch": 0.5859006839543108, "grad_norm": 4.156432380007037, "learning_rate": 4.816671192135948e-06, "loss": 0.35335540771484375, "step": 67760 }, { "epoch": 0.5859439174758541, "grad_norm": 5.397973033380546, "learning_rate": 4.816509056705527e-06, "loss": 0.1805908203125, "step": 67765 }, { "epoch": 0.5859871509973973, "grad_norm": 4.29092122197055, "learning_rate": 4.816346912897527e-06, "loss": 0.1467498779296875, "step": 67770 }, { "epoch": 0.5860303845189406, "grad_norm": 22.273402827060632, "learning_rate": 4.816184760712695e-06, "loss": 0.143914794921875, "step": 67775 }, { "epoch": 0.5860736180404839, "grad_norm": 12.781187360743905, "learning_rate": 4.816022600151781e-06, "loss": 0.24139480590820311, "step": 67780 }, { "epoch": 0.5861168515620271, "grad_norm": 18.834619531431965, "learning_rate": 4.815860431215529e-06, "loss": 0.16426544189453124, "step": 67785 }, { "epoch": 0.5861600850835704, "grad_norm": 5.1514419866405685, "learning_rate": 4.81569825390469e-06, "loss": 0.04613571166992188, "step": 67790 }, { "epoch": 0.5862033186051137, "grad_norm": 0.6380443191960371, "learning_rate": 4.8155360682200124e-06, "loss": 0.11127738952636719, "step": 67795 }, { "epoch": 0.5862465521266569, "grad_norm": 5.0228694705553085, "learning_rate": 4.815373874162241e-06, "loss": 0.1069183349609375, "step": 67800 }, { "epoch": 0.5862897856482002, "grad_norm": 0.6189521061446133, "learning_rate": 4.815211671732127e-06, "loss": 0.0693115234375, "step": 67805 }, { "epoch": 0.5863330191697435, "grad_norm": 13.984851005582874, "learning_rate": 4.8150494609304165e-06, "loss": 0.2300943374633789, "step": 67810 }, { "epoch": 0.5863762526912867, "grad_norm": 5.433530528194445, "learning_rate": 4.814887241757859e-06, "loss": 0.24496726989746093, "step": 67815 }, { "epoch": 0.58641948621283, "grad_norm": 0.2498720175135624, "learning_rate": 4.8147250142152016e-06, "loss": 0.17049407958984375, "step": 67820 }, { "epoch": 0.5864627197343733, "grad_norm": 22.189570918602026, "learning_rate": 4.814562778303192e-06, "loss": 0.14916229248046875, "step": 67825 }, { "epoch": 0.5865059532559165, "grad_norm": 22.094119388396898, "learning_rate": 4.81440053402258e-06, "loss": 0.192120361328125, "step": 67830 }, { "epoch": 0.5865491867774598, "grad_norm": 12.380128440172507, "learning_rate": 4.814238281374112e-06, "loss": 0.18213882446289062, "step": 67835 }, { "epoch": 0.586592420299003, "grad_norm": 2.6251686830123675, "learning_rate": 4.814076020358539e-06, "loss": 0.098175048828125, "step": 67840 }, { "epoch": 0.5866356538205463, "grad_norm": 8.37798961042405, "learning_rate": 4.813913750976607e-06, "loss": 0.134844970703125, "step": 67845 }, { "epoch": 0.5866788873420896, "grad_norm": 2.6877132045333907, "learning_rate": 4.813751473229065e-06, "loss": 0.066192626953125, "step": 67850 }, { "epoch": 0.5867221208636328, "grad_norm": 30.045218461798193, "learning_rate": 4.813589187116661e-06, "loss": 0.22899980545043946, "step": 67855 }, { "epoch": 0.5867653543851761, "grad_norm": 7.79685418883952, "learning_rate": 4.813426892640144e-06, "loss": 0.091912841796875, "step": 67860 }, { "epoch": 0.5868085879067193, "grad_norm": 4.0284117150939505, "learning_rate": 4.8132645898002635e-06, "loss": 0.037646484375, "step": 67865 }, { "epoch": 0.5868518214282626, "grad_norm": 23.682966619488568, "learning_rate": 4.813102278597766e-06, "loss": 0.268914794921875, "step": 67870 }, { "epoch": 0.5868950549498059, "grad_norm": 2.632782729772258, "learning_rate": 4.812939959033401e-06, "loss": 0.05864410400390625, "step": 67875 }, { "epoch": 0.5869382884713491, "grad_norm": 24.884545675772443, "learning_rate": 4.812777631107918e-06, "loss": 0.36267967224121095, "step": 67880 }, { "epoch": 0.5869815219928924, "grad_norm": 45.39947495581798, "learning_rate": 4.812615294822064e-06, "loss": 0.2910736083984375, "step": 67885 }, { "epoch": 0.5870247555144357, "grad_norm": 12.118281061489474, "learning_rate": 4.812452950176588e-06, "loss": 0.23906631469726564, "step": 67890 }, { "epoch": 0.5870679890359789, "grad_norm": 10.689828869010297, "learning_rate": 4.81229059717224e-06, "loss": 0.14176864624023439, "step": 67895 }, { "epoch": 0.5871112225575222, "grad_norm": 4.592658144470107, "learning_rate": 4.812128235809767e-06, "loss": 0.08875732421875, "step": 67900 }, { "epoch": 0.5871544560790655, "grad_norm": 1.2998399947124295, "learning_rate": 4.81196586608992e-06, "loss": 0.07296600341796874, "step": 67905 }, { "epoch": 0.5871976896006087, "grad_norm": 18.646022921293476, "learning_rate": 4.811803488013445e-06, "loss": 0.20956573486328126, "step": 67910 }, { "epoch": 0.587240923122152, "grad_norm": 9.228390036508378, "learning_rate": 4.811641101581092e-06, "loss": 0.2390594482421875, "step": 67915 }, { "epoch": 0.5872841566436953, "grad_norm": 0.27852065314438207, "learning_rate": 4.811478706793612e-06, "loss": 0.13490219116210939, "step": 67920 }, { "epoch": 0.5873273901652385, "grad_norm": 16.430907083221033, "learning_rate": 4.811316303651752e-06, "loss": 0.1670074462890625, "step": 67925 }, { "epoch": 0.5873706236867818, "grad_norm": 1.5183089759607915, "learning_rate": 4.811153892156261e-06, "loss": 0.212591552734375, "step": 67930 }, { "epoch": 0.5874138572083251, "grad_norm": 39.689752992355515, "learning_rate": 4.810991472307889e-06, "loss": 0.3081047058105469, "step": 67935 }, { "epoch": 0.5874570907298683, "grad_norm": 1.4984301319047284, "learning_rate": 4.810829044107384e-06, "loss": 0.2068634033203125, "step": 67940 }, { "epoch": 0.5875003242514115, "grad_norm": 2.934751884830707, "learning_rate": 4.810666607555494e-06, "loss": 0.19159488677978515, "step": 67945 }, { "epoch": 0.5875435577729549, "grad_norm": 0.07749714766336921, "learning_rate": 4.8105041626529705e-06, "loss": 0.37404327392578124, "step": 67950 }, { "epoch": 0.5875867912944981, "grad_norm": 0.7187315214614787, "learning_rate": 4.810341709400563e-06, "loss": 0.06913681030273437, "step": 67955 }, { "epoch": 0.5876300248160413, "grad_norm": 6.909624960929049, "learning_rate": 4.810179247799018e-06, "loss": 0.3871002197265625, "step": 67960 }, { "epoch": 0.5876732583375847, "grad_norm": 1.7225542707835242, "learning_rate": 4.810016777849087e-06, "loss": 0.3042930603027344, "step": 67965 }, { "epoch": 0.5877164918591279, "grad_norm": 14.042663488603734, "learning_rate": 4.809854299551517e-06, "loss": 0.246136474609375, "step": 67970 }, { "epoch": 0.5877597253806711, "grad_norm": 114.59464945412915, "learning_rate": 4.8096918129070616e-06, "loss": 0.16564855575561524, "step": 67975 }, { "epoch": 0.5878029589022145, "grad_norm": 4.492799958871972, "learning_rate": 4.809529317916465e-06, "loss": 0.07642822265625, "step": 67980 }, { "epoch": 0.5878461924237577, "grad_norm": 2.8227080215082725, "learning_rate": 4.80936681458048e-06, "loss": 0.08167724609375, "step": 67985 }, { "epoch": 0.5878894259453009, "grad_norm": 9.749948452697383, "learning_rate": 4.809204302899856e-06, "loss": 0.08627777099609375, "step": 67990 }, { "epoch": 0.5879326594668443, "grad_norm": 14.550139113477137, "learning_rate": 4.80904178287534e-06, "loss": 0.18618621826171874, "step": 67995 }, { "epoch": 0.5879758929883875, "grad_norm": 4.903666787095757, "learning_rate": 4.808879254507685e-06, "loss": 0.13495712280273436, "step": 68000 }, { "epoch": 0.5880191265099307, "grad_norm": 1.6304511828837962, "learning_rate": 4.808716717797638e-06, "loss": 0.0943267822265625, "step": 68005 }, { "epoch": 0.588062360031474, "grad_norm": 1.6036539520339743, "learning_rate": 4.808554172745948e-06, "loss": 0.145806884765625, "step": 68010 }, { "epoch": 0.5881055935530173, "grad_norm": 1.0178122346028011, "learning_rate": 4.8083916193533675e-06, "loss": 0.10030136108398438, "step": 68015 }, { "epoch": 0.5881488270745605, "grad_norm": 0.2529583241438766, "learning_rate": 4.8082290576206446e-06, "loss": 0.40447540283203126, "step": 68020 }, { "epoch": 0.5881920605961038, "grad_norm": 11.074210313979581, "learning_rate": 4.808066487548527e-06, "loss": 0.08814697265625, "step": 68025 }, { "epoch": 0.5882352941176471, "grad_norm": 17.404531542443163, "learning_rate": 4.80790390913777e-06, "loss": 0.321923828125, "step": 68030 }, { "epoch": 0.5882785276391903, "grad_norm": 2.3814122969849563, "learning_rate": 4.807741322389117e-06, "loss": 0.11618499755859375, "step": 68035 }, { "epoch": 0.5883217611607335, "grad_norm": 6.117184379697648, "learning_rate": 4.8075787273033224e-06, "loss": 0.2347259521484375, "step": 68040 }, { "epoch": 0.5883649946822769, "grad_norm": 1.3331743823375146, "learning_rate": 4.807416123881134e-06, "loss": 0.024512481689453126, "step": 68045 }, { "epoch": 0.5884082282038201, "grad_norm": 21.52140642732303, "learning_rate": 4.807253512123301e-06, "loss": 0.17046661376953126, "step": 68050 }, { "epoch": 0.5884514617253633, "grad_norm": 1.0503689125426305, "learning_rate": 4.807090892030576e-06, "loss": 0.16347312927246094, "step": 68055 }, { "epoch": 0.5884946952469067, "grad_norm": 28.981723347525246, "learning_rate": 4.806928263603707e-06, "loss": 0.46384353637695314, "step": 68060 }, { "epoch": 0.5885379287684499, "grad_norm": 7.021881106472831, "learning_rate": 4.806765626843445e-06, "loss": 0.2713733673095703, "step": 68065 }, { "epoch": 0.5885811622899931, "grad_norm": 9.639990843120376, "learning_rate": 4.8066029817505385e-06, "loss": 0.14325275421142578, "step": 68070 }, { "epoch": 0.5886243958115365, "grad_norm": 27.429916147526942, "learning_rate": 4.806440328325739e-06, "loss": 0.17618789672851562, "step": 68075 }, { "epoch": 0.5886676293330797, "grad_norm": 6.321370260739955, "learning_rate": 4.806277666569797e-06, "loss": 0.33507080078125, "step": 68080 }, { "epoch": 0.5887108628546229, "grad_norm": 16.20606293229208, "learning_rate": 4.806114996483461e-06, "loss": 0.18614883422851564, "step": 68085 }, { "epoch": 0.5887540963761663, "grad_norm": 27.764901620630667, "learning_rate": 4.8059523180674825e-06, "loss": 0.25721435546875, "step": 68090 }, { "epoch": 0.5887973298977095, "grad_norm": 7.792039233031692, "learning_rate": 4.805789631322613e-06, "loss": 0.13586807250976562, "step": 68095 }, { "epoch": 0.5888405634192527, "grad_norm": 2.0686106381937535, "learning_rate": 4.8056269362496e-06, "loss": 0.1836700439453125, "step": 68100 }, { "epoch": 0.5888837969407961, "grad_norm": 1.8750957765605685, "learning_rate": 4.805464232849194e-06, "loss": 0.20331497192382814, "step": 68105 }, { "epoch": 0.5889270304623393, "grad_norm": 0.3096501874184894, "learning_rate": 4.805301521122148e-06, "loss": 0.0607666015625, "step": 68110 }, { "epoch": 0.5889702639838825, "grad_norm": 14.235865765659055, "learning_rate": 4.8051388010692115e-06, "loss": 0.28296051025390623, "step": 68115 }, { "epoch": 0.5890134975054258, "grad_norm": 31.34836340664625, "learning_rate": 4.804976072691133e-06, "loss": 0.30401611328125, "step": 68120 }, { "epoch": 0.5890567310269691, "grad_norm": 2.7158322449760304, "learning_rate": 4.804813335988664e-06, "loss": 0.08563461303710937, "step": 68125 }, { "epoch": 0.5890999645485123, "grad_norm": 5.656092450032354, "learning_rate": 4.804650590962557e-06, "loss": 0.6698211669921875, "step": 68130 }, { "epoch": 0.5891431980700556, "grad_norm": 4.1935443990546455, "learning_rate": 4.80448783761356e-06, "loss": 0.0879852294921875, "step": 68135 }, { "epoch": 0.5891864315915989, "grad_norm": 0.44070485214386784, "learning_rate": 4.8043250759424244e-06, "loss": 0.0638458251953125, "step": 68140 }, { "epoch": 0.5892296651131421, "grad_norm": 4.384549887951632, "learning_rate": 4.804162305949901e-06, "loss": 0.04441680908203125, "step": 68145 }, { "epoch": 0.5892728986346853, "grad_norm": 0.5748495600881571, "learning_rate": 4.803999527636741e-06, "loss": 0.03128662109375, "step": 68150 }, { "epoch": 0.5893161321562287, "grad_norm": 22.753585121340024, "learning_rate": 4.803836741003694e-06, "loss": 0.25360565185546874, "step": 68155 }, { "epoch": 0.5893593656777719, "grad_norm": 26.856890230549084, "learning_rate": 4.803673946051511e-06, "loss": 0.1880615234375, "step": 68160 }, { "epoch": 0.5894025991993151, "grad_norm": 1.5728206125300919, "learning_rate": 4.803511142780945e-06, "loss": 0.14239349365234374, "step": 68165 }, { "epoch": 0.5894458327208585, "grad_norm": 1.061003814568714, "learning_rate": 4.8033483311927435e-06, "loss": 0.2909111022949219, "step": 68170 }, { "epoch": 0.5894890662424017, "grad_norm": 12.376774079604758, "learning_rate": 4.803185511287659e-06, "loss": 0.15223007202148436, "step": 68175 }, { "epoch": 0.589532299763945, "grad_norm": 7.536600842647821, "learning_rate": 4.803022683066442e-06, "loss": 0.1856903076171875, "step": 68180 }, { "epoch": 0.5895755332854883, "grad_norm": 2.199486592239652, "learning_rate": 4.802859846529844e-06, "loss": 0.13545570373535157, "step": 68185 }, { "epoch": 0.5896187668070315, "grad_norm": 16.76827952620235, "learning_rate": 4.802697001678616e-06, "loss": 0.12938461303710938, "step": 68190 }, { "epoch": 0.5896620003285747, "grad_norm": 0.948780652208219, "learning_rate": 4.802534148513509e-06, "loss": 0.041163063049316405, "step": 68195 }, { "epoch": 0.5897052338501181, "grad_norm": 6.00558856513652, "learning_rate": 4.802371287035272e-06, "loss": 0.113226318359375, "step": 68200 }, { "epoch": 0.5897484673716613, "grad_norm": 22.836837537925554, "learning_rate": 4.802208417244659e-06, "loss": 0.19197998046875, "step": 68205 }, { "epoch": 0.5897917008932045, "grad_norm": 6.266021465376554, "learning_rate": 4.80204553914242e-06, "loss": 0.24044342041015626, "step": 68210 }, { "epoch": 0.5898349344147478, "grad_norm": 0.8740430794077052, "learning_rate": 4.801882652729307e-06, "loss": 0.20237579345703124, "step": 68215 }, { "epoch": 0.5898781679362911, "grad_norm": 21.40322075967707, "learning_rate": 4.80171975800607e-06, "loss": 0.16810073852539062, "step": 68220 }, { "epoch": 0.5899214014578343, "grad_norm": 13.013573441619233, "learning_rate": 4.80155685497346e-06, "loss": 0.167755126953125, "step": 68225 }, { "epoch": 0.5899646349793776, "grad_norm": 14.040012512248044, "learning_rate": 4.801393943632229e-06, "loss": 0.138641357421875, "step": 68230 }, { "epoch": 0.5900078685009209, "grad_norm": 8.051383625728434, "learning_rate": 4.8012310239831286e-06, "loss": 0.15302734375, "step": 68235 }, { "epoch": 0.5900511020224641, "grad_norm": 2.5822272567625486, "learning_rate": 4.80106809602691e-06, "loss": 0.09203338623046875, "step": 68240 }, { "epoch": 0.5900943355440074, "grad_norm": 2.744005912429879, "learning_rate": 4.800905159764325e-06, "loss": 0.102239990234375, "step": 68245 }, { "epoch": 0.5901375690655507, "grad_norm": 31.334136027109555, "learning_rate": 4.800742215196124e-06, "loss": 0.37510833740234373, "step": 68250 }, { "epoch": 0.5901808025870939, "grad_norm": 0.9522061062356504, "learning_rate": 4.800579262323058e-06, "loss": 0.11056098937988282, "step": 68255 }, { "epoch": 0.5902240361086372, "grad_norm": 0.32777777825413223, "learning_rate": 4.8004163011458815e-06, "loss": 0.1893768310546875, "step": 68260 }, { "epoch": 0.5902672696301805, "grad_norm": 2.5880756328007064, "learning_rate": 4.800253331665343e-06, "loss": 0.09258651733398438, "step": 68265 }, { "epoch": 0.5903105031517237, "grad_norm": 41.456414527448466, "learning_rate": 4.800090353882195e-06, "loss": 0.4226951599121094, "step": 68270 }, { "epoch": 0.590353736673267, "grad_norm": 4.984295149982733, "learning_rate": 4.7999273677971896e-06, "loss": 0.22054443359375, "step": 68275 }, { "epoch": 0.5903969701948103, "grad_norm": 3.459146407200667, "learning_rate": 4.799764373411079e-06, "loss": 0.13350753784179686, "step": 68280 }, { "epoch": 0.5904402037163535, "grad_norm": 1.4902795349587517, "learning_rate": 4.799601370724613e-06, "loss": 0.1248016357421875, "step": 68285 }, { "epoch": 0.5904834372378968, "grad_norm": 4.015600981118535, "learning_rate": 4.799438359738545e-06, "loss": 0.33665771484375, "step": 68290 }, { "epoch": 0.59052667075944, "grad_norm": 11.399834984589589, "learning_rate": 4.799275340453627e-06, "loss": 0.0700469970703125, "step": 68295 }, { "epoch": 0.5905699042809833, "grad_norm": 2.868920842988121, "learning_rate": 4.7991123128706085e-06, "loss": 0.026123046875, "step": 68300 }, { "epoch": 0.5906131378025266, "grad_norm": 1.5863276906584758, "learning_rate": 4.798949276990244e-06, "loss": 0.4328746795654297, "step": 68305 }, { "epoch": 0.5906563713240698, "grad_norm": 1.2563286969491498, "learning_rate": 4.798786232813284e-06, "loss": 0.050365447998046875, "step": 68310 }, { "epoch": 0.5906996048456131, "grad_norm": 11.241197619865037, "learning_rate": 4.798623180340481e-06, "loss": 0.10243511199951172, "step": 68315 }, { "epoch": 0.5907428383671564, "grad_norm": 18.08025308298033, "learning_rate": 4.798460119572587e-06, "loss": 0.15225906372070314, "step": 68320 }, { "epoch": 0.5907860718886996, "grad_norm": 50.55883977135284, "learning_rate": 4.798297050510354e-06, "loss": 0.5772140502929688, "step": 68325 }, { "epoch": 0.5908293054102429, "grad_norm": 21.67709907266195, "learning_rate": 4.798133973154534e-06, "loss": 0.16602020263671874, "step": 68330 }, { "epoch": 0.5908725389317862, "grad_norm": 3.218655459212324, "learning_rate": 4.797970887505878e-06, "loss": 0.16515960693359374, "step": 68335 }, { "epoch": 0.5909157724533294, "grad_norm": 5.409036428142189, "learning_rate": 4.79780779356514e-06, "loss": 0.049650955200195315, "step": 68340 }, { "epoch": 0.5909590059748727, "grad_norm": 1.7382637867508481, "learning_rate": 4.797644691333071e-06, "loss": 0.08615570068359375, "step": 68345 }, { "epoch": 0.591002239496416, "grad_norm": 2.225357185312172, "learning_rate": 4.797481580810423e-06, "loss": 0.15895767211914064, "step": 68350 }, { "epoch": 0.5910454730179592, "grad_norm": 2.9506780818719927, "learning_rate": 4.797318461997949e-06, "loss": 0.0154052734375, "step": 68355 }, { "epoch": 0.5910887065395025, "grad_norm": 1.1581709668258378, "learning_rate": 4.797155334896403e-06, "loss": 0.08292236328125, "step": 68360 }, { "epoch": 0.5911319400610457, "grad_norm": 4.331353184397354, "learning_rate": 4.796992199506533e-06, "loss": 0.031420135498046876, "step": 68365 }, { "epoch": 0.591175173582589, "grad_norm": 5.622604352768235, "learning_rate": 4.796829055829095e-06, "loss": 0.31113662719726565, "step": 68370 }, { "epoch": 0.5912184071041323, "grad_norm": 25.921972090743527, "learning_rate": 4.796665903864839e-06, "loss": 0.2008697509765625, "step": 68375 }, { "epoch": 0.5912616406256755, "grad_norm": 5.5153309841604825, "learning_rate": 4.79650274361452e-06, "loss": 0.126434326171875, "step": 68380 }, { "epoch": 0.5913048741472188, "grad_norm": 26.93338296673136, "learning_rate": 4.796339575078889e-06, "loss": 0.19478759765625, "step": 68385 }, { "epoch": 0.591348107668762, "grad_norm": 1.4464768553354135, "learning_rate": 4.7961763982586975e-06, "loss": 0.251666259765625, "step": 68390 }, { "epoch": 0.5913913411903053, "grad_norm": 9.180116008720113, "learning_rate": 4.7960132131546994e-06, "loss": 0.23986053466796875, "step": 68395 }, { "epoch": 0.5914345747118486, "grad_norm": 0.20919831612343442, "learning_rate": 4.7958500197676475e-06, "loss": 0.12333984375, "step": 68400 }, { "epoch": 0.5914778082333918, "grad_norm": 4.343675147585023, "learning_rate": 4.795686818098294e-06, "loss": 0.249517822265625, "step": 68405 }, { "epoch": 0.5915210417549351, "grad_norm": 9.362019806727421, "learning_rate": 4.7955236081473915e-06, "loss": 0.0634765625, "step": 68410 }, { "epoch": 0.5915642752764784, "grad_norm": 3.5946112501682967, "learning_rate": 4.795360389915693e-06, "loss": 0.23197021484375, "step": 68415 }, { "epoch": 0.5916075087980216, "grad_norm": 5.1347861581630765, "learning_rate": 4.795197163403951e-06, "loss": 0.127423095703125, "step": 68420 }, { "epoch": 0.5916507423195649, "grad_norm": 1.3075449137119861, "learning_rate": 4.795033928612917e-06, "loss": 0.012544631958007812, "step": 68425 }, { "epoch": 0.5916939758411082, "grad_norm": 44.09397823381046, "learning_rate": 4.794870685543347e-06, "loss": 0.2494110107421875, "step": 68430 }, { "epoch": 0.5917372093626514, "grad_norm": 1.0839235258727828, "learning_rate": 4.794707434195991e-06, "loss": 0.05968017578125, "step": 68435 }, { "epoch": 0.5917804428841947, "grad_norm": 17.07391807426481, "learning_rate": 4.794544174571602e-06, "loss": 0.14072608947753906, "step": 68440 }, { "epoch": 0.591823676405738, "grad_norm": 8.281194688721483, "learning_rate": 4.794380906670936e-06, "loss": 0.3402740478515625, "step": 68445 }, { "epoch": 0.5918669099272812, "grad_norm": 1.1419425589184915, "learning_rate": 4.794217630494742e-06, "loss": 0.08699760437011719, "step": 68450 }, { "epoch": 0.5919101434488245, "grad_norm": 25.74817388322394, "learning_rate": 4.794054346043775e-06, "loss": 0.4000823974609375, "step": 68455 }, { "epoch": 0.5919533769703678, "grad_norm": 0.40956848515400307, "learning_rate": 4.79389105331879e-06, "loss": 0.093743896484375, "step": 68460 }, { "epoch": 0.591996610491911, "grad_norm": 112.35421732459851, "learning_rate": 4.793727752320536e-06, "loss": 0.14258956909179688, "step": 68465 }, { "epoch": 0.5920398440134542, "grad_norm": 2.604955698759303, "learning_rate": 4.793564443049769e-06, "loss": 0.299755859375, "step": 68470 }, { "epoch": 0.5920830775349976, "grad_norm": 0.45229994430482584, "learning_rate": 4.793401125507241e-06, "loss": 0.43766326904296876, "step": 68475 }, { "epoch": 0.5921263110565408, "grad_norm": 0.09718299695726104, "learning_rate": 4.7932377996937044e-06, "loss": 0.0638702392578125, "step": 68480 }, { "epoch": 0.592169544578084, "grad_norm": 2.778088731450862, "learning_rate": 4.7930744656099145e-06, "loss": 0.11284637451171875, "step": 68485 }, { "epoch": 0.5922127780996274, "grad_norm": 27.74313677532437, "learning_rate": 4.792911123256624e-06, "loss": 0.08092041015625, "step": 68490 }, { "epoch": 0.5922560116211706, "grad_norm": 2.5864754012132094, "learning_rate": 4.792747772634584e-06, "loss": 0.1013092041015625, "step": 68495 }, { "epoch": 0.5922992451427138, "grad_norm": 3.1521598103248394, "learning_rate": 4.792584413744552e-06, "loss": 0.0778106689453125, "step": 68500 }, { "epoch": 0.5923424786642572, "grad_norm": 2.7323977191575524, "learning_rate": 4.792421046587278e-06, "loss": 0.061138916015625, "step": 68505 }, { "epoch": 0.5923857121858004, "grad_norm": 8.150896489461578, "learning_rate": 4.7922576711635165e-06, "loss": 0.060345458984375, "step": 68510 }, { "epoch": 0.5924289457073436, "grad_norm": 3.7660794003095694, "learning_rate": 4.792094287474022e-06, "loss": 0.15399761199951173, "step": 68515 }, { "epoch": 0.592472179228887, "grad_norm": 0.3136459253025327, "learning_rate": 4.791930895519544e-06, "loss": 0.02061004638671875, "step": 68520 }, { "epoch": 0.5925154127504302, "grad_norm": 13.583958733730444, "learning_rate": 4.791767495300842e-06, "loss": 0.11844711303710938, "step": 68525 }, { "epoch": 0.5925586462719734, "grad_norm": 2.8088758215049068, "learning_rate": 4.7916040868186664e-06, "loss": 0.340625, "step": 68530 }, { "epoch": 0.5926018797935168, "grad_norm": 22.96458576103803, "learning_rate": 4.79144067007377e-06, "loss": 0.2667797088623047, "step": 68535 }, { "epoch": 0.59264511331506, "grad_norm": 3.7723371547323494, "learning_rate": 4.7912772450669085e-06, "loss": 0.1117828369140625, "step": 68540 }, { "epoch": 0.5926883468366032, "grad_norm": 2.1050944960378244, "learning_rate": 4.791113811798833e-06, "loss": 0.29744491577148435, "step": 68545 }, { "epoch": 0.5927315803581464, "grad_norm": 44.51355265523246, "learning_rate": 4.790950370270301e-06, "loss": 0.24311904907226561, "step": 68550 }, { "epoch": 0.5927748138796898, "grad_norm": 7.303975835000679, "learning_rate": 4.790786920482062e-06, "loss": 0.323101806640625, "step": 68555 }, { "epoch": 0.592818047401233, "grad_norm": 0.9169781359755307, "learning_rate": 4.790623462434874e-06, "loss": 0.0615814208984375, "step": 68560 }, { "epoch": 0.5928612809227762, "grad_norm": 42.441499171531, "learning_rate": 4.790459996129487e-06, "loss": 0.2851570129394531, "step": 68565 }, { "epoch": 0.5929045144443196, "grad_norm": 0.926447960819861, "learning_rate": 4.790296521566658e-06, "loss": 0.2257293701171875, "step": 68570 }, { "epoch": 0.5929477479658628, "grad_norm": 1.8567444573008671, "learning_rate": 4.790133038747139e-06, "loss": 0.2727203369140625, "step": 68575 }, { "epoch": 0.592990981487406, "grad_norm": 10.337901456773166, "learning_rate": 4.789969547671685e-06, "loss": 0.14525890350341797, "step": 68580 }, { "epoch": 0.5930342150089494, "grad_norm": 5.4918547335066545, "learning_rate": 4.7898060483410495e-06, "loss": 0.13305892944335937, "step": 68585 }, { "epoch": 0.5930774485304926, "grad_norm": 13.187189603723326, "learning_rate": 4.789642540755986e-06, "loss": 0.0826141357421875, "step": 68590 }, { "epoch": 0.5931206820520358, "grad_norm": 0.9242241621785067, "learning_rate": 4.78947902491725e-06, "loss": 0.29131317138671875, "step": 68595 }, { "epoch": 0.5931639155735792, "grad_norm": 2.632723233440846, "learning_rate": 4.7893155008255945e-06, "loss": 0.23171615600585938, "step": 68600 }, { "epoch": 0.5932071490951224, "grad_norm": 2.375492927126442, "learning_rate": 4.789151968481775e-06, "loss": 0.16655120849609376, "step": 68605 }, { "epoch": 0.5932503826166656, "grad_norm": 1.3758594613054353, "learning_rate": 4.7889884278865435e-06, "loss": 0.21912364959716796, "step": 68610 }, { "epoch": 0.593293616138209, "grad_norm": 18.609094256987433, "learning_rate": 4.788824879040656e-06, "loss": 0.20304107666015625, "step": 68615 }, { "epoch": 0.5933368496597522, "grad_norm": 4.873711001778022, "learning_rate": 4.7886613219448654e-06, "loss": 0.05353355407714844, "step": 68620 }, { "epoch": 0.5933800831812954, "grad_norm": 1.718155101466065, "learning_rate": 4.788497756599928e-06, "loss": 0.06112594604492187, "step": 68625 }, { "epoch": 0.5934233167028388, "grad_norm": 35.25164734010454, "learning_rate": 4.788334183006595e-06, "loss": 0.279742431640625, "step": 68630 }, { "epoch": 0.593466550224382, "grad_norm": 5.0833743030252405, "learning_rate": 4.788170601165625e-06, "loss": 0.180126953125, "step": 68635 }, { "epoch": 0.5935097837459252, "grad_norm": 11.945829432596499, "learning_rate": 4.78800701107777e-06, "loss": 0.06835050582885742, "step": 68640 }, { "epoch": 0.5935530172674685, "grad_norm": 11.050015887771487, "learning_rate": 4.787843412743784e-06, "loss": 0.09440574645996094, "step": 68645 }, { "epoch": 0.5935962507890118, "grad_norm": 5.567746597219879, "learning_rate": 4.787679806164422e-06, "loss": 0.1453460693359375, "step": 68650 }, { "epoch": 0.593639484310555, "grad_norm": 0.5093270520432708, "learning_rate": 4.787516191340439e-06, "loss": 0.3315399169921875, "step": 68655 }, { "epoch": 0.5936827178320983, "grad_norm": 2.862896550947667, "learning_rate": 4.78735256827259e-06, "loss": 0.35201091766357423, "step": 68660 }, { "epoch": 0.5937259513536416, "grad_norm": 10.363617715860862, "learning_rate": 4.787188936961627e-06, "loss": 0.101104736328125, "step": 68665 }, { "epoch": 0.5937691848751848, "grad_norm": 124.23281905301685, "learning_rate": 4.787025297408309e-06, "loss": 0.5851242065429687, "step": 68670 }, { "epoch": 0.593812418396728, "grad_norm": 14.26643344649694, "learning_rate": 4.7868616496133865e-06, "loss": 0.07236175537109375, "step": 68675 }, { "epoch": 0.5938556519182714, "grad_norm": 40.2381109408416, "learning_rate": 4.7866979935776156e-06, "loss": 0.2222900390625, "step": 68680 }, { "epoch": 0.5938988854398146, "grad_norm": 2.81707974711384, "learning_rate": 4.786534329301752e-06, "loss": 0.28837890625, "step": 68685 }, { "epoch": 0.5939421189613578, "grad_norm": 14.922191154744798, "learning_rate": 4.7863706567865504e-06, "loss": 0.23193817138671874, "step": 68690 }, { "epoch": 0.5939853524829012, "grad_norm": 9.473340801137608, "learning_rate": 4.786206976032765e-06, "loss": 0.42466278076171876, "step": 68695 }, { "epoch": 0.5940285860044444, "grad_norm": 5.866115746311902, "learning_rate": 4.786043287041151e-06, "loss": 0.1919281005859375, "step": 68700 }, { "epoch": 0.5940718195259876, "grad_norm": 19.961766609729544, "learning_rate": 4.785879589812463e-06, "loss": 0.1647857666015625, "step": 68705 }, { "epoch": 0.594115053047531, "grad_norm": 7.179393103807795, "learning_rate": 4.785715884347456e-06, "loss": 0.1871673583984375, "step": 68710 }, { "epoch": 0.5941582865690742, "grad_norm": 6.346163036637975, "learning_rate": 4.785552170646885e-06, "loss": 0.16325302124023439, "step": 68715 }, { "epoch": 0.5942015200906174, "grad_norm": 33.0677642610608, "learning_rate": 4.785388448711505e-06, "loss": 0.2445648193359375, "step": 68720 }, { "epoch": 0.5942447536121607, "grad_norm": 15.3691334022653, "learning_rate": 4.785224718542072e-06, "loss": 0.447552490234375, "step": 68725 }, { "epoch": 0.594287987133704, "grad_norm": 8.751881685675341, "learning_rate": 4.785060980139339e-06, "loss": 0.15638465881347657, "step": 68730 }, { "epoch": 0.5943312206552472, "grad_norm": 18.463363364909206, "learning_rate": 4.7848972335040635e-06, "loss": 0.1179840087890625, "step": 68735 }, { "epoch": 0.5943744541767905, "grad_norm": 11.46485032320439, "learning_rate": 4.784733478637e-06, "loss": 0.53134765625, "step": 68740 }, { "epoch": 0.5944176876983338, "grad_norm": 0.8162339052852478, "learning_rate": 4.784569715538903e-06, "loss": 0.23276901245117188, "step": 68745 }, { "epoch": 0.594460921219877, "grad_norm": 6.436583084483726, "learning_rate": 4.784405944210528e-06, "loss": 0.07229461669921874, "step": 68750 }, { "epoch": 0.5945041547414203, "grad_norm": 4.174119647700111, "learning_rate": 4.784242164652631e-06, "loss": 0.0923370361328125, "step": 68755 }, { "epoch": 0.5945473882629636, "grad_norm": 2.558354588397841, "learning_rate": 4.784078376865966e-06, "loss": 0.18436698913574218, "step": 68760 }, { "epoch": 0.5945906217845068, "grad_norm": 2.8592056283289518, "learning_rate": 4.783914580851289e-06, "loss": 0.3493156433105469, "step": 68765 }, { "epoch": 0.5946338553060501, "grad_norm": 0.040426208419238656, "learning_rate": 4.783750776609356e-06, "loss": 0.11061897277832031, "step": 68770 }, { "epoch": 0.5946770888275934, "grad_norm": 17.7764795626146, "learning_rate": 4.7835869641409225e-06, "loss": 0.23736000061035156, "step": 68775 }, { "epoch": 0.5947203223491366, "grad_norm": 1.0247990349963392, "learning_rate": 4.783423143446744e-06, "loss": 0.14386138916015626, "step": 68780 }, { "epoch": 0.5947635558706799, "grad_norm": 3.7563437705894254, "learning_rate": 4.7832593145275745e-06, "loss": 0.2222076416015625, "step": 68785 }, { "epoch": 0.5948067893922232, "grad_norm": 30.464505063450392, "learning_rate": 4.7830954773841705e-06, "loss": 0.17671356201171876, "step": 68790 }, { "epoch": 0.5948500229137664, "grad_norm": 1.1579014221867843, "learning_rate": 4.782931632017289e-06, "loss": 0.05639190673828125, "step": 68795 }, { "epoch": 0.5948932564353097, "grad_norm": 3.0583824840615272, "learning_rate": 4.782767778427683e-06, "loss": 0.13735504150390626, "step": 68800 }, { "epoch": 0.594936489956853, "grad_norm": 31.827622826238112, "learning_rate": 4.78260391661611e-06, "loss": 0.32099609375, "step": 68805 }, { "epoch": 0.5949797234783962, "grad_norm": 3.981027224307676, "learning_rate": 4.782440046583325e-06, "loss": 0.08415908813476562, "step": 68810 }, { "epoch": 0.5950229569999395, "grad_norm": 33.796382801083666, "learning_rate": 4.782276168330084e-06, "loss": 0.17131900787353516, "step": 68815 }, { "epoch": 0.5950661905214827, "grad_norm": 8.944559512063767, "learning_rate": 4.7821122818571435e-06, "loss": 0.05884246826171875, "step": 68820 }, { "epoch": 0.595109424043026, "grad_norm": 1.4135567674253393, "learning_rate": 4.781948387165258e-06, "loss": 0.15898056030273439, "step": 68825 }, { "epoch": 0.5951526575645693, "grad_norm": 10.542607846131466, "learning_rate": 4.7817844842551845e-06, "loss": 0.23290367126464845, "step": 68830 }, { "epoch": 0.5951958910861125, "grad_norm": 20.755535567724426, "learning_rate": 4.781620573127678e-06, "loss": 0.5214508056640625, "step": 68835 }, { "epoch": 0.5952391246076558, "grad_norm": 6.543727904402873, "learning_rate": 4.781456653783494e-06, "loss": 0.0379913330078125, "step": 68840 }, { "epoch": 0.595282358129199, "grad_norm": 25.887680501823585, "learning_rate": 4.78129272622339e-06, "loss": 0.2384735107421875, "step": 68845 }, { "epoch": 0.5953255916507423, "grad_norm": 7.3659014514664465, "learning_rate": 4.781128790448122e-06, "loss": 0.3076789855957031, "step": 68850 }, { "epoch": 0.5953688251722856, "grad_norm": 4.300211501919816, "learning_rate": 4.780964846458445e-06, "loss": 0.05500717163085937, "step": 68855 }, { "epoch": 0.5954120586938288, "grad_norm": 8.24537736187098, "learning_rate": 4.780800894255115e-06, "loss": 0.29673042297363283, "step": 68860 }, { "epoch": 0.5954552922153721, "grad_norm": 9.57297256363328, "learning_rate": 4.78063693383889e-06, "loss": 0.19187774658203124, "step": 68865 }, { "epoch": 0.5954985257369154, "grad_norm": 5.619294851465296, "learning_rate": 4.780472965210523e-06, "loss": 0.1177978515625, "step": 68870 }, { "epoch": 0.5955417592584586, "grad_norm": 0.529057311790796, "learning_rate": 4.780308988370773e-06, "loss": 0.06785736083984376, "step": 68875 }, { "epoch": 0.5955849927800019, "grad_norm": 1.084321782574462, "learning_rate": 4.7801450033203945e-06, "loss": 0.040256500244140625, "step": 68880 }, { "epoch": 0.5956282263015452, "grad_norm": 4.402739541097358, "learning_rate": 4.779981010060146e-06, "loss": 0.089404296875, "step": 68885 }, { "epoch": 0.5956714598230884, "grad_norm": 43.72925844278333, "learning_rate": 4.779817008590781e-06, "loss": 0.5807373046875, "step": 68890 }, { "epoch": 0.5957146933446317, "grad_norm": 0.6489962828622178, "learning_rate": 4.7796529989130584e-06, "loss": 0.03794231414794922, "step": 68895 }, { "epoch": 0.5957579268661749, "grad_norm": 0.6656043955854127, "learning_rate": 4.779488981027733e-06, "loss": 0.33575592041015623, "step": 68900 }, { "epoch": 0.5958011603877182, "grad_norm": 0.6004126161612708, "learning_rate": 4.77932495493556e-06, "loss": 0.07695541381835938, "step": 68905 }, { "epoch": 0.5958443939092615, "grad_norm": 5.32087081882586, "learning_rate": 4.7791609206372996e-06, "loss": 0.03792877197265625, "step": 68910 }, { "epoch": 0.5958876274308047, "grad_norm": 4.410499731972736, "learning_rate": 4.778996878133706e-06, "loss": 0.10087242126464843, "step": 68915 }, { "epoch": 0.595930860952348, "grad_norm": 2.2775679852887634, "learning_rate": 4.7788328274255354e-06, "loss": 0.2344970703125, "step": 68920 }, { "epoch": 0.5959740944738913, "grad_norm": 1.0720113070475508, "learning_rate": 4.778668768513545e-06, "loss": 0.0493804931640625, "step": 68925 }, { "epoch": 0.5960173279954345, "grad_norm": 3.1764628252488785, "learning_rate": 4.778504701398492e-06, "loss": 0.114605712890625, "step": 68930 }, { "epoch": 0.5960605615169778, "grad_norm": 20.560096464124676, "learning_rate": 4.778340626081132e-06, "loss": 0.18690071105957032, "step": 68935 }, { "epoch": 0.5961037950385211, "grad_norm": 5.806924305951762, "learning_rate": 4.7781765425622224e-06, "loss": 0.169464111328125, "step": 68940 }, { "epoch": 0.5961470285600643, "grad_norm": 0.3635761393272504, "learning_rate": 4.778012450842519e-06, "loss": 0.0963226318359375, "step": 68945 }, { "epoch": 0.5961902620816076, "grad_norm": 6.3056784945508255, "learning_rate": 4.777848350922779e-06, "loss": 0.12572879791259767, "step": 68950 }, { "epoch": 0.5962334956031509, "grad_norm": 6.947412084681072, "learning_rate": 4.777684242803761e-06, "loss": 0.2384552001953125, "step": 68955 }, { "epoch": 0.5962767291246941, "grad_norm": 15.867188533574907, "learning_rate": 4.77752012648622e-06, "loss": 0.19581451416015624, "step": 68960 }, { "epoch": 0.5963199626462374, "grad_norm": 6.555309713215758, "learning_rate": 4.7773560019709125e-06, "loss": 0.23216705322265624, "step": 68965 }, { "epoch": 0.5963631961677807, "grad_norm": 1.120835560205975, "learning_rate": 4.7771918692585964e-06, "loss": 0.27695465087890625, "step": 68970 }, { "epoch": 0.5964064296893239, "grad_norm": 2.7208589808206067, "learning_rate": 4.7770277283500295e-06, "loss": 0.0965484619140625, "step": 68975 }, { "epoch": 0.5964496632108672, "grad_norm": 30.435430173238576, "learning_rate": 4.776863579245967e-06, "loss": 0.34858245849609376, "step": 68980 }, { "epoch": 0.5964928967324105, "grad_norm": 1.167451911645942, "learning_rate": 4.776699421947166e-06, "loss": 0.224609375, "step": 68985 }, { "epoch": 0.5965361302539537, "grad_norm": 5.962047049194491, "learning_rate": 4.776535256454385e-06, "loss": 0.1605133056640625, "step": 68990 }, { "epoch": 0.5965793637754969, "grad_norm": 0.5643511016261624, "learning_rate": 4.7763710827683805e-06, "loss": 0.10735321044921875, "step": 68995 }, { "epoch": 0.5966225972970403, "grad_norm": 12.285437529657328, "learning_rate": 4.776206900889908e-06, "loss": 0.5858673095703125, "step": 69000 }, { "epoch": 0.5966658308185835, "grad_norm": 0.2801687810661139, "learning_rate": 4.776042710819728e-06, "loss": 0.1048126220703125, "step": 69005 }, { "epoch": 0.5967090643401267, "grad_norm": 41.1851025269893, "learning_rate": 4.775878512558596e-06, "loss": 0.578076171875, "step": 69010 }, { "epoch": 0.5967522978616701, "grad_norm": 68.94017359756775, "learning_rate": 4.775714306107268e-06, "loss": 0.4928955078125, "step": 69015 }, { "epoch": 0.5967955313832133, "grad_norm": 11.220791822810511, "learning_rate": 4.775550091466503e-06, "loss": 0.36951217651367185, "step": 69020 }, { "epoch": 0.5968387649047565, "grad_norm": 17.18746264025822, "learning_rate": 4.775385868637058e-06, "loss": 0.124371337890625, "step": 69025 }, { "epoch": 0.5968819984262999, "grad_norm": 0.18842181190637666, "learning_rate": 4.775221637619689e-06, "loss": 0.0482666015625, "step": 69030 }, { "epoch": 0.5969252319478431, "grad_norm": 24.633931443141417, "learning_rate": 4.775057398415157e-06, "loss": 0.35845260620117186, "step": 69035 }, { "epoch": 0.5969684654693863, "grad_norm": 0.7411122968950445, "learning_rate": 4.774893151024216e-06, "loss": 0.0245269775390625, "step": 69040 }, { "epoch": 0.5970116989909297, "grad_norm": 24.663580178853685, "learning_rate": 4.774728895447624e-06, "loss": 0.170758056640625, "step": 69045 }, { "epoch": 0.5970549325124729, "grad_norm": 10.444873405840681, "learning_rate": 4.77456463168614e-06, "loss": 0.07274360656738281, "step": 69050 }, { "epoch": 0.5970981660340161, "grad_norm": 12.711783298776538, "learning_rate": 4.774400359740522e-06, "loss": 0.07999801635742188, "step": 69055 }, { "epoch": 0.5971413995555594, "grad_norm": 1.0187034868515796, "learning_rate": 4.774236079611524e-06, "loss": 0.08736572265625, "step": 69060 }, { "epoch": 0.5971846330771027, "grad_norm": 0.3790510177846633, "learning_rate": 4.774071791299907e-06, "loss": 0.1326080322265625, "step": 69065 }, { "epoch": 0.5972278665986459, "grad_norm": 4.599583537209834, "learning_rate": 4.7739074948064275e-06, "loss": 0.07120361328125, "step": 69070 }, { "epoch": 0.5972711001201891, "grad_norm": 30.821932064434332, "learning_rate": 4.773743190131843e-06, "loss": 0.226153564453125, "step": 69075 }, { "epoch": 0.5973143336417325, "grad_norm": 15.10715382955792, "learning_rate": 4.773578877276912e-06, "loss": 0.114874267578125, "step": 69080 }, { "epoch": 0.5973575671632757, "grad_norm": 15.45013630774107, "learning_rate": 4.773414556242392e-06, "loss": 0.14971771240234374, "step": 69085 }, { "epoch": 0.5974008006848189, "grad_norm": 23.12623437113896, "learning_rate": 4.77325022702904e-06, "loss": 0.115411376953125, "step": 69090 }, { "epoch": 0.5974440342063623, "grad_norm": 7.033674959538262, "learning_rate": 4.773085889637616e-06, "loss": 0.17133331298828125, "step": 69095 }, { "epoch": 0.5974872677279055, "grad_norm": 1.0413394330912031, "learning_rate": 4.7729215440688755e-06, "loss": 0.11024322509765624, "step": 69100 }, { "epoch": 0.5975305012494487, "grad_norm": 56.54138765895317, "learning_rate": 4.772757190323579e-06, "loss": 0.22095947265625, "step": 69105 }, { "epoch": 0.5975737347709921, "grad_norm": 7.3186465729847985, "learning_rate": 4.772592828402481e-06, "loss": 0.13044509887695313, "step": 69110 }, { "epoch": 0.5976169682925353, "grad_norm": 29.05414652939644, "learning_rate": 4.772428458306342e-06, "loss": 0.27653350830078127, "step": 69115 }, { "epoch": 0.5976602018140785, "grad_norm": 0.8059280404618017, "learning_rate": 4.772264080035921e-06, "loss": 0.01808929443359375, "step": 69120 }, { "epoch": 0.5977034353356219, "grad_norm": 14.359219746402498, "learning_rate": 4.772099693591973e-06, "loss": 0.07228622436523438, "step": 69125 }, { "epoch": 0.5977466688571651, "grad_norm": 18.623067456060795, "learning_rate": 4.771935298975258e-06, "loss": 0.1058929443359375, "step": 69130 }, { "epoch": 0.5977899023787083, "grad_norm": 13.338430182602394, "learning_rate": 4.7717708961865345e-06, "loss": 0.2412139892578125, "step": 69135 }, { "epoch": 0.5978331359002517, "grad_norm": 24.127685253637196, "learning_rate": 4.771606485226561e-06, "loss": 0.12359161376953125, "step": 69140 }, { "epoch": 0.5978763694217949, "grad_norm": 4.073049919979753, "learning_rate": 4.771442066096093e-06, "loss": 0.06681098937988281, "step": 69145 }, { "epoch": 0.5979196029433381, "grad_norm": 0.2776470946385489, "learning_rate": 4.771277638795892e-06, "loss": 0.07800750732421875, "step": 69150 }, { "epoch": 0.5979628364648815, "grad_norm": 0.8681802640862695, "learning_rate": 4.771113203326715e-06, "loss": 0.14560546875, "step": 69155 }, { "epoch": 0.5980060699864247, "grad_norm": 6.672778890839329, "learning_rate": 4.7709487596893195e-06, "loss": 0.20407257080078126, "step": 69160 }, { "epoch": 0.5980493035079679, "grad_norm": 19.866126477471216, "learning_rate": 4.770784307884466e-06, "loss": 0.10901031494140626, "step": 69165 }, { "epoch": 0.5980925370295112, "grad_norm": 26.92593664350949, "learning_rate": 4.770619847912911e-06, "loss": 0.17428245544433593, "step": 69170 }, { "epoch": 0.5981357705510545, "grad_norm": 9.493596833494179, "learning_rate": 4.770455379775414e-06, "loss": 0.3099781036376953, "step": 69175 }, { "epoch": 0.5981790040725977, "grad_norm": 39.126042209070874, "learning_rate": 4.770290903472733e-06, "loss": 0.17503814697265624, "step": 69180 }, { "epoch": 0.598222237594141, "grad_norm": 0.8216457412702722, "learning_rate": 4.7701264190056265e-06, "loss": 0.029760360717773438, "step": 69185 }, { "epoch": 0.5982654711156843, "grad_norm": 27.247526539238862, "learning_rate": 4.769961926374854e-06, "loss": 0.43184356689453124, "step": 69190 }, { "epoch": 0.5983087046372275, "grad_norm": 3.065865320867286, "learning_rate": 4.769797425581174e-06, "loss": 0.1217376708984375, "step": 69195 }, { "epoch": 0.5983519381587707, "grad_norm": 0.7259235373511449, "learning_rate": 4.769632916625344e-06, "loss": 0.12215652465820312, "step": 69200 }, { "epoch": 0.5983951716803141, "grad_norm": 28.86960608191186, "learning_rate": 4.7694683995081224e-06, "loss": 0.1547483444213867, "step": 69205 }, { "epoch": 0.5984384052018573, "grad_norm": 4.94697311712183, "learning_rate": 4.76930387423027e-06, "loss": 0.037908935546875, "step": 69210 }, { "epoch": 0.5984816387234005, "grad_norm": 12.459004988201963, "learning_rate": 4.769139340792544e-06, "loss": 0.11553916931152344, "step": 69215 }, { "epoch": 0.5985248722449439, "grad_norm": 32.20303506142289, "learning_rate": 4.768974799195704e-06, "loss": 0.3309822082519531, "step": 69220 }, { "epoch": 0.5985681057664871, "grad_norm": 0.3855011791702774, "learning_rate": 4.768810249440508e-06, "loss": 0.058461761474609374, "step": 69225 }, { "epoch": 0.5986113392880303, "grad_norm": 1.5088382515835663, "learning_rate": 4.768645691527716e-06, "loss": 0.31720733642578125, "step": 69230 }, { "epoch": 0.5986545728095737, "grad_norm": 1.3754342245706368, "learning_rate": 4.768481125458087e-06, "loss": 0.0368438720703125, "step": 69235 }, { "epoch": 0.5986978063311169, "grad_norm": 32.09592820386077, "learning_rate": 4.768316551232378e-06, "loss": 0.287005615234375, "step": 69240 }, { "epoch": 0.5987410398526601, "grad_norm": 8.637529093448228, "learning_rate": 4.76815196885135e-06, "loss": 0.0463226318359375, "step": 69245 }, { "epoch": 0.5987842733742034, "grad_norm": 2.7783542303574476, "learning_rate": 4.767987378315761e-06, "loss": 0.13998336791992189, "step": 69250 }, { "epoch": 0.5988275068957467, "grad_norm": 33.24298843104482, "learning_rate": 4.767822779626371e-06, "loss": 0.28050537109375, "step": 69255 }, { "epoch": 0.5988707404172899, "grad_norm": 41.22897105526982, "learning_rate": 4.767658172783938e-06, "loss": 0.6651885986328125, "step": 69260 }, { "epoch": 0.5989139739388332, "grad_norm": 30.793079807181137, "learning_rate": 4.767493557789223e-06, "loss": 0.23005447387695313, "step": 69265 }, { "epoch": 0.5989572074603765, "grad_norm": 24.888210213536453, "learning_rate": 4.767328934642981e-06, "loss": 0.14107170104980468, "step": 69270 }, { "epoch": 0.5990004409819197, "grad_norm": 34.24913561465409, "learning_rate": 4.767164303345977e-06, "loss": 0.183990478515625, "step": 69275 }, { "epoch": 0.599043674503463, "grad_norm": 19.706785748047434, "learning_rate": 4.766999663898967e-06, "loss": 0.1505218505859375, "step": 69280 }, { "epoch": 0.5990869080250063, "grad_norm": 7.190728074282822, "learning_rate": 4.766835016302709e-06, "loss": 0.0808074951171875, "step": 69285 }, { "epoch": 0.5991301415465495, "grad_norm": 0.788844525925891, "learning_rate": 4.766670360557966e-06, "loss": 0.08808860778808594, "step": 69290 }, { "epoch": 0.5991733750680928, "grad_norm": 19.899490106821258, "learning_rate": 4.766505696665494e-06, "loss": 0.2150360107421875, "step": 69295 }, { "epoch": 0.5992166085896361, "grad_norm": 16.444940629732415, "learning_rate": 4.766341024626053e-06, "loss": 0.13394126892089844, "step": 69300 }, { "epoch": 0.5992598421111793, "grad_norm": 2.1265737313932087, "learning_rate": 4.7661763444404046e-06, "loss": 0.25653839111328125, "step": 69305 }, { "epoch": 0.5993030756327226, "grad_norm": 0.15625014181778704, "learning_rate": 4.766011656109308e-06, "loss": 0.062264442443847656, "step": 69310 }, { "epoch": 0.5993463091542659, "grad_norm": 4.187674490456623, "learning_rate": 4.76584695963352e-06, "loss": 0.08254280090332031, "step": 69315 }, { "epoch": 0.5993895426758091, "grad_norm": 1.1304519562035042, "learning_rate": 4.765682255013802e-06, "loss": 0.11402511596679688, "step": 69320 }, { "epoch": 0.5994327761973524, "grad_norm": 42.086510584077544, "learning_rate": 4.765517542250914e-06, "loss": 0.38524322509765624, "step": 69325 }, { "epoch": 0.5994760097188957, "grad_norm": 28.045654759816426, "learning_rate": 4.765352821345614e-06, "loss": 0.16392440795898439, "step": 69330 }, { "epoch": 0.5995192432404389, "grad_norm": 5.992572118112181, "learning_rate": 4.765188092298665e-06, "loss": 0.08387451171875, "step": 69335 }, { "epoch": 0.5995624767619822, "grad_norm": 1.3366029965383521, "learning_rate": 4.765023355110823e-06, "loss": 0.06122398376464844, "step": 69340 }, { "epoch": 0.5996057102835254, "grad_norm": 2.3133851841309836, "learning_rate": 4.76485860978285e-06, "loss": 0.0995819091796875, "step": 69345 }, { "epoch": 0.5996489438050687, "grad_norm": 2.8173720221437555, "learning_rate": 4.764693856315504e-06, "loss": 0.22569580078125, "step": 69350 }, { "epoch": 0.599692177326612, "grad_norm": 2.8052282418079844, "learning_rate": 4.7645290947095465e-06, "loss": 0.1370635986328125, "step": 69355 }, { "epoch": 0.5997354108481552, "grad_norm": 1.5469914527475634, "learning_rate": 4.764364324965736e-06, "loss": 0.12836265563964844, "step": 69360 }, { "epoch": 0.5997786443696985, "grad_norm": 0.3243148858424463, "learning_rate": 4.764199547084834e-06, "loss": 0.19262313842773438, "step": 69365 }, { "epoch": 0.5998218778912418, "grad_norm": 14.313230209565408, "learning_rate": 4.764034761067599e-06, "loss": 0.446197509765625, "step": 69370 }, { "epoch": 0.599865111412785, "grad_norm": 25.099793033266398, "learning_rate": 4.763869966914793e-06, "loss": 0.23081817626953124, "step": 69375 }, { "epoch": 0.5999083449343283, "grad_norm": 1.839571501753661, "learning_rate": 4.763705164627172e-06, "loss": 0.24954376220703126, "step": 69380 }, { "epoch": 0.5999515784558715, "grad_norm": 120.79804709640258, "learning_rate": 4.763540354205501e-06, "loss": 0.14019699096679689, "step": 69385 }, { "epoch": 0.5999948119774148, "grad_norm": 7.294375794037661, "learning_rate": 4.763375535650537e-06, "loss": 0.05860061645507812, "step": 69390 }, { "epoch": 0.6000380454989581, "grad_norm": 1.2276174534425106, "learning_rate": 4.763210708963039e-06, "loss": 0.16015472412109374, "step": 69395 }, { "epoch": 0.6000812790205013, "grad_norm": 4.0473914532307385, "learning_rate": 4.763045874143772e-06, "loss": 0.18914871215820311, "step": 69400 }, { "epoch": 0.6001245125420446, "grad_norm": 0.07440388276229325, "learning_rate": 4.762881031193491e-06, "loss": 0.11064376831054687, "step": 69405 }, { "epoch": 0.6001677460635879, "grad_norm": 1.5205452230861636, "learning_rate": 4.76271618011296e-06, "loss": 0.116070556640625, "step": 69410 }, { "epoch": 0.6002109795851311, "grad_norm": 34.537691551020345, "learning_rate": 4.762551320902937e-06, "loss": 0.23902587890625, "step": 69415 }, { "epoch": 0.6002542131066744, "grad_norm": 1.9922071460553656, "learning_rate": 4.762386453564183e-06, "loss": 0.0873291015625, "step": 69420 }, { "epoch": 0.6002974466282176, "grad_norm": 2.4565807510781084, "learning_rate": 4.762221578097459e-06, "loss": 0.296209716796875, "step": 69425 }, { "epoch": 0.6003406801497609, "grad_norm": 6.250205287265317, "learning_rate": 4.762056694503524e-06, "loss": 0.3614961624145508, "step": 69430 }, { "epoch": 0.6003839136713042, "grad_norm": 0.3989881576593368, "learning_rate": 4.761891802783139e-06, "loss": 0.04697914123535156, "step": 69435 }, { "epoch": 0.6004271471928474, "grad_norm": 5.371674174518285, "learning_rate": 4.761726902937066e-06, "loss": 0.49155731201171876, "step": 69440 }, { "epoch": 0.6004703807143907, "grad_norm": 5.819649883221934, "learning_rate": 4.7615619949660625e-06, "loss": 0.16289329528808594, "step": 69445 }, { "epoch": 0.600513614235934, "grad_norm": 50.44683888168846, "learning_rate": 4.761397078870892e-06, "loss": 0.27382545471191405, "step": 69450 }, { "epoch": 0.6005568477574772, "grad_norm": 0.1732950577320613, "learning_rate": 4.761232154652313e-06, "loss": 0.1805755615234375, "step": 69455 }, { "epoch": 0.6006000812790205, "grad_norm": 0.585047011383519, "learning_rate": 4.761067222311087e-06, "loss": 0.08373565673828125, "step": 69460 }, { "epoch": 0.6006433148005638, "grad_norm": 12.450117994237086, "learning_rate": 4.760902281847976e-06, "loss": 0.053668212890625, "step": 69465 }, { "epoch": 0.600686548322107, "grad_norm": 3.00596306336898, "learning_rate": 4.760737333263738e-06, "loss": 0.212969970703125, "step": 69470 }, { "epoch": 0.6007297818436503, "grad_norm": 4.6079912975283515, "learning_rate": 4.760572376559135e-06, "loss": 0.3764434814453125, "step": 69475 }, { "epoch": 0.6007730153651936, "grad_norm": 17.014148547685878, "learning_rate": 4.760407411734928e-06, "loss": 0.3080036163330078, "step": 69480 }, { "epoch": 0.6008162488867368, "grad_norm": 0.974812672349107, "learning_rate": 4.760242438791877e-06, "loss": 0.07398147583007812, "step": 69485 }, { "epoch": 0.6008594824082801, "grad_norm": 2.2064686116121344, "learning_rate": 4.760077457730744e-06, "loss": 0.3256254196166992, "step": 69490 }, { "epoch": 0.6009027159298234, "grad_norm": 27.22745544019507, "learning_rate": 4.759912468552289e-06, "loss": 0.20597686767578124, "step": 69495 }, { "epoch": 0.6009459494513666, "grad_norm": 28.730507513864627, "learning_rate": 4.759747471257275e-06, "loss": 0.12241706848144532, "step": 69500 }, { "epoch": 0.6009891829729099, "grad_norm": 12.638712749252258, "learning_rate": 4.759582465846458e-06, "loss": 0.1463653564453125, "step": 69505 }, { "epoch": 0.6010324164944532, "grad_norm": 5.252301326041155, "learning_rate": 4.759417452320604e-06, "loss": 0.06116142272949219, "step": 69510 }, { "epoch": 0.6010756500159964, "grad_norm": 4.71777815016474, "learning_rate": 4.759252430680473e-06, "loss": 0.11329450607299804, "step": 69515 }, { "epoch": 0.6011188835375396, "grad_norm": 28.220632363759012, "learning_rate": 4.759087400926823e-06, "loss": 0.14647645950317384, "step": 69520 }, { "epoch": 0.601162117059083, "grad_norm": 21.28370672798814, "learning_rate": 4.758922363060419e-06, "loss": 0.22495155334472655, "step": 69525 }, { "epoch": 0.6012053505806262, "grad_norm": 17.975369693439053, "learning_rate": 4.75875731708202e-06, "loss": 0.8129875183105468, "step": 69530 }, { "epoch": 0.6012485841021694, "grad_norm": 80.2489933295682, "learning_rate": 4.758592262992388e-06, "loss": 0.34622802734375, "step": 69535 }, { "epoch": 0.6012918176237128, "grad_norm": 17.660021628851187, "learning_rate": 4.758427200792283e-06, "loss": 0.0798614501953125, "step": 69540 }, { "epoch": 0.601335051145256, "grad_norm": 1.9083681289375638, "learning_rate": 4.758262130482468e-06, "loss": 0.09207687377929688, "step": 69545 }, { "epoch": 0.6013782846667992, "grad_norm": 2.656157872278075, "learning_rate": 4.758097052063703e-06, "loss": 0.49071197509765624, "step": 69550 }, { "epoch": 0.6014215181883426, "grad_norm": 9.883291418754977, "learning_rate": 4.75793196553675e-06, "loss": 0.22453155517578124, "step": 69555 }, { "epoch": 0.6014647517098858, "grad_norm": 24.305948663566852, "learning_rate": 4.7577668709023695e-06, "loss": 0.23405685424804687, "step": 69560 }, { "epoch": 0.601507985231429, "grad_norm": 16.158265882046628, "learning_rate": 4.757601768161324e-06, "loss": 0.324365234375, "step": 69565 }, { "epoch": 0.6015512187529723, "grad_norm": 4.417197662067349, "learning_rate": 4.7574366573143744e-06, "loss": 0.059032440185546875, "step": 69570 }, { "epoch": 0.6015944522745156, "grad_norm": 10.807076194796672, "learning_rate": 4.757271538362282e-06, "loss": 0.246673583984375, "step": 69575 }, { "epoch": 0.6016376857960588, "grad_norm": 6.38540110211967, "learning_rate": 4.75710641130581e-06, "loss": 0.184136962890625, "step": 69580 }, { "epoch": 0.6016809193176021, "grad_norm": 2.1949059884189177, "learning_rate": 4.7569412761457154e-06, "loss": 0.09669723510742187, "step": 69585 }, { "epoch": 0.6017241528391454, "grad_norm": 4.004875382588878, "learning_rate": 4.756776132882765e-06, "loss": 0.2911674499511719, "step": 69590 }, { "epoch": 0.6017673863606886, "grad_norm": 36.27003346029061, "learning_rate": 4.756610981517719e-06, "loss": 0.39752197265625, "step": 69595 }, { "epoch": 0.6018106198822318, "grad_norm": 13.782358981504759, "learning_rate": 4.756445822051337e-06, "loss": 0.09586944580078124, "step": 69600 }, { "epoch": 0.6018538534037752, "grad_norm": 35.48763369843057, "learning_rate": 4.756280654484382e-06, "loss": 0.3047523498535156, "step": 69605 }, { "epoch": 0.6018970869253184, "grad_norm": 19.362434097087366, "learning_rate": 4.756115478817616e-06, "loss": 0.23958892822265626, "step": 69610 }, { "epoch": 0.6019403204468616, "grad_norm": 2.075693913584992, "learning_rate": 4.755950295051802e-06, "loss": 0.19913558959960936, "step": 69615 }, { "epoch": 0.601983553968405, "grad_norm": 8.228345474843886, "learning_rate": 4.755785103187699e-06, "loss": 0.1799072265625, "step": 69620 }, { "epoch": 0.6020267874899482, "grad_norm": 1.4516490699365523, "learning_rate": 4.75561990322607e-06, "loss": 0.1031280517578125, "step": 69625 }, { "epoch": 0.6020700210114914, "grad_norm": 10.284823224003853, "learning_rate": 4.755454695167678e-06, "loss": 0.19257965087890624, "step": 69630 }, { "epoch": 0.6021132545330348, "grad_norm": 92.8048222471644, "learning_rate": 4.755289479013283e-06, "loss": 0.17977828979492189, "step": 69635 }, { "epoch": 0.602156488054578, "grad_norm": 42.834521206256355, "learning_rate": 4.755124254763649e-06, "loss": 0.31302337646484374, "step": 69640 }, { "epoch": 0.6021997215761212, "grad_norm": 29.711872117084248, "learning_rate": 4.754959022419536e-06, "loss": 0.54195556640625, "step": 69645 }, { "epoch": 0.6022429550976646, "grad_norm": 1.9036198266366515, "learning_rate": 4.754793781981709e-06, "loss": 0.06343841552734375, "step": 69650 }, { "epoch": 0.6022861886192078, "grad_norm": 24.76741476000596, "learning_rate": 4.754628533450927e-06, "loss": 0.31480865478515624, "step": 69655 }, { "epoch": 0.602329422140751, "grad_norm": 4.262215138549061, "learning_rate": 4.754463276827953e-06, "loss": 0.3519584655761719, "step": 69660 }, { "epoch": 0.6023726556622944, "grad_norm": 9.27464258335442, "learning_rate": 4.754298012113549e-06, "loss": 0.1405029296875, "step": 69665 }, { "epoch": 0.6024158891838376, "grad_norm": 0.22003920031998578, "learning_rate": 4.754132739308479e-06, "loss": 0.06608505249023437, "step": 69670 }, { "epoch": 0.6024591227053808, "grad_norm": 6.0088549784806276, "learning_rate": 4.753967458413503e-06, "loss": 0.2162506103515625, "step": 69675 }, { "epoch": 0.6025023562269242, "grad_norm": 0.9165269490392901, "learning_rate": 4.753802169429383e-06, "loss": 0.09778180122375488, "step": 69680 }, { "epoch": 0.6025455897484674, "grad_norm": 4.09690227595335, "learning_rate": 4.753636872356884e-06, "loss": 0.3505462646484375, "step": 69685 }, { "epoch": 0.6025888232700106, "grad_norm": 15.932872401357866, "learning_rate": 4.753471567196766e-06, "loss": 0.096319580078125, "step": 69690 }, { "epoch": 0.6026320567915538, "grad_norm": 0.2850986872087773, "learning_rate": 4.753306253949792e-06, "loss": 0.12468948364257812, "step": 69695 }, { "epoch": 0.6026752903130972, "grad_norm": 3.156197444153134, "learning_rate": 4.753140932616725e-06, "loss": 0.097613525390625, "step": 69700 }, { "epoch": 0.6027185238346404, "grad_norm": 0.9361166219434459, "learning_rate": 4.752975603198327e-06, "loss": 0.25787811279296874, "step": 69705 }, { "epoch": 0.6027617573561836, "grad_norm": 0.9674275037433312, "learning_rate": 4.75281026569536e-06, "loss": 0.06970977783203125, "step": 69710 }, { "epoch": 0.602804990877727, "grad_norm": 1.1746556638884365, "learning_rate": 4.752644920108587e-06, "loss": 0.0360595703125, "step": 69715 }, { "epoch": 0.6028482243992702, "grad_norm": 51.134812848069856, "learning_rate": 4.75247956643877e-06, "loss": 0.2696697235107422, "step": 69720 }, { "epoch": 0.6028914579208134, "grad_norm": 6.403438531299867, "learning_rate": 4.7523142046866715e-06, "loss": 0.1672393798828125, "step": 69725 }, { "epoch": 0.6029346914423568, "grad_norm": 2.6138812940916196, "learning_rate": 4.752148834853057e-06, "loss": 0.26951904296875, "step": 69730 }, { "epoch": 0.6029779249639, "grad_norm": 1.4094410046218326, "learning_rate": 4.751983456938685e-06, "loss": 0.2836467742919922, "step": 69735 }, { "epoch": 0.6030211584854432, "grad_norm": 9.723203713526466, "learning_rate": 4.751818070944321e-06, "loss": 0.06914138793945312, "step": 69740 }, { "epoch": 0.6030643920069866, "grad_norm": 0.7334986368877633, "learning_rate": 4.751652676870726e-06, "loss": 0.5344345092773437, "step": 69745 }, { "epoch": 0.6031076255285298, "grad_norm": 27.588719025982787, "learning_rate": 4.751487274718665e-06, "loss": 0.4926719665527344, "step": 69750 }, { "epoch": 0.603150859050073, "grad_norm": 3.190775915425132, "learning_rate": 4.7513218644888975e-06, "loss": 0.046648406982421876, "step": 69755 }, { "epoch": 0.6031940925716164, "grad_norm": 0.5803888657466469, "learning_rate": 4.75115644618219e-06, "loss": 0.30135040283203124, "step": 69760 }, { "epoch": 0.6032373260931596, "grad_norm": 23.338489761866715, "learning_rate": 4.750991019799303e-06, "loss": 0.08809585571289062, "step": 69765 }, { "epoch": 0.6032805596147028, "grad_norm": 19.98308818992166, "learning_rate": 4.750825585341001e-06, "loss": 0.20376815795898437, "step": 69770 }, { "epoch": 0.6033237931362461, "grad_norm": 3.7990290835808724, "learning_rate": 4.750660142808045e-06, "loss": 0.10811767578125, "step": 69775 }, { "epoch": 0.6033670266577894, "grad_norm": 2.9725851391515437, "learning_rate": 4.7504946922011994e-06, "loss": 0.050518035888671875, "step": 69780 }, { "epoch": 0.6034102601793326, "grad_norm": 2.3492185963307795, "learning_rate": 4.750329233521227e-06, "loss": 0.11089725494384765, "step": 69785 }, { "epoch": 0.6034534937008759, "grad_norm": 1.470785812866102, "learning_rate": 4.750163766768891e-06, "loss": 0.1091827392578125, "step": 69790 }, { "epoch": 0.6034967272224192, "grad_norm": 7.560403584683417, "learning_rate": 4.7499982919449534e-06, "loss": 0.2938201904296875, "step": 69795 }, { "epoch": 0.6035399607439624, "grad_norm": 38.13874221364942, "learning_rate": 4.74983280905018e-06, "loss": 0.14330558776855468, "step": 69800 }, { "epoch": 0.6035831942655057, "grad_norm": 2.7355917514782204, "learning_rate": 4.749667318085332e-06, "loss": 0.1517181396484375, "step": 69805 }, { "epoch": 0.603626427787049, "grad_norm": 2.5977936467461498, "learning_rate": 4.749501819051173e-06, "loss": 0.2421600341796875, "step": 69810 }, { "epoch": 0.6036696613085922, "grad_norm": 0.7172704544792277, "learning_rate": 4.749336311948464e-06, "loss": 0.13291568756103517, "step": 69815 }, { "epoch": 0.6037128948301355, "grad_norm": 1.4942028639091254, "learning_rate": 4.749170796777972e-06, "loss": 0.0664093017578125, "step": 69820 }, { "epoch": 0.6037561283516788, "grad_norm": 39.75082299428119, "learning_rate": 4.749005273540459e-06, "loss": 0.5881561279296875, "step": 69825 }, { "epoch": 0.603799361873222, "grad_norm": 30.53850594307224, "learning_rate": 4.748839742236689e-06, "loss": 0.16652374267578124, "step": 69830 }, { "epoch": 0.6038425953947653, "grad_norm": 0.16443085945817154, "learning_rate": 4.748674202867424e-06, "loss": 0.09720783233642578, "step": 69835 }, { "epoch": 0.6038858289163086, "grad_norm": 35.8329981106038, "learning_rate": 4.748508655433428e-06, "loss": 0.23712692260742188, "step": 69840 }, { "epoch": 0.6039290624378518, "grad_norm": 3.675015676666325, "learning_rate": 4.748343099935464e-06, "loss": 0.12724609375, "step": 69845 }, { "epoch": 0.6039722959593951, "grad_norm": 0.32336335856438775, "learning_rate": 4.7481775363742975e-06, "loss": 0.04844245910644531, "step": 69850 }, { "epoch": 0.6040155294809384, "grad_norm": 12.696872460311681, "learning_rate": 4.748011964750689e-06, "loss": 0.14297943115234374, "step": 69855 }, { "epoch": 0.6040587630024816, "grad_norm": 0.3633811958946926, "learning_rate": 4.747846385065404e-06, "loss": 0.0758758544921875, "step": 69860 }, { "epoch": 0.6041019965240249, "grad_norm": 10.843564998930818, "learning_rate": 4.747680797319207e-06, "loss": 0.19340057373046876, "step": 69865 }, { "epoch": 0.6041452300455681, "grad_norm": 3.4882976215422064, "learning_rate": 4.747515201512861e-06, "loss": 0.020703125, "step": 69870 }, { "epoch": 0.6041884635671114, "grad_norm": 3.6699907109110734, "learning_rate": 4.747349597647127e-06, "loss": 0.1105499267578125, "step": 69875 }, { "epoch": 0.6042316970886547, "grad_norm": 14.973854720132586, "learning_rate": 4.747183985722772e-06, "loss": 0.2874629974365234, "step": 69880 }, { "epoch": 0.6042749306101979, "grad_norm": 16.27002514338507, "learning_rate": 4.74701836574056e-06, "loss": 0.29180202484130857, "step": 69885 }, { "epoch": 0.6043181641317412, "grad_norm": 9.621153347033452, "learning_rate": 4.746852737701253e-06, "loss": 0.06281318664550781, "step": 69890 }, { "epoch": 0.6043613976532844, "grad_norm": 1.697336726756117, "learning_rate": 4.746687101605615e-06, "loss": 0.36540069580078127, "step": 69895 }, { "epoch": 0.6044046311748277, "grad_norm": 3.1292727095106363, "learning_rate": 4.746521457454412e-06, "loss": 0.37371368408203126, "step": 69900 }, { "epoch": 0.604447864696371, "grad_norm": 11.250937265048202, "learning_rate": 4.746355805248404e-06, "loss": 0.1509307861328125, "step": 69905 }, { "epoch": 0.6044910982179142, "grad_norm": 4.556928532831455, "learning_rate": 4.74619014498836e-06, "loss": 0.06110458374023438, "step": 69910 }, { "epoch": 0.6045343317394575, "grad_norm": 3.5048920681014115, "learning_rate": 4.746024476675039e-06, "loss": 0.18176116943359374, "step": 69915 }, { "epoch": 0.6045775652610008, "grad_norm": 4.130822421164101, "learning_rate": 4.745858800309208e-06, "loss": 0.21334762573242189, "step": 69920 }, { "epoch": 0.604620798782544, "grad_norm": 17.922510409980397, "learning_rate": 4.745693115891632e-06, "loss": 0.1490509033203125, "step": 69925 }, { "epoch": 0.6046640323040873, "grad_norm": 9.228740311757534, "learning_rate": 4.7455274234230715e-06, "loss": 0.20268173217773439, "step": 69930 }, { "epoch": 0.6047072658256306, "grad_norm": 1.3892610920706807, "learning_rate": 4.745361722904293e-06, "loss": 0.041998291015625, "step": 69935 }, { "epoch": 0.6047504993471738, "grad_norm": 47.563827423564206, "learning_rate": 4.745196014336062e-06, "loss": 0.25667877197265626, "step": 69940 }, { "epoch": 0.6047937328687171, "grad_norm": 3.3555243384935043, "learning_rate": 4.74503029771914e-06, "loss": 0.0305145263671875, "step": 69945 }, { "epoch": 0.6048369663902603, "grad_norm": 6.612025675106382, "learning_rate": 4.744864573054293e-06, "loss": 0.03878345489501953, "step": 69950 }, { "epoch": 0.6048801999118036, "grad_norm": 5.323778776496833, "learning_rate": 4.744698840342285e-06, "loss": 0.10336074829101563, "step": 69955 }, { "epoch": 0.6049234334333469, "grad_norm": 1.5085654327161737, "learning_rate": 4.744533099583879e-06, "loss": 0.02138175964355469, "step": 69960 }, { "epoch": 0.6049666669548901, "grad_norm": 2.4380826815178507, "learning_rate": 4.744367350779841e-06, "loss": 0.1243804931640625, "step": 69965 }, { "epoch": 0.6050099004764334, "grad_norm": 2.090745577649719, "learning_rate": 4.744201593930935e-06, "loss": 0.37376556396484373, "step": 69970 }, { "epoch": 0.6050531339979767, "grad_norm": 0.16356978998875335, "learning_rate": 4.744035829037926e-06, "loss": 0.12403488159179688, "step": 69975 }, { "epoch": 0.6050963675195199, "grad_norm": 36.32307356156884, "learning_rate": 4.743870056101577e-06, "loss": 0.40654754638671875, "step": 69980 }, { "epoch": 0.6051396010410632, "grad_norm": 0.47269145269287394, "learning_rate": 4.743704275122654e-06, "loss": 0.1319610595703125, "step": 69985 }, { "epoch": 0.6051828345626065, "grad_norm": 0.5416900646482935, "learning_rate": 4.7435384861019206e-06, "loss": 0.21752471923828126, "step": 69990 }, { "epoch": 0.6052260680841497, "grad_norm": 34.69892070635789, "learning_rate": 4.743372689040142e-06, "loss": 0.39802017211914065, "step": 69995 }, { "epoch": 0.605269301605693, "grad_norm": 1.0799376268786274, "learning_rate": 4.7432068839380825e-06, "loss": 0.06044921875, "step": 70000 }, { "epoch": 0.6053125351272363, "grad_norm": 5.6040001655254965, "learning_rate": 4.743041070796506e-06, "loss": 0.27399520874023436, "step": 70005 }, { "epoch": 0.6053557686487795, "grad_norm": 1.1664515408466085, "learning_rate": 4.7428752496161795e-06, "loss": 0.04727859497070312, "step": 70010 }, { "epoch": 0.6053990021703228, "grad_norm": 2.373016218161401, "learning_rate": 4.742709420397865e-06, "loss": 0.14810333251953126, "step": 70015 }, { "epoch": 0.6054422356918661, "grad_norm": 29.135574294036914, "learning_rate": 4.74254358314233e-06, "loss": 0.329779052734375, "step": 70020 }, { "epoch": 0.6054854692134093, "grad_norm": 4.64380620957447, "learning_rate": 4.742377737850338e-06, "loss": 0.07926864624023437, "step": 70025 }, { "epoch": 0.6055287027349525, "grad_norm": 5.133727152248615, "learning_rate": 4.742211884522653e-06, "loss": 0.0792510986328125, "step": 70030 }, { "epoch": 0.6055719362564959, "grad_norm": 8.505977141580168, "learning_rate": 4.74204602316004e-06, "loss": 0.08536567687988281, "step": 70035 }, { "epoch": 0.6056151697780391, "grad_norm": 6.2563182935281185, "learning_rate": 4.741880153763265e-06, "loss": 0.06629562377929688, "step": 70040 }, { "epoch": 0.6056584032995823, "grad_norm": 0.08504310364189302, "learning_rate": 4.7417142763330936e-06, "loss": 0.038809585571289065, "step": 70045 }, { "epoch": 0.6057016368211257, "grad_norm": 0.0977459019673571, "learning_rate": 4.741548390870291e-06, "loss": 0.149658203125, "step": 70050 }, { "epoch": 0.6057448703426689, "grad_norm": 13.740202665060348, "learning_rate": 4.741382497375618e-06, "loss": 0.10639801025390624, "step": 70055 }, { "epoch": 0.6057881038642121, "grad_norm": 1.8914114429146072, "learning_rate": 4.741216595849845e-06, "loss": 0.05504798889160156, "step": 70060 }, { "epoch": 0.6058313373857555, "grad_norm": 4.496078258636161, "learning_rate": 4.741050686293734e-06, "loss": 0.11534347534179687, "step": 70065 }, { "epoch": 0.6058745709072987, "grad_norm": 9.217568276747558, "learning_rate": 4.740884768708052e-06, "loss": 0.14102020263671874, "step": 70070 }, { "epoch": 0.6059178044288419, "grad_norm": 15.125889343391659, "learning_rate": 4.7407188430935625e-06, "loss": 0.20392017364501952, "step": 70075 }, { "epoch": 0.6059610379503853, "grad_norm": 0.20728512285630094, "learning_rate": 4.740552909451032e-06, "loss": 0.2612049102783203, "step": 70080 }, { "epoch": 0.6060042714719285, "grad_norm": 16.914804644300915, "learning_rate": 4.740386967781224e-06, "loss": 0.10969772338867187, "step": 70085 }, { "epoch": 0.6060475049934717, "grad_norm": 6.864374143763546, "learning_rate": 4.740221018084906e-06, "loss": 0.054315185546875, "step": 70090 }, { "epoch": 0.606090738515015, "grad_norm": 4.344918315073808, "learning_rate": 4.740055060362842e-06, "loss": 0.20509834289550782, "step": 70095 }, { "epoch": 0.6061339720365583, "grad_norm": 20.00755790102536, "learning_rate": 4.739889094615799e-06, "loss": 0.5223876953125, "step": 70100 }, { "epoch": 0.6061772055581015, "grad_norm": 11.519478221211246, "learning_rate": 4.7397231208445415e-06, "loss": 0.06685943603515625, "step": 70105 }, { "epoch": 0.6062204390796448, "grad_norm": 12.718286423182565, "learning_rate": 4.739557139049833e-06, "loss": 0.11986541748046875, "step": 70110 }, { "epoch": 0.6062636726011881, "grad_norm": 32.40058374665796, "learning_rate": 4.739391149232442e-06, "loss": 0.10423202514648437, "step": 70115 }, { "epoch": 0.6063069061227313, "grad_norm": 3.296142790628998, "learning_rate": 4.7392251513931314e-06, "loss": 0.14680328369140624, "step": 70120 }, { "epoch": 0.6063501396442745, "grad_norm": 13.001377978834894, "learning_rate": 4.739059145532669e-06, "loss": 0.19183578491210937, "step": 70125 }, { "epoch": 0.6063933731658179, "grad_norm": 9.69537244087508, "learning_rate": 4.738893131651819e-06, "loss": 0.24253425598144532, "step": 70130 }, { "epoch": 0.6064366066873611, "grad_norm": 16.409908634824003, "learning_rate": 4.738727109751348e-06, "loss": 0.12531356811523436, "step": 70135 }, { "epoch": 0.6064798402089043, "grad_norm": 2.1698459242466197, "learning_rate": 4.738561079832022e-06, "loss": 0.0192413330078125, "step": 70140 }, { "epoch": 0.6065230737304477, "grad_norm": 6.100969631553115, "learning_rate": 4.7383950418946035e-06, "loss": 0.19802093505859375, "step": 70145 }, { "epoch": 0.6065663072519909, "grad_norm": 27.88725641975471, "learning_rate": 4.738228995939863e-06, "loss": 0.48239822387695314, "step": 70150 }, { "epoch": 0.6066095407735341, "grad_norm": 3.20150612371435, "learning_rate": 4.738062941968563e-06, "loss": 0.7464805603027344, "step": 70155 }, { "epoch": 0.6066527742950775, "grad_norm": 2.5751604359669247, "learning_rate": 4.73789687998147e-06, "loss": 0.2491527557373047, "step": 70160 }, { "epoch": 0.6066960078166207, "grad_norm": 2.8417256288548467, "learning_rate": 4.73773080997935e-06, "loss": 0.1638427734375, "step": 70165 }, { "epoch": 0.6067392413381639, "grad_norm": 14.54104212231943, "learning_rate": 4.73756473196297e-06, "loss": 0.153741455078125, "step": 70170 }, { "epoch": 0.6067824748597073, "grad_norm": 1.730865351648477, "learning_rate": 4.737398645933094e-06, "loss": 0.10157012939453125, "step": 70175 }, { "epoch": 0.6068257083812505, "grad_norm": 51.464463035729416, "learning_rate": 4.73723255189049e-06, "loss": 0.1262603759765625, "step": 70180 }, { "epoch": 0.6068689419027937, "grad_norm": 0.6059861002673871, "learning_rate": 4.737066449835922e-06, "loss": 0.0165252685546875, "step": 70185 }, { "epoch": 0.6069121754243371, "grad_norm": 29.306142838430826, "learning_rate": 4.736900339770157e-06, "loss": 0.26649932861328124, "step": 70190 }, { "epoch": 0.6069554089458803, "grad_norm": 76.97099024946597, "learning_rate": 4.7367342216939625e-06, "loss": 0.48626251220703126, "step": 70195 }, { "epoch": 0.6069986424674235, "grad_norm": 24.93710002906869, "learning_rate": 4.736568095608102e-06, "loss": 0.14247512817382812, "step": 70200 }, { "epoch": 0.6070418759889668, "grad_norm": 7.996847248268284, "learning_rate": 4.736401961513344e-06, "loss": 0.51510009765625, "step": 70205 }, { "epoch": 0.6070851095105101, "grad_norm": 5.697358359882075, "learning_rate": 4.736235819410453e-06, "loss": 0.144061279296875, "step": 70210 }, { "epoch": 0.6071283430320533, "grad_norm": 11.32393930286989, "learning_rate": 4.736069669300195e-06, "loss": 0.10418701171875, "step": 70215 }, { "epoch": 0.6071715765535965, "grad_norm": 0.5996126138799355, "learning_rate": 4.735903511183338e-06, "loss": 0.35166168212890625, "step": 70220 }, { "epoch": 0.6072148100751399, "grad_norm": 2.4659512645562796, "learning_rate": 4.735737345060646e-06, "loss": 0.0530548095703125, "step": 70225 }, { "epoch": 0.6072580435966831, "grad_norm": 2.537610487448434, "learning_rate": 4.7355711709328885e-06, "loss": 0.140167236328125, "step": 70230 }, { "epoch": 0.6073012771182263, "grad_norm": 0.13183571909152367, "learning_rate": 4.73540498880083e-06, "loss": 0.0795135498046875, "step": 70235 }, { "epoch": 0.6073445106397697, "grad_norm": 2.669621824521778, "learning_rate": 4.735238798665236e-06, "loss": 0.0326934814453125, "step": 70240 }, { "epoch": 0.6073877441613129, "grad_norm": 0.400028826442873, "learning_rate": 4.735072600526875e-06, "loss": 0.12311305999755859, "step": 70245 }, { "epoch": 0.6074309776828561, "grad_norm": 8.04421548205352, "learning_rate": 4.734906394386512e-06, "loss": 0.24359130859375, "step": 70250 }, { "epoch": 0.6074742112043995, "grad_norm": 0.5829450426202039, "learning_rate": 4.734740180244914e-06, "loss": 0.10278091430664063, "step": 70255 }, { "epoch": 0.6075174447259427, "grad_norm": 0.28853203920462417, "learning_rate": 4.734573958102848e-06, "loss": 0.15012283325195314, "step": 70260 }, { "epoch": 0.6075606782474859, "grad_norm": 39.24437023096653, "learning_rate": 4.734407727961079e-06, "loss": 0.25972137451171873, "step": 70265 }, { "epoch": 0.6076039117690293, "grad_norm": 3.457610493877371, "learning_rate": 4.734241489820375e-06, "loss": 0.16169891357421876, "step": 70270 }, { "epoch": 0.6076471452905725, "grad_norm": 21.31178202768205, "learning_rate": 4.7340752436815034e-06, "loss": 0.192474365234375, "step": 70275 }, { "epoch": 0.6076903788121157, "grad_norm": 24.49008941813398, "learning_rate": 4.73390898954523e-06, "loss": 0.3281665802001953, "step": 70280 }, { "epoch": 0.6077336123336591, "grad_norm": 10.955950466394428, "learning_rate": 4.733742727412321e-06, "loss": 0.3057098388671875, "step": 70285 }, { "epoch": 0.6077768458552023, "grad_norm": 11.555647513574593, "learning_rate": 4.733576457283544e-06, "loss": 0.13665924072265626, "step": 70290 }, { "epoch": 0.6078200793767455, "grad_norm": 10.61008659464933, "learning_rate": 4.733410179159666e-06, "loss": 0.39081268310546874, "step": 70295 }, { "epoch": 0.6078633128982888, "grad_norm": 3.4790342749068497, "learning_rate": 4.733243893041453e-06, "loss": 0.0936676025390625, "step": 70300 }, { "epoch": 0.6079065464198321, "grad_norm": 8.546274396952452, "learning_rate": 4.733077598929671e-06, "loss": 0.2596271514892578, "step": 70305 }, { "epoch": 0.6079497799413753, "grad_norm": 0.2569308692475092, "learning_rate": 4.73291129682509e-06, "loss": 0.14746551513671874, "step": 70310 }, { "epoch": 0.6079930134629186, "grad_norm": 18.061239625108225, "learning_rate": 4.7327449867284745e-06, "loss": 0.22908935546875, "step": 70315 }, { "epoch": 0.6080362469844619, "grad_norm": 0.3035714301268416, "learning_rate": 4.732578668640593e-06, "loss": 0.1699188232421875, "step": 70320 }, { "epoch": 0.6080794805060051, "grad_norm": 0.23361441933244734, "learning_rate": 4.73241234256221e-06, "loss": 0.07490463256835937, "step": 70325 }, { "epoch": 0.6081227140275484, "grad_norm": 17.739993515714602, "learning_rate": 4.732246008494097e-06, "loss": 0.2867542266845703, "step": 70330 }, { "epoch": 0.6081659475490917, "grad_norm": 12.447942741299098, "learning_rate": 4.732079666437016e-06, "loss": 0.21624221801757812, "step": 70335 }, { "epoch": 0.6082091810706349, "grad_norm": 10.094016585456606, "learning_rate": 4.7319133163917375e-06, "loss": 0.19729118347167968, "step": 70340 }, { "epoch": 0.6082524145921782, "grad_norm": 11.618964849644927, "learning_rate": 4.731746958359029e-06, "loss": 0.041253280639648435, "step": 70345 }, { "epoch": 0.6082956481137215, "grad_norm": 4.878404014627329, "learning_rate": 4.731580592339655e-06, "loss": 0.174151611328125, "step": 70350 }, { "epoch": 0.6083388816352647, "grad_norm": 25.61474444761134, "learning_rate": 4.731414218334385e-06, "loss": 0.1192108154296875, "step": 70355 }, { "epoch": 0.608382115156808, "grad_norm": 2.261974539609841, "learning_rate": 4.731247836343986e-06, "loss": 0.07341842651367188, "step": 70360 }, { "epoch": 0.6084253486783513, "grad_norm": 0.07963484247397293, "learning_rate": 4.7310814463692235e-06, "loss": 0.0688812255859375, "step": 70365 }, { "epoch": 0.6084685821998945, "grad_norm": 3.311939779949258, "learning_rate": 4.730915048410867e-06, "loss": 0.06308212280273437, "step": 70370 }, { "epoch": 0.6085118157214378, "grad_norm": 5.193838680445764, "learning_rate": 4.730748642469684e-06, "loss": 0.071502685546875, "step": 70375 }, { "epoch": 0.608555049242981, "grad_norm": 0.3093960508807758, "learning_rate": 4.730582228546441e-06, "loss": 0.26006622314453126, "step": 70380 }, { "epoch": 0.6085982827645243, "grad_norm": 1.2288894585043142, "learning_rate": 4.730415806641905e-06, "loss": 0.05896739959716797, "step": 70385 }, { "epoch": 0.6086415162860676, "grad_norm": 0.8792161640568271, "learning_rate": 4.730249376756845e-06, "loss": 0.0947265625, "step": 70390 }, { "epoch": 0.6086847498076108, "grad_norm": 11.499771410801747, "learning_rate": 4.730082938892027e-06, "loss": 0.10644760131835937, "step": 70395 }, { "epoch": 0.6087279833291541, "grad_norm": 32.966255035444235, "learning_rate": 4.729916493048219e-06, "loss": 0.22974662780761718, "step": 70400 }, { "epoch": 0.6087712168506973, "grad_norm": 29.461351809199673, "learning_rate": 4.72975003922619e-06, "loss": 0.21128768920898439, "step": 70405 }, { "epoch": 0.6088144503722406, "grad_norm": 3.5420973671132194, "learning_rate": 4.729583577426707e-06, "loss": 0.135888671875, "step": 70410 }, { "epoch": 0.6088576838937839, "grad_norm": 0.8240682044267906, "learning_rate": 4.729417107650536e-06, "loss": 0.22120208740234376, "step": 70415 }, { "epoch": 0.6089009174153271, "grad_norm": 15.743512148124564, "learning_rate": 4.729250629898447e-06, "loss": 0.1993255615234375, "step": 70420 }, { "epoch": 0.6089441509368704, "grad_norm": 4.715202589466452, "learning_rate": 4.729084144171207e-06, "loss": 0.30442466735839846, "step": 70425 }, { "epoch": 0.6089873844584137, "grad_norm": 0.8398587145213807, "learning_rate": 4.728917650469582e-06, "loss": 0.127587890625, "step": 70430 }, { "epoch": 0.609030617979957, "grad_norm": 1.461603594641487, "learning_rate": 4.728751148794344e-06, "loss": 0.04534912109375, "step": 70435 }, { "epoch": 0.6090738515015002, "grad_norm": 13.305827110983142, "learning_rate": 4.728584639146258e-06, "loss": 0.32822265625, "step": 70440 }, { "epoch": 0.6091170850230435, "grad_norm": 5.799463945240733, "learning_rate": 4.728418121526091e-06, "loss": 0.04724884033203125, "step": 70445 }, { "epoch": 0.6091603185445867, "grad_norm": 12.962477265384662, "learning_rate": 4.728251595934613e-06, "loss": 0.1548065185546875, "step": 70450 }, { "epoch": 0.60920355206613, "grad_norm": 13.463770943784368, "learning_rate": 4.728085062372592e-06, "loss": 0.066705322265625, "step": 70455 }, { "epoch": 0.6092467855876733, "grad_norm": 4.818371934512552, "learning_rate": 4.727918520840795e-06, "loss": 0.10522804260253907, "step": 70460 }, { "epoch": 0.6092900191092165, "grad_norm": 2.722463550434113, "learning_rate": 4.72775197133999e-06, "loss": 0.18188858032226562, "step": 70465 }, { "epoch": 0.6093332526307598, "grad_norm": 0.8018541689090525, "learning_rate": 4.727585413870946e-06, "loss": 0.037658309936523436, "step": 70470 }, { "epoch": 0.609376486152303, "grad_norm": 17.121892615738275, "learning_rate": 4.727418848434431e-06, "loss": 0.29433746337890626, "step": 70475 }, { "epoch": 0.6094197196738463, "grad_norm": 5.30349135437969, "learning_rate": 4.7272522750312126e-06, "loss": 0.13363494873046874, "step": 70480 }, { "epoch": 0.6094629531953896, "grad_norm": 15.361748668237407, "learning_rate": 4.7270856936620584e-06, "loss": 0.179388427734375, "step": 70485 }, { "epoch": 0.6095061867169328, "grad_norm": 1.3870170763034118, "learning_rate": 4.726919104327739e-06, "loss": 0.24404563903808593, "step": 70490 }, { "epoch": 0.6095494202384761, "grad_norm": 2.5706275006454447, "learning_rate": 4.72675250702902e-06, "loss": 0.1196319580078125, "step": 70495 }, { "epoch": 0.6095926537600194, "grad_norm": 0.46186794853796487, "learning_rate": 4.7265859017666734e-06, "loss": 0.7162193298339844, "step": 70500 }, { "epoch": 0.6096358872815626, "grad_norm": 47.8524915014028, "learning_rate": 4.726419288541463e-06, "loss": 0.6417304992675781, "step": 70505 }, { "epoch": 0.6096791208031059, "grad_norm": 0.2597728739051843, "learning_rate": 4.72625266735416e-06, "loss": 0.19505767822265624, "step": 70510 }, { "epoch": 0.6097223543246492, "grad_norm": 1.7374194465029416, "learning_rate": 4.726086038205532e-06, "loss": 0.30064697265625, "step": 70515 }, { "epoch": 0.6097655878461924, "grad_norm": 1.164767763641114, "learning_rate": 4.725919401096348e-06, "loss": 0.2407562255859375, "step": 70520 }, { "epoch": 0.6098088213677357, "grad_norm": 3.725530993695918, "learning_rate": 4.725752756027376e-06, "loss": 0.5099390029907227, "step": 70525 }, { "epoch": 0.609852054889279, "grad_norm": 7.363415597536344, "learning_rate": 4.725586102999385e-06, "loss": 0.1267669677734375, "step": 70530 }, { "epoch": 0.6098952884108222, "grad_norm": 5.251681326009602, "learning_rate": 4.725419442013143e-06, "loss": 0.2947723388671875, "step": 70535 }, { "epoch": 0.6099385219323655, "grad_norm": 9.301270078663004, "learning_rate": 4.725252773069419e-06, "loss": 0.4525886535644531, "step": 70540 }, { "epoch": 0.6099817554539088, "grad_norm": 0.11188454599397062, "learning_rate": 4.7250860961689825e-06, "loss": 0.007578277587890625, "step": 70545 }, { "epoch": 0.610024988975452, "grad_norm": 6.141571507323811, "learning_rate": 4.7249194113126005e-06, "loss": 0.16230850219726561, "step": 70550 }, { "epoch": 0.6100682224969952, "grad_norm": 22.338829531147212, "learning_rate": 4.7247527185010436e-06, "loss": 0.18392562866210938, "step": 70555 }, { "epoch": 0.6101114560185386, "grad_norm": 6.0242905759316, "learning_rate": 4.724586017735078e-06, "loss": 0.0808563232421875, "step": 70560 }, { "epoch": 0.6101546895400818, "grad_norm": 14.70139368579282, "learning_rate": 4.724419309015475e-06, "loss": 0.10138206481933594, "step": 70565 }, { "epoch": 0.610197923061625, "grad_norm": 2.0209704861779576, "learning_rate": 4.724252592343002e-06, "loss": 0.12340431213378907, "step": 70570 }, { "epoch": 0.6102411565831684, "grad_norm": 0.6276058093597935, "learning_rate": 4.7240858677184295e-06, "loss": 0.04710235595703125, "step": 70575 }, { "epoch": 0.6102843901047116, "grad_norm": 9.070389874436538, "learning_rate": 4.7239191351425246e-06, "loss": 0.24963531494140626, "step": 70580 }, { "epoch": 0.6103276236262548, "grad_norm": 4.255598414060309, "learning_rate": 4.7237523946160575e-06, "loss": 0.2740692138671875, "step": 70585 }, { "epoch": 0.6103708571477982, "grad_norm": 2.4894999007588416, "learning_rate": 4.723585646139796e-06, "loss": 0.040903663635253905, "step": 70590 }, { "epoch": 0.6104140906693414, "grad_norm": 2.1397656266260463, "learning_rate": 4.723418889714509e-06, "loss": 0.04379119873046875, "step": 70595 }, { "epoch": 0.6104573241908846, "grad_norm": 5.020309067642792, "learning_rate": 4.723252125340968e-06, "loss": 0.09340476989746094, "step": 70600 }, { "epoch": 0.610500557712428, "grad_norm": 0.29855511011859903, "learning_rate": 4.7230853530199394e-06, "loss": 0.09540863037109375, "step": 70605 }, { "epoch": 0.6105437912339712, "grad_norm": 4.885904621662985, "learning_rate": 4.722918572752194e-06, "loss": 0.15423583984375, "step": 70610 }, { "epoch": 0.6105870247555144, "grad_norm": 19.664256864153774, "learning_rate": 4.722751784538501e-06, "loss": 0.196002197265625, "step": 70615 }, { "epoch": 0.6106302582770577, "grad_norm": 32.716761503642374, "learning_rate": 4.722584988379627e-06, "loss": 0.235565185546875, "step": 70620 }, { "epoch": 0.610673491798601, "grad_norm": 4.565956586375479, "learning_rate": 4.722418184276345e-06, "loss": 0.14559288024902345, "step": 70625 }, { "epoch": 0.6107167253201442, "grad_norm": 6.730350698575607, "learning_rate": 4.722251372229422e-06, "loss": 0.08435745239257812, "step": 70630 }, { "epoch": 0.6107599588416875, "grad_norm": 49.28672616706862, "learning_rate": 4.722084552239628e-06, "loss": 0.2799163818359375, "step": 70635 }, { "epoch": 0.6108031923632308, "grad_norm": 0.13473278704940306, "learning_rate": 4.721917724307732e-06, "loss": 0.3170661926269531, "step": 70640 }, { "epoch": 0.610846425884774, "grad_norm": 1.4065912753932468, "learning_rate": 4.721750888434504e-06, "loss": 0.1192535400390625, "step": 70645 }, { "epoch": 0.6108896594063172, "grad_norm": 16.132685761185854, "learning_rate": 4.7215840446207125e-06, "loss": 0.16839141845703126, "step": 70650 }, { "epoch": 0.6109328929278606, "grad_norm": 2.296974807417779, "learning_rate": 4.721417192867127e-06, "loss": 0.188616943359375, "step": 70655 }, { "epoch": 0.6109761264494038, "grad_norm": 2.024040876328039, "learning_rate": 4.721250333174519e-06, "loss": 0.08993453979492187, "step": 70660 }, { "epoch": 0.611019359970947, "grad_norm": 0.3854713788597949, "learning_rate": 4.7210834655436554e-06, "loss": 0.11378288269042969, "step": 70665 }, { "epoch": 0.6110625934924904, "grad_norm": 1.783098830531676, "learning_rate": 4.7209165899753075e-06, "loss": 0.18212432861328126, "step": 70670 }, { "epoch": 0.6111058270140336, "grad_norm": 19.025833933740724, "learning_rate": 4.720749706470244e-06, "loss": 0.114178466796875, "step": 70675 }, { "epoch": 0.6111490605355768, "grad_norm": 12.55465875527372, "learning_rate": 4.7205828150292355e-06, "loss": 0.082855224609375, "step": 70680 }, { "epoch": 0.6111922940571202, "grad_norm": 0.21711052239498946, "learning_rate": 4.72041591565305e-06, "loss": 0.06962890625, "step": 70685 }, { "epoch": 0.6112355275786634, "grad_norm": 3.9685381769332047, "learning_rate": 4.720249008342459e-06, "loss": 0.14133148193359374, "step": 70690 }, { "epoch": 0.6112787611002066, "grad_norm": 2.522313026618653, "learning_rate": 4.720082093098232e-06, "loss": 0.0855621337890625, "step": 70695 }, { "epoch": 0.61132199462175, "grad_norm": 3.79312577000592, "learning_rate": 4.719915169921137e-06, "loss": 0.0853759765625, "step": 70700 }, { "epoch": 0.6113652281432932, "grad_norm": 2.090469431299105, "learning_rate": 4.719748238811947e-06, "loss": 0.17313804626464843, "step": 70705 }, { "epoch": 0.6114084616648364, "grad_norm": 15.484604022700214, "learning_rate": 4.719581299771429e-06, "loss": 0.12125701904296875, "step": 70710 }, { "epoch": 0.6114516951863798, "grad_norm": 23.987711542736687, "learning_rate": 4.719414352800354e-06, "loss": 0.21744613647460936, "step": 70715 }, { "epoch": 0.611494928707923, "grad_norm": 3.0875243778720924, "learning_rate": 4.719247397899492e-06, "loss": 0.133807373046875, "step": 70720 }, { "epoch": 0.6115381622294662, "grad_norm": 4.52121148964267, "learning_rate": 4.719080435069613e-06, "loss": 0.3181789398193359, "step": 70725 }, { "epoch": 0.6115813957510094, "grad_norm": 0.05255272998567562, "learning_rate": 4.7189134643114874e-06, "loss": 0.226800537109375, "step": 70730 }, { "epoch": 0.6116246292725528, "grad_norm": 4.945530069746751, "learning_rate": 4.718746485625884e-06, "loss": 0.11553878784179687, "step": 70735 }, { "epoch": 0.611667862794096, "grad_norm": 6.155290148307665, "learning_rate": 4.718579499013574e-06, "loss": 0.068316650390625, "step": 70740 }, { "epoch": 0.6117110963156392, "grad_norm": 6.252337026207378, "learning_rate": 4.718412504475328e-06, "loss": 0.11189346313476563, "step": 70745 }, { "epoch": 0.6117543298371826, "grad_norm": 2.1661632571755094, "learning_rate": 4.7182455020119145e-06, "loss": 0.08667755126953125, "step": 70750 }, { "epoch": 0.6117975633587258, "grad_norm": 13.850040194486564, "learning_rate": 4.718078491624105e-06, "loss": 0.124066162109375, "step": 70755 }, { "epoch": 0.611840796880269, "grad_norm": 22.013029643795985, "learning_rate": 4.717911473312668e-06, "loss": 0.1340118408203125, "step": 70760 }, { "epoch": 0.6118840304018124, "grad_norm": 0.8820818685649298, "learning_rate": 4.717744447078376e-06, "loss": 0.116357421875, "step": 70765 }, { "epoch": 0.6119272639233556, "grad_norm": 14.64314801937578, "learning_rate": 4.717577412921999e-06, "loss": 0.151434326171875, "step": 70770 }, { "epoch": 0.6119704974448988, "grad_norm": 26.84125449072862, "learning_rate": 4.717410370844306e-06, "loss": 0.38674201965332033, "step": 70775 }, { "epoch": 0.6120137309664422, "grad_norm": 10.504347338971696, "learning_rate": 4.717243320846068e-06, "loss": 0.16900367736816407, "step": 70780 }, { "epoch": 0.6120569644879854, "grad_norm": 8.907695826662758, "learning_rate": 4.7170762629280565e-06, "loss": 0.21310195922851563, "step": 70785 }, { "epoch": 0.6121001980095286, "grad_norm": 1.2535558158581555, "learning_rate": 4.716909197091041e-06, "loss": 0.10559539794921875, "step": 70790 }, { "epoch": 0.612143431531072, "grad_norm": 6.198648435796171, "learning_rate": 4.716742123335791e-06, "loss": 0.21022415161132812, "step": 70795 }, { "epoch": 0.6121866650526152, "grad_norm": 0.1873531251054399, "learning_rate": 4.716575041663078e-06, "loss": 0.0645721435546875, "step": 70800 }, { "epoch": 0.6122298985741584, "grad_norm": 3.5092089489212563, "learning_rate": 4.7164079520736736e-06, "loss": 0.0778839111328125, "step": 70805 }, { "epoch": 0.6122731320957018, "grad_norm": 53.280455386451834, "learning_rate": 4.7162408545683456e-06, "loss": 0.28818798065185547, "step": 70810 }, { "epoch": 0.612316365617245, "grad_norm": 2.7729181942715573, "learning_rate": 4.716073749147868e-06, "loss": 0.067303466796875, "step": 70815 }, { "epoch": 0.6123595991387882, "grad_norm": 2.3629096447115447, "learning_rate": 4.71590663581301e-06, "loss": 0.1782611846923828, "step": 70820 }, { "epoch": 0.6124028326603315, "grad_norm": 23.477842216625685, "learning_rate": 4.715739514564541e-06, "loss": 0.2334228515625, "step": 70825 }, { "epoch": 0.6124460661818748, "grad_norm": 0.21938970853871104, "learning_rate": 4.715572385403234e-06, "loss": 0.025744247436523437, "step": 70830 }, { "epoch": 0.612489299703418, "grad_norm": 6.8625179882034795, "learning_rate": 4.715405248329859e-06, "loss": 0.09102249145507812, "step": 70835 }, { "epoch": 0.6125325332249613, "grad_norm": 0.2576514355846675, "learning_rate": 4.715238103345186e-06, "loss": 0.312115478515625, "step": 70840 }, { "epoch": 0.6125757667465046, "grad_norm": 3.26945717689111, "learning_rate": 4.715070950449986e-06, "loss": 0.06608543395996094, "step": 70845 }, { "epoch": 0.6126190002680478, "grad_norm": 5.345882579750098, "learning_rate": 4.714903789645031e-06, "loss": 0.10518798828125, "step": 70850 }, { "epoch": 0.6126622337895911, "grad_norm": 1.8809617867904918, "learning_rate": 4.714736620931091e-06, "loss": 0.06464920043945313, "step": 70855 }, { "epoch": 0.6127054673111344, "grad_norm": 24.498597236872133, "learning_rate": 4.714569444308937e-06, "loss": 0.166229248046875, "step": 70860 }, { "epoch": 0.6127487008326776, "grad_norm": 32.34813197145942, "learning_rate": 4.71440225977934e-06, "loss": 0.39234580993652346, "step": 70865 }, { "epoch": 0.6127919343542209, "grad_norm": 1.217185059013092, "learning_rate": 4.714235067343073e-06, "loss": 0.3764373779296875, "step": 70870 }, { "epoch": 0.6128351678757642, "grad_norm": 26.62134183251501, "learning_rate": 4.714067867000903e-06, "loss": 0.222149658203125, "step": 70875 }, { "epoch": 0.6128784013973074, "grad_norm": 0.6118680091178557, "learning_rate": 4.713900658753605e-06, "loss": 0.020769500732421876, "step": 70880 }, { "epoch": 0.6129216349188507, "grad_norm": 3.592797920934089, "learning_rate": 4.713733442601948e-06, "loss": 0.13752059936523436, "step": 70885 }, { "epoch": 0.612964868440394, "grad_norm": 32.83195872372796, "learning_rate": 4.713566218546703e-06, "loss": 0.2830291748046875, "step": 70890 }, { "epoch": 0.6130081019619372, "grad_norm": 22.759417785853476, "learning_rate": 4.713398986588644e-06, "loss": 0.13501815795898436, "step": 70895 }, { "epoch": 0.6130513354834805, "grad_norm": 6.473570700635561, "learning_rate": 4.713231746728539e-06, "loss": 0.19893875122070312, "step": 70900 }, { "epoch": 0.6130945690050237, "grad_norm": 11.70764203755055, "learning_rate": 4.713064498967161e-06, "loss": 0.13205032348632811, "step": 70905 }, { "epoch": 0.613137802526567, "grad_norm": 0.14833666192187836, "learning_rate": 4.7128972433052805e-06, "loss": 0.0662689208984375, "step": 70910 }, { "epoch": 0.6131810360481103, "grad_norm": 5.682736649790792, "learning_rate": 4.712729979743669e-06, "loss": 0.03428955078125, "step": 70915 }, { "epoch": 0.6132242695696535, "grad_norm": 3.8188502986007355, "learning_rate": 4.712562708283099e-06, "loss": 0.08623504638671875, "step": 70920 }, { "epoch": 0.6132675030911968, "grad_norm": 8.968545278972321, "learning_rate": 4.71239542892434e-06, "loss": 0.49186553955078127, "step": 70925 }, { "epoch": 0.61331073661274, "grad_norm": 6.411732516988485, "learning_rate": 4.712228141668166e-06, "loss": 0.455767822265625, "step": 70930 }, { "epoch": 0.6133539701342833, "grad_norm": 0.15416151622613436, "learning_rate": 4.7120608465153465e-06, "loss": 0.10218124389648438, "step": 70935 }, { "epoch": 0.6133972036558266, "grad_norm": 6.250319851703717, "learning_rate": 4.711893543466654e-06, "loss": 0.0301055908203125, "step": 70940 }, { "epoch": 0.6134404371773698, "grad_norm": 0.3749241279415973, "learning_rate": 4.711726232522858e-06, "loss": 0.03080902099609375, "step": 70945 }, { "epoch": 0.6134836706989131, "grad_norm": 1.121666953914615, "learning_rate": 4.711558913684733e-06, "loss": 0.0443878173828125, "step": 70950 }, { "epoch": 0.6135269042204564, "grad_norm": 23.49791935289134, "learning_rate": 4.71139158695305e-06, "loss": 0.44829559326171875, "step": 70955 }, { "epoch": 0.6135701377419996, "grad_norm": 22.386108825762037, "learning_rate": 4.71122425232858e-06, "loss": 0.4060943603515625, "step": 70960 }, { "epoch": 0.6136133712635429, "grad_norm": 0.776491709908757, "learning_rate": 4.711056909812095e-06, "loss": 0.120062255859375, "step": 70965 }, { "epoch": 0.6136566047850862, "grad_norm": 9.850097261515398, "learning_rate": 4.710889559404367e-06, "loss": 0.069024658203125, "step": 70970 }, { "epoch": 0.6136998383066294, "grad_norm": 1.01361818084229, "learning_rate": 4.710722201106167e-06, "loss": 0.35207366943359375, "step": 70975 }, { "epoch": 0.6137430718281727, "grad_norm": 20.062044477256407, "learning_rate": 4.710554834918267e-06, "loss": 0.112615966796875, "step": 70980 }, { "epoch": 0.613786305349716, "grad_norm": 2.1944310393966884, "learning_rate": 4.7103874608414405e-06, "loss": 0.0619293212890625, "step": 70985 }, { "epoch": 0.6138295388712592, "grad_norm": 28.57178555692045, "learning_rate": 4.710220078876457e-06, "loss": 0.21984939575195311, "step": 70990 }, { "epoch": 0.6138727723928025, "grad_norm": 1.5599333529627286, "learning_rate": 4.71005268902409e-06, "loss": 0.0822509765625, "step": 70995 }, { "epoch": 0.6139160059143457, "grad_norm": 0.2716180121289246, "learning_rate": 4.709885291285112e-06, "loss": 0.028983306884765626, "step": 71000 }, { "epoch": 0.613959239435889, "grad_norm": 0.3268112268535512, "learning_rate": 4.709717885660292e-06, "loss": 0.16496810913085938, "step": 71005 }, { "epoch": 0.6140024729574323, "grad_norm": 4.943594874069047, "learning_rate": 4.7095504721504065e-06, "loss": 0.07860870361328125, "step": 71010 }, { "epoch": 0.6140457064789755, "grad_norm": 40.247455116239934, "learning_rate": 4.709383050756224e-06, "loss": 0.29867706298828123, "step": 71015 }, { "epoch": 0.6140889400005188, "grad_norm": 2.573719184225712, "learning_rate": 4.709215621478517e-06, "loss": 0.26558914184570315, "step": 71020 }, { "epoch": 0.6141321735220621, "grad_norm": 0.13631856989702368, "learning_rate": 4.7090481843180605e-06, "loss": 0.3486785888671875, "step": 71025 }, { "epoch": 0.6141754070436053, "grad_norm": 2.471845381023466, "learning_rate": 4.708880739275623e-06, "loss": 0.12556915283203124, "step": 71030 }, { "epoch": 0.6142186405651486, "grad_norm": 15.633536898886303, "learning_rate": 4.708713286351981e-06, "loss": 0.07292404174804687, "step": 71035 }, { "epoch": 0.6142618740866919, "grad_norm": 32.188253296359306, "learning_rate": 4.708545825547902e-06, "loss": 0.1631500244140625, "step": 71040 }, { "epoch": 0.6143051076082351, "grad_norm": 1.2429976684079145, "learning_rate": 4.708378356864162e-06, "loss": 0.027352523803710938, "step": 71045 }, { "epoch": 0.6143483411297784, "grad_norm": 18.714281408841092, "learning_rate": 4.708210880301531e-06, "loss": 0.29161529541015624, "step": 71050 }, { "epoch": 0.6143915746513217, "grad_norm": 18.03612068661483, "learning_rate": 4.708043395860783e-06, "loss": 0.5755523681640625, "step": 71055 }, { "epoch": 0.6144348081728649, "grad_norm": 0.8860072186216058, "learning_rate": 4.7078759035426896e-06, "loss": 0.12698974609375, "step": 71060 }, { "epoch": 0.6144780416944082, "grad_norm": 9.394990213034454, "learning_rate": 4.707708403348024e-06, "loss": 0.2329559326171875, "step": 71065 }, { "epoch": 0.6145212752159515, "grad_norm": 10.122729941153674, "learning_rate": 4.707540895277557e-06, "loss": 0.39010162353515626, "step": 71070 }, { "epoch": 0.6145645087374947, "grad_norm": 12.459545094969465, "learning_rate": 4.707373379332063e-06, "loss": 0.13714599609375, "step": 71075 }, { "epoch": 0.6146077422590379, "grad_norm": 3.878384923843297, "learning_rate": 4.707205855512314e-06, "loss": 0.07022476196289062, "step": 71080 }, { "epoch": 0.6146509757805813, "grad_norm": 0.33666324379831947, "learning_rate": 4.707038323819081e-06, "loss": 0.02994537353515625, "step": 71085 }, { "epoch": 0.6146942093021245, "grad_norm": 15.702117027456257, "learning_rate": 4.7068707842531406e-06, "loss": 0.27308349609375, "step": 71090 }, { "epoch": 0.6147374428236677, "grad_norm": 2.085530586565408, "learning_rate": 4.706703236815261e-06, "loss": 0.024484634399414062, "step": 71095 }, { "epoch": 0.614780676345211, "grad_norm": 13.689185796900528, "learning_rate": 4.706535681506217e-06, "loss": 0.1847015380859375, "step": 71100 }, { "epoch": 0.6148239098667543, "grad_norm": 2.3355698301852414, "learning_rate": 4.706368118326782e-06, "loss": 0.11896209716796875, "step": 71105 }, { "epoch": 0.6148671433882975, "grad_norm": 13.559271713521238, "learning_rate": 4.706200547277727e-06, "loss": 0.11008033752441407, "step": 71110 }, { "epoch": 0.6149103769098409, "grad_norm": 4.944843163460708, "learning_rate": 4.706032968359827e-06, "loss": 0.04045791625976562, "step": 71115 }, { "epoch": 0.6149536104313841, "grad_norm": 0.4205572464285602, "learning_rate": 4.705865381573854e-06, "loss": 0.25116653442382814, "step": 71120 }, { "epoch": 0.6149968439529273, "grad_norm": 24.99090614891355, "learning_rate": 4.705697786920579e-06, "loss": 0.3527214050292969, "step": 71125 }, { "epoch": 0.6150400774744706, "grad_norm": 11.638502818731364, "learning_rate": 4.705530184400777e-06, "loss": 0.14764480590820311, "step": 71130 }, { "epoch": 0.6150833109960139, "grad_norm": 2.8400801797855677, "learning_rate": 4.705362574015221e-06, "loss": 0.08713455200195312, "step": 71135 }, { "epoch": 0.6151265445175571, "grad_norm": 1.3292273316715262, "learning_rate": 4.7051949557646834e-06, "loss": 0.06392745971679688, "step": 71140 }, { "epoch": 0.6151697780391004, "grad_norm": 20.317308567333104, "learning_rate": 4.705027329649937e-06, "loss": 0.2685943603515625, "step": 71145 }, { "epoch": 0.6152130115606437, "grad_norm": 3.4939705579313545, "learning_rate": 4.704859695671756e-06, "loss": 0.2382892608642578, "step": 71150 }, { "epoch": 0.6152562450821869, "grad_norm": 15.38615845938821, "learning_rate": 4.704692053830912e-06, "loss": 0.212530517578125, "step": 71155 }, { "epoch": 0.6152994786037302, "grad_norm": 6.04933332542173, "learning_rate": 4.704524404128179e-06, "loss": 0.0728668212890625, "step": 71160 }, { "epoch": 0.6153427121252735, "grad_norm": 32.138259068316955, "learning_rate": 4.70435674656433e-06, "loss": 0.76878662109375, "step": 71165 }, { "epoch": 0.6153859456468167, "grad_norm": 7.713853778834452, "learning_rate": 4.7041890811401385e-06, "loss": 0.2891349792480469, "step": 71170 }, { "epoch": 0.6154291791683599, "grad_norm": 6.86217856412669, "learning_rate": 4.704021407856378e-06, "loss": 0.4066253662109375, "step": 71175 }, { "epoch": 0.6154724126899033, "grad_norm": 0.6102147498691655, "learning_rate": 4.703853726713821e-06, "loss": 0.017262840270996095, "step": 71180 }, { "epoch": 0.6155156462114465, "grad_norm": 1.655821625651656, "learning_rate": 4.703686037713241e-06, "loss": 0.08399658203125, "step": 71185 }, { "epoch": 0.6155588797329897, "grad_norm": 10.856648776184693, "learning_rate": 4.703518340855412e-06, "loss": 0.236273193359375, "step": 71190 }, { "epoch": 0.6156021132545331, "grad_norm": 15.187558752879747, "learning_rate": 4.703350636141105e-06, "loss": 0.108062744140625, "step": 71195 }, { "epoch": 0.6156453467760763, "grad_norm": 20.218267421209767, "learning_rate": 4.703182923571098e-06, "loss": 0.0349212646484375, "step": 71200 }, { "epoch": 0.6156885802976195, "grad_norm": 1.266972626735901, "learning_rate": 4.70301520314616e-06, "loss": 0.12664508819580078, "step": 71205 }, { "epoch": 0.6157318138191629, "grad_norm": 22.871421593735374, "learning_rate": 4.702847474867067e-06, "loss": 0.4956687927246094, "step": 71210 }, { "epoch": 0.6157750473407061, "grad_norm": 0.4012293755569376, "learning_rate": 4.702679738734592e-06, "loss": 0.10787506103515625, "step": 71215 }, { "epoch": 0.6158182808622493, "grad_norm": 8.226790821206452, "learning_rate": 4.7025119947495085e-06, "loss": 0.1180084228515625, "step": 71220 }, { "epoch": 0.6158615143837927, "grad_norm": 0.46558716187490623, "learning_rate": 4.70234424291259e-06, "loss": 0.0388946533203125, "step": 71225 }, { "epoch": 0.6159047479053359, "grad_norm": 11.085279491478722, "learning_rate": 4.70217648322461e-06, "loss": 0.152423095703125, "step": 71230 }, { "epoch": 0.6159479814268791, "grad_norm": 0.29751913276162556, "learning_rate": 4.702008715686343e-06, "loss": 0.0466552734375, "step": 71235 }, { "epoch": 0.6159912149484225, "grad_norm": 1.3863970893835666, "learning_rate": 4.7018409402985625e-06, "loss": 0.09937667846679688, "step": 71240 }, { "epoch": 0.6160344484699657, "grad_norm": 30.6130022091558, "learning_rate": 4.701673157062041e-06, "loss": 0.318218994140625, "step": 71245 }, { "epoch": 0.6160776819915089, "grad_norm": 0.29032543149841344, "learning_rate": 4.7015053659775545e-06, "loss": 0.04087409973144531, "step": 71250 }, { "epoch": 0.6161209155130521, "grad_norm": 27.574001639847406, "learning_rate": 4.701337567045874e-06, "loss": 0.1053558349609375, "step": 71255 }, { "epoch": 0.6161641490345955, "grad_norm": 0.4435748144270253, "learning_rate": 4.7011697602677755e-06, "loss": 0.21506729125976562, "step": 71260 }, { "epoch": 0.6162073825561387, "grad_norm": 6.568436947282997, "learning_rate": 4.701001945644033e-06, "loss": 0.107000732421875, "step": 71265 }, { "epoch": 0.616250616077682, "grad_norm": 0.4238173325266665, "learning_rate": 4.700834123175419e-06, "loss": 0.034353446960449216, "step": 71270 }, { "epoch": 0.6162938495992253, "grad_norm": 52.88809300584266, "learning_rate": 4.70066629286271e-06, "loss": 0.131024169921875, "step": 71275 }, { "epoch": 0.6163370831207685, "grad_norm": 20.69753592439548, "learning_rate": 4.700498454706677e-06, "loss": 0.253045654296875, "step": 71280 }, { "epoch": 0.6163803166423117, "grad_norm": 15.26516826749553, "learning_rate": 4.700330608708095e-06, "loss": 0.08364524841308593, "step": 71285 }, { "epoch": 0.6164235501638551, "grad_norm": 9.70704120087994, "learning_rate": 4.700162754867739e-06, "loss": 0.07274169921875, "step": 71290 }, { "epoch": 0.6164667836853983, "grad_norm": 12.166757405826244, "learning_rate": 4.699994893186383e-06, "loss": 0.6763626098632812, "step": 71295 }, { "epoch": 0.6165100172069415, "grad_norm": 0.22032934081663583, "learning_rate": 4.6998270236648e-06, "loss": 0.1753662109375, "step": 71300 }, { "epoch": 0.6165532507284849, "grad_norm": 15.906431274812478, "learning_rate": 4.6996591463037646e-06, "loss": 0.0890279769897461, "step": 71305 }, { "epoch": 0.6165964842500281, "grad_norm": 7.795757150169628, "learning_rate": 4.699491261104053e-06, "loss": 0.1942230224609375, "step": 71310 }, { "epoch": 0.6166397177715713, "grad_norm": 4.582811717918157, "learning_rate": 4.699323368066436e-06, "loss": 0.1169921875, "step": 71315 }, { "epoch": 0.6166829512931147, "grad_norm": 18.118169510352832, "learning_rate": 4.699155467191692e-06, "loss": 0.16077327728271484, "step": 71320 }, { "epoch": 0.6167261848146579, "grad_norm": 0.2503520748255049, "learning_rate": 4.6989875584805915e-06, "loss": 0.12052459716796875, "step": 71325 }, { "epoch": 0.6167694183362011, "grad_norm": 23.623078493240683, "learning_rate": 4.698819641933911e-06, "loss": 0.22233352661132813, "step": 71330 }, { "epoch": 0.6168126518577445, "grad_norm": 23.539653349894923, "learning_rate": 4.698651717552424e-06, "loss": 0.13735885620117189, "step": 71335 }, { "epoch": 0.6168558853792877, "grad_norm": 0.1291099286636655, "learning_rate": 4.6984837853369064e-06, "loss": 0.2836112976074219, "step": 71340 }, { "epoch": 0.6168991189008309, "grad_norm": 0.8230946151885311, "learning_rate": 4.698315845288131e-06, "loss": 0.021079254150390626, "step": 71345 }, { "epoch": 0.6169423524223742, "grad_norm": 30.349101742382718, "learning_rate": 4.698147897406873e-06, "loss": 0.5356956481933594, "step": 71350 }, { "epoch": 0.6169855859439175, "grad_norm": 2.419649647405508, "learning_rate": 4.697979941693906e-06, "loss": 0.11172256469726563, "step": 71355 }, { "epoch": 0.6170288194654607, "grad_norm": 1.2617978979619358, "learning_rate": 4.697811978150007e-06, "loss": 0.179827880859375, "step": 71360 }, { "epoch": 0.617072052987004, "grad_norm": 2.615683204344323, "learning_rate": 4.6976440067759486e-06, "loss": 0.2080169677734375, "step": 71365 }, { "epoch": 0.6171152865085473, "grad_norm": 13.251680727149003, "learning_rate": 4.697476027572506e-06, "loss": 0.123046875, "step": 71370 }, { "epoch": 0.6171585200300905, "grad_norm": 66.46919618046284, "learning_rate": 4.697308040540455e-06, "loss": 0.22144203186035155, "step": 71375 }, { "epoch": 0.6172017535516338, "grad_norm": 4.866556655829688, "learning_rate": 4.697140045680568e-06, "loss": 0.0531951904296875, "step": 71380 }, { "epoch": 0.6172449870731771, "grad_norm": 24.378476200999426, "learning_rate": 4.6969720429936206e-06, "loss": 0.17416229248046874, "step": 71385 }, { "epoch": 0.6172882205947203, "grad_norm": 14.758669718779085, "learning_rate": 4.6968040324803895e-06, "loss": 0.1186004638671875, "step": 71390 }, { "epoch": 0.6173314541162636, "grad_norm": 8.262933993144612, "learning_rate": 4.696636014141647e-06, "loss": 0.170770263671875, "step": 71395 }, { "epoch": 0.6173746876378069, "grad_norm": 4.159399614277595, "learning_rate": 4.69646798797817e-06, "loss": 0.12092971801757812, "step": 71400 }, { "epoch": 0.6174179211593501, "grad_norm": 0.49893701576022703, "learning_rate": 4.6962999539907325e-06, "loss": 0.097698974609375, "step": 71405 }, { "epoch": 0.6174611546808934, "grad_norm": 39.73131219590867, "learning_rate": 4.696131912180109e-06, "loss": 0.18316268920898438, "step": 71410 }, { "epoch": 0.6175043882024367, "grad_norm": 2.269809549150403, "learning_rate": 4.695963862547075e-06, "loss": 0.0615966796875, "step": 71415 }, { "epoch": 0.6175476217239799, "grad_norm": 7.960466824216214, "learning_rate": 4.695795805092406e-06, "loss": 0.1056640625, "step": 71420 }, { "epoch": 0.6175908552455232, "grad_norm": 3.0644067826657637, "learning_rate": 4.695627739816876e-06, "loss": 0.1900726318359375, "step": 71425 }, { "epoch": 0.6176340887670664, "grad_norm": 3.20883706351111, "learning_rate": 4.695459666721261e-06, "loss": 0.31795463562011717, "step": 71430 }, { "epoch": 0.6176773222886097, "grad_norm": 7.5663564181561025, "learning_rate": 4.6952915858063364e-06, "loss": 0.13306655883789062, "step": 71435 }, { "epoch": 0.617720555810153, "grad_norm": 9.321316254352851, "learning_rate": 4.695123497072876e-06, "loss": 0.161236572265625, "step": 71440 }, { "epoch": 0.6177637893316962, "grad_norm": 7.997116889369016, "learning_rate": 4.694955400521656e-06, "loss": 0.1392498016357422, "step": 71445 }, { "epoch": 0.6178070228532395, "grad_norm": 2.755755333724769, "learning_rate": 4.694787296153451e-06, "loss": 0.054510498046875, "step": 71450 }, { "epoch": 0.6178502563747827, "grad_norm": 16.077153266546453, "learning_rate": 4.694619183969038e-06, "loss": 0.1417083740234375, "step": 71455 }, { "epoch": 0.617893489896326, "grad_norm": 12.98119619137466, "learning_rate": 4.69445106396919e-06, "loss": 0.3507072448730469, "step": 71460 }, { "epoch": 0.6179367234178693, "grad_norm": 0.23240647629193403, "learning_rate": 4.694282936154684e-06, "loss": 0.3717742919921875, "step": 71465 }, { "epoch": 0.6179799569394125, "grad_norm": 9.142643693592179, "learning_rate": 4.694114800526294e-06, "loss": 0.4641548156738281, "step": 71470 }, { "epoch": 0.6180231904609558, "grad_norm": 4.911750328968176, "learning_rate": 4.693946657084797e-06, "loss": 0.139837646484375, "step": 71475 }, { "epoch": 0.6180664239824991, "grad_norm": 0.25491378644938306, "learning_rate": 4.693778505830967e-06, "loss": 0.0099517822265625, "step": 71480 }, { "epoch": 0.6181096575040423, "grad_norm": 11.796534886934001, "learning_rate": 4.693610346765581e-06, "loss": 0.15201282501220703, "step": 71485 }, { "epoch": 0.6181528910255856, "grad_norm": 11.027347271901574, "learning_rate": 4.693442179889413e-06, "loss": 0.3233673095703125, "step": 71490 }, { "epoch": 0.6181961245471289, "grad_norm": 2.806302199265286, "learning_rate": 4.6932740052032405e-06, "loss": 0.05402450561523438, "step": 71495 }, { "epoch": 0.6182393580686721, "grad_norm": 1.9807398974819226, "learning_rate": 4.693105822707837e-06, "loss": 0.1360443115234375, "step": 71500 }, { "epoch": 0.6182825915902154, "grad_norm": 0.5964209289411827, "learning_rate": 4.692937632403979e-06, "loss": 0.09316978454589844, "step": 71505 }, { "epoch": 0.6183258251117586, "grad_norm": 15.66916725073616, "learning_rate": 4.6927694342924424e-06, "loss": 0.1549652099609375, "step": 71510 }, { "epoch": 0.6183690586333019, "grad_norm": 6.028871148315239, "learning_rate": 4.692601228374003e-06, "loss": 0.19142532348632812, "step": 71515 }, { "epoch": 0.6184122921548452, "grad_norm": 6.278665132026349, "learning_rate": 4.692433014649436e-06, "loss": 0.06600341796875, "step": 71520 }, { "epoch": 0.6184555256763884, "grad_norm": 3.673486276004387, "learning_rate": 4.692264793119518e-06, "loss": 0.18766326904296876, "step": 71525 }, { "epoch": 0.6184987591979317, "grad_norm": 1.0187695710867755, "learning_rate": 4.692096563785023e-06, "loss": 0.02534761428833008, "step": 71530 }, { "epoch": 0.618541992719475, "grad_norm": 59.59463907888886, "learning_rate": 4.6919283266467295e-06, "loss": 0.24578170776367186, "step": 71535 }, { "epoch": 0.6185852262410182, "grad_norm": 0.29630758953084807, "learning_rate": 4.691760081705411e-06, "loss": 0.1163543701171875, "step": 71540 }, { "epoch": 0.6186284597625615, "grad_norm": 2.035137874208709, "learning_rate": 4.6915918289618446e-06, "loss": 0.0822296142578125, "step": 71545 }, { "epoch": 0.6186716932841048, "grad_norm": 33.01135473050505, "learning_rate": 4.691423568416807e-06, "loss": 0.42851715087890624, "step": 71550 }, { "epoch": 0.618714926805648, "grad_norm": 55.539057714002894, "learning_rate": 4.691255300071073e-06, "loss": 0.25916748046875, "step": 71555 }, { "epoch": 0.6187581603271913, "grad_norm": 1.2622247323307654, "learning_rate": 4.691087023925418e-06, "loss": 0.19884109497070312, "step": 71560 }, { "epoch": 0.6188013938487346, "grad_norm": 9.000346807347462, "learning_rate": 4.690918739980621e-06, "loss": 0.36397705078125, "step": 71565 }, { "epoch": 0.6188446273702778, "grad_norm": 0.34067757937600374, "learning_rate": 4.690750448237455e-06, "loss": 0.1238800048828125, "step": 71570 }, { "epoch": 0.6188878608918211, "grad_norm": 9.316328760218362, "learning_rate": 4.690582148696697e-06, "loss": 0.29282150268554685, "step": 71575 }, { "epoch": 0.6189310944133644, "grad_norm": 22.779606163475, "learning_rate": 4.690413841359125e-06, "loss": 0.23923492431640625, "step": 71580 }, { "epoch": 0.6189743279349076, "grad_norm": 4.754387216208763, "learning_rate": 4.690245526225513e-06, "loss": 0.098590087890625, "step": 71585 }, { "epoch": 0.6190175614564509, "grad_norm": 20.775282774267836, "learning_rate": 4.690077203296637e-06, "loss": 0.3381683349609375, "step": 71590 }, { "epoch": 0.6190607949779942, "grad_norm": 6.299881378356482, "learning_rate": 4.689908872573274e-06, "loss": 0.08994140625, "step": 71595 }, { "epoch": 0.6191040284995374, "grad_norm": 19.162152959825125, "learning_rate": 4.689740534056202e-06, "loss": 0.18626670837402343, "step": 71600 }, { "epoch": 0.6191472620210806, "grad_norm": 4.838841004918317, "learning_rate": 4.689572187746196e-06, "loss": 0.0699127197265625, "step": 71605 }, { "epoch": 0.619190495542624, "grad_norm": 5.253786771614683, "learning_rate": 4.689403833644032e-06, "loss": 0.06626129150390625, "step": 71610 }, { "epoch": 0.6192337290641672, "grad_norm": 12.367331595006888, "learning_rate": 4.689235471750487e-06, "loss": 0.1117950439453125, "step": 71615 }, { "epoch": 0.6192769625857104, "grad_norm": 3.0659230064420355, "learning_rate": 4.689067102066337e-06, "loss": 0.12297859191894531, "step": 71620 }, { "epoch": 0.6193201961072538, "grad_norm": 0.5208456533648633, "learning_rate": 4.688898724592358e-06, "loss": 0.0978363037109375, "step": 71625 }, { "epoch": 0.619363429628797, "grad_norm": 8.961665460965127, "learning_rate": 4.688730339329329e-06, "loss": 0.3724170684814453, "step": 71630 }, { "epoch": 0.6194066631503402, "grad_norm": 2.8469825316299056, "learning_rate": 4.6885619462780234e-06, "loss": 0.15980224609375, "step": 71635 }, { "epoch": 0.6194498966718835, "grad_norm": 0.1494257269815934, "learning_rate": 4.68839354543922e-06, "loss": 0.03042449951171875, "step": 71640 }, { "epoch": 0.6194931301934268, "grad_norm": 0.9951146509476951, "learning_rate": 4.688225136813695e-06, "loss": 0.13132247924804688, "step": 71645 }, { "epoch": 0.61953636371497, "grad_norm": 1.568416488791026, "learning_rate": 4.688056720402224e-06, "loss": 0.32428665161132814, "step": 71650 }, { "epoch": 0.6195795972365133, "grad_norm": 31.978764275084842, "learning_rate": 4.687888296205585e-06, "loss": 0.108843994140625, "step": 71655 }, { "epoch": 0.6196228307580566, "grad_norm": 2.487147285122215, "learning_rate": 4.6877198642245545e-06, "loss": 0.4033470153808594, "step": 71660 }, { "epoch": 0.6196660642795998, "grad_norm": 133.52710422239156, "learning_rate": 4.687551424459909e-06, "loss": 0.24740753173828126, "step": 71665 }, { "epoch": 0.6197092978011431, "grad_norm": 2.6936934860839954, "learning_rate": 4.687382976912425e-06, "loss": 0.09574508666992188, "step": 71670 }, { "epoch": 0.6197525313226864, "grad_norm": 4.2734340268022235, "learning_rate": 4.687214521582881e-06, "loss": 0.1281982421875, "step": 71675 }, { "epoch": 0.6197957648442296, "grad_norm": 1.002004484423904, "learning_rate": 4.687046058472052e-06, "loss": 0.0534088134765625, "step": 71680 }, { "epoch": 0.6198389983657728, "grad_norm": 1.4217783939993862, "learning_rate": 4.686877587580716e-06, "loss": 0.07192344665527343, "step": 71685 }, { "epoch": 0.6198822318873162, "grad_norm": 33.81744152589878, "learning_rate": 4.68670910890965e-06, "loss": 0.27695159912109374, "step": 71690 }, { "epoch": 0.6199254654088594, "grad_norm": 6.014299585320612, "learning_rate": 4.686540622459629e-06, "loss": 0.07437400817871094, "step": 71695 }, { "epoch": 0.6199686989304026, "grad_norm": 12.190063941370365, "learning_rate": 4.686372128231433e-06, "loss": 0.1701740264892578, "step": 71700 }, { "epoch": 0.620011932451946, "grad_norm": 13.086512647122856, "learning_rate": 4.686203626225836e-06, "loss": 0.1814117431640625, "step": 71705 }, { "epoch": 0.6200551659734892, "grad_norm": 6.265873375679719, "learning_rate": 4.6860351164436195e-06, "loss": 0.02929229736328125, "step": 71710 }, { "epoch": 0.6200983994950324, "grad_norm": 1.7301295894503876, "learning_rate": 4.6858665988855565e-06, "loss": 0.06250381469726562, "step": 71715 }, { "epoch": 0.6201416330165758, "grad_norm": 3.946467300518699, "learning_rate": 4.6856980735524256e-06, "loss": 0.08127899169921875, "step": 71720 }, { "epoch": 0.620184866538119, "grad_norm": 1.2874033383440102, "learning_rate": 4.685529540445004e-06, "loss": 0.0947723388671875, "step": 71725 }, { "epoch": 0.6202281000596622, "grad_norm": 21.05256424509494, "learning_rate": 4.68536099956407e-06, "loss": 0.2451751708984375, "step": 71730 }, { "epoch": 0.6202713335812056, "grad_norm": 4.468794135926028, "learning_rate": 4.6851924509103995e-06, "loss": 0.2553466796875, "step": 71735 }, { "epoch": 0.6203145671027488, "grad_norm": 15.649331312106908, "learning_rate": 4.68502389448477e-06, "loss": 0.1416229248046875, "step": 71740 }, { "epoch": 0.620357800624292, "grad_norm": 1.311999454537928, "learning_rate": 4.68485533028796e-06, "loss": 0.307806396484375, "step": 71745 }, { "epoch": 0.6204010341458354, "grad_norm": 3.1192673187307283, "learning_rate": 4.684686758320745e-06, "loss": 0.19119720458984374, "step": 71750 }, { "epoch": 0.6204442676673786, "grad_norm": 11.092775916036981, "learning_rate": 4.684518178583905e-06, "loss": 0.09163818359375, "step": 71755 }, { "epoch": 0.6204875011889218, "grad_norm": 1.21831379181257, "learning_rate": 4.684349591078215e-06, "loss": 0.3414459228515625, "step": 71760 }, { "epoch": 0.6205307347104652, "grad_norm": 1.0623128320629136, "learning_rate": 4.684180995804453e-06, "loss": 0.3432373046875, "step": 71765 }, { "epoch": 0.6205739682320084, "grad_norm": 5.558850745913493, "learning_rate": 4.684012392763398e-06, "loss": 0.0950439453125, "step": 71770 }, { "epoch": 0.6206172017535516, "grad_norm": 31.050332297925053, "learning_rate": 4.683843781955827e-06, "loss": 0.2901802062988281, "step": 71775 }, { "epoch": 0.6206604352750948, "grad_norm": 4.2186023703788065, "learning_rate": 4.683675163382516e-06, "loss": 0.079168701171875, "step": 71780 }, { "epoch": 0.6207036687966382, "grad_norm": 4.272117477841359, "learning_rate": 4.683506537044245e-06, "loss": 0.14034881591796874, "step": 71785 }, { "epoch": 0.6207469023181814, "grad_norm": 1.3124766668627896, "learning_rate": 4.68333790294179e-06, "loss": 0.05115509033203125, "step": 71790 }, { "epoch": 0.6207901358397246, "grad_norm": 0.6734147379806212, "learning_rate": 4.683169261075929e-06, "loss": 0.087225341796875, "step": 71795 }, { "epoch": 0.620833369361268, "grad_norm": 29.479028586259684, "learning_rate": 4.683000611447441e-06, "loss": 0.14886016845703126, "step": 71800 }, { "epoch": 0.6208766028828112, "grad_norm": 13.561890579679115, "learning_rate": 4.682831954057103e-06, "loss": 0.366900634765625, "step": 71805 }, { "epoch": 0.6209198364043544, "grad_norm": 1.185146994033566, "learning_rate": 4.682663288905692e-06, "loss": 0.3483558654785156, "step": 71810 }, { "epoch": 0.6209630699258978, "grad_norm": 40.55277645047572, "learning_rate": 4.6824946159939865e-06, "loss": 0.16467666625976562, "step": 71815 }, { "epoch": 0.621006303447441, "grad_norm": 2.1940972582715914, "learning_rate": 4.682325935322765e-06, "loss": 0.11970291137695313, "step": 71820 }, { "epoch": 0.6210495369689842, "grad_norm": 8.463955185832393, "learning_rate": 4.682157246892805e-06, "loss": 0.1141693115234375, "step": 71825 }, { "epoch": 0.6210927704905276, "grad_norm": 19.217132439756906, "learning_rate": 4.681988550704884e-06, "loss": 0.4970001220703125, "step": 71830 }, { "epoch": 0.6211360040120708, "grad_norm": 13.90458474969577, "learning_rate": 4.681819846759781e-06, "loss": 0.05439453125, "step": 71835 }, { "epoch": 0.621179237533614, "grad_norm": 0.25575256122378925, "learning_rate": 4.681651135058273e-06, "loss": 0.16864089965820311, "step": 71840 }, { "epoch": 0.6212224710551574, "grad_norm": 150.58124085235912, "learning_rate": 4.681482415601139e-06, "loss": 0.17587432861328126, "step": 71845 }, { "epoch": 0.6212657045767006, "grad_norm": 1.5828767798635246, "learning_rate": 4.681313688389156e-06, "loss": 0.128839111328125, "step": 71850 }, { "epoch": 0.6213089380982438, "grad_norm": 4.40177285777097, "learning_rate": 4.681144953423104e-06, "loss": 0.126318359375, "step": 71855 }, { "epoch": 0.6213521716197871, "grad_norm": 9.181820022067146, "learning_rate": 4.680976210703758e-06, "loss": 0.12482452392578125, "step": 71860 }, { "epoch": 0.6213954051413304, "grad_norm": 26.31545987034779, "learning_rate": 4.6808074602318995e-06, "loss": 0.171087646484375, "step": 71865 }, { "epoch": 0.6214386386628736, "grad_norm": 2.1753561623516484, "learning_rate": 4.680638702008306e-06, "loss": 0.10682830810546876, "step": 71870 }, { "epoch": 0.6214818721844169, "grad_norm": 6.076999651925037, "learning_rate": 4.680469936033755e-06, "loss": 0.116552734375, "step": 71875 }, { "epoch": 0.6215251057059602, "grad_norm": 22.668984257799394, "learning_rate": 4.6803011623090235e-06, "loss": 0.318914794921875, "step": 71880 }, { "epoch": 0.6215683392275034, "grad_norm": 0.7846450018710803, "learning_rate": 4.680132380834894e-06, "loss": 0.03103485107421875, "step": 71885 }, { "epoch": 0.6216115727490467, "grad_norm": 3.7151318219177507, "learning_rate": 4.679963591612141e-06, "loss": 0.04377593994140625, "step": 71890 }, { "epoch": 0.62165480627059, "grad_norm": 6.5958213471474405, "learning_rate": 4.679794794641544e-06, "loss": 0.2139892578125, "step": 71895 }, { "epoch": 0.6216980397921332, "grad_norm": 4.982431618053043, "learning_rate": 4.679625989923883e-06, "loss": 0.169549560546875, "step": 71900 }, { "epoch": 0.6217412733136765, "grad_norm": 5.2454522044295295, "learning_rate": 4.679457177459935e-06, "loss": 0.13359375, "step": 71905 }, { "epoch": 0.6217845068352198, "grad_norm": 0.30347518724898603, "learning_rate": 4.679288357250479e-06, "loss": 0.2269012451171875, "step": 71910 }, { "epoch": 0.621827740356763, "grad_norm": 26.03348759882705, "learning_rate": 4.679119529296293e-06, "loss": 0.12968597412109376, "step": 71915 }, { "epoch": 0.6218709738783063, "grad_norm": 0.6129785649740604, "learning_rate": 4.678950693598156e-06, "loss": 0.11532058715820312, "step": 71920 }, { "epoch": 0.6219142073998496, "grad_norm": 31.442521903222595, "learning_rate": 4.678781850156847e-06, "loss": 0.24797821044921875, "step": 71925 }, { "epoch": 0.6219574409213928, "grad_norm": 3.0350267528041566, "learning_rate": 4.678612998973145e-06, "loss": 0.0633880615234375, "step": 71930 }, { "epoch": 0.622000674442936, "grad_norm": 1.4652061139516321, "learning_rate": 4.678444140047828e-06, "loss": 0.09200973510742187, "step": 71935 }, { "epoch": 0.6220439079644794, "grad_norm": 5.5378404251238145, "learning_rate": 4.678275273381673e-06, "loss": 0.11452817916870117, "step": 71940 }, { "epoch": 0.6220871414860226, "grad_norm": 0.2554235696007412, "learning_rate": 4.678106398975463e-06, "loss": 0.17147445678710938, "step": 71945 }, { "epoch": 0.6221303750075659, "grad_norm": 7.936784142506378, "learning_rate": 4.677937516829974e-06, "loss": 0.371966552734375, "step": 71950 }, { "epoch": 0.6221736085291091, "grad_norm": 25.650434857094265, "learning_rate": 4.677768626945986e-06, "loss": 0.2634368896484375, "step": 71955 }, { "epoch": 0.6222168420506524, "grad_norm": 3.872352367710746, "learning_rate": 4.677599729324276e-06, "loss": 0.028401947021484374, "step": 71960 }, { "epoch": 0.6222600755721956, "grad_norm": 43.233578598051814, "learning_rate": 4.677430823965626e-06, "loss": 0.40528030395507814, "step": 71965 }, { "epoch": 0.6223033090937389, "grad_norm": 6.0779029634717086, "learning_rate": 4.677261910870812e-06, "loss": 0.04152679443359375, "step": 71970 }, { "epoch": 0.6223465426152822, "grad_norm": 5.077326739956006, "learning_rate": 4.677092990040614e-06, "loss": 0.219866943359375, "step": 71975 }, { "epoch": 0.6223897761368254, "grad_norm": 2.8492895194508274, "learning_rate": 4.676924061475812e-06, "loss": 0.04097137451171875, "step": 71980 }, { "epoch": 0.6224330096583687, "grad_norm": 0.6435361946773085, "learning_rate": 4.6767551251771845e-06, "loss": 0.3412956237792969, "step": 71985 }, { "epoch": 0.622476243179912, "grad_norm": 1.5589143493363644, "learning_rate": 4.676586181145511e-06, "loss": 0.08083839416503906, "step": 71990 }, { "epoch": 0.6225194767014552, "grad_norm": 8.904145524282901, "learning_rate": 4.676417229381568e-06, "loss": 0.0872161865234375, "step": 71995 }, { "epoch": 0.6225627102229985, "grad_norm": 0.736756613740831, "learning_rate": 4.6762482698861395e-06, "loss": 0.07022552490234375, "step": 72000 }, { "epoch": 0.6226059437445418, "grad_norm": 16.152764280741156, "learning_rate": 4.676079302660001e-06, "loss": 0.31047210693359373, "step": 72005 }, { "epoch": 0.622649177266085, "grad_norm": 6.268481381676762, "learning_rate": 4.675910327703932e-06, "loss": 0.061851119995117186, "step": 72010 }, { "epoch": 0.6226924107876283, "grad_norm": 23.157994221711096, "learning_rate": 4.675741345018714e-06, "loss": 0.19626846313476562, "step": 72015 }, { "epoch": 0.6227356443091716, "grad_norm": 11.712582460935435, "learning_rate": 4.675572354605124e-06, "loss": 0.2088226318359375, "step": 72020 }, { "epoch": 0.6227788778307148, "grad_norm": 21.028958250213147, "learning_rate": 4.675403356463944e-06, "loss": 0.24548492431640626, "step": 72025 }, { "epoch": 0.6228221113522581, "grad_norm": 15.700424134534408, "learning_rate": 4.6752343505959495e-06, "loss": 0.20072021484375, "step": 72030 }, { "epoch": 0.6228653448738013, "grad_norm": 21.432092107885854, "learning_rate": 4.675065337001924e-06, "loss": 0.19932861328125, "step": 72035 }, { "epoch": 0.6229085783953446, "grad_norm": 6.268792565283696, "learning_rate": 4.6748963156826435e-06, "loss": 0.162615966796875, "step": 72040 }, { "epoch": 0.6229518119168879, "grad_norm": 1.0611401538080385, "learning_rate": 4.67472728663889e-06, "loss": 0.034112548828125, "step": 72045 }, { "epoch": 0.6229950454384311, "grad_norm": 11.650987275309424, "learning_rate": 4.674558249871443e-06, "loss": 0.15962371826171876, "step": 72050 }, { "epoch": 0.6230382789599744, "grad_norm": 3.835898114394756, "learning_rate": 4.674389205381081e-06, "loss": 0.14696502685546875, "step": 72055 }, { "epoch": 0.6230815124815177, "grad_norm": 33.45565877443333, "learning_rate": 4.674220153168583e-06, "loss": 0.42903594970703124, "step": 72060 }, { "epoch": 0.6231247460030609, "grad_norm": 5.3562394889048495, "learning_rate": 4.674051093234731e-06, "loss": 0.14945831298828124, "step": 72065 }, { "epoch": 0.6231679795246042, "grad_norm": 0.6864677082118756, "learning_rate": 4.673882025580302e-06, "loss": 0.30884552001953125, "step": 72070 }, { "epoch": 0.6232112130461475, "grad_norm": 2.839897996298411, "learning_rate": 4.673712950206077e-06, "loss": 0.1288543701171875, "step": 72075 }, { "epoch": 0.6232544465676907, "grad_norm": 58.52932965617086, "learning_rate": 4.673543867112837e-06, "loss": 0.9577896118164062, "step": 72080 }, { "epoch": 0.623297680089234, "grad_norm": 2.0268678084171587, "learning_rate": 4.673374776301359e-06, "loss": 0.05337448120117187, "step": 72085 }, { "epoch": 0.6233409136107773, "grad_norm": 0.24445488297030765, "learning_rate": 4.673205677772425e-06, "loss": 0.1634754180908203, "step": 72090 }, { "epoch": 0.6233841471323205, "grad_norm": 42.76814196137411, "learning_rate": 4.673036571526815e-06, "loss": 0.5293556213378906, "step": 72095 }, { "epoch": 0.6234273806538638, "grad_norm": 6.319515031979165, "learning_rate": 4.672867457565308e-06, "loss": 0.07950439453125, "step": 72100 }, { "epoch": 0.6234706141754071, "grad_norm": 0.11709695329542871, "learning_rate": 4.672698335888684e-06, "loss": 0.17758560180664062, "step": 72105 }, { "epoch": 0.6235138476969503, "grad_norm": 4.572435788814508, "learning_rate": 4.6725292064977225e-06, "loss": 0.09521141052246093, "step": 72110 }, { "epoch": 0.6235570812184936, "grad_norm": 10.441387894215469, "learning_rate": 4.672360069393204e-06, "loss": 0.05540313720703125, "step": 72115 }, { "epoch": 0.6236003147400369, "grad_norm": 19.830022249338473, "learning_rate": 4.672190924575909e-06, "loss": 0.18245773315429686, "step": 72120 }, { "epoch": 0.6236435482615801, "grad_norm": 8.188751901738945, "learning_rate": 4.672021772046617e-06, "loss": 0.08414649963378906, "step": 72125 }, { "epoch": 0.6236867817831233, "grad_norm": 19.638803023112704, "learning_rate": 4.671852611806109e-06, "loss": 0.08587265014648438, "step": 72130 }, { "epoch": 0.6237300153046667, "grad_norm": 2.2237334089215235, "learning_rate": 4.671683443855164e-06, "loss": 0.2045961380004883, "step": 72135 }, { "epoch": 0.6237732488262099, "grad_norm": 4.548447359578314, "learning_rate": 4.671514268194563e-06, "loss": 0.31260986328125, "step": 72140 }, { "epoch": 0.6238164823477531, "grad_norm": 22.1333940225159, "learning_rate": 4.671345084825086e-06, "loss": 0.08846817016601563, "step": 72145 }, { "epoch": 0.6238597158692964, "grad_norm": 3.9226894146450655, "learning_rate": 4.6711758937475125e-06, "loss": 0.1614696502685547, "step": 72150 }, { "epoch": 0.6239029493908397, "grad_norm": 2.4578688228309, "learning_rate": 4.671006694962623e-06, "loss": 0.2603179931640625, "step": 72155 }, { "epoch": 0.6239461829123829, "grad_norm": 13.4253504239464, "learning_rate": 4.670837488471199e-06, "loss": 0.1129486083984375, "step": 72160 }, { "epoch": 0.6239894164339262, "grad_norm": 12.392806514674778, "learning_rate": 4.67066827427402e-06, "loss": 0.08337421417236328, "step": 72165 }, { "epoch": 0.6240326499554695, "grad_norm": 36.52470364426462, "learning_rate": 4.670499052371868e-06, "loss": 0.298577880859375, "step": 72170 }, { "epoch": 0.6240758834770127, "grad_norm": 45.96980515582958, "learning_rate": 4.67032982276552e-06, "loss": 0.416534423828125, "step": 72175 }, { "epoch": 0.624119116998556, "grad_norm": 6.4861379506837595, "learning_rate": 4.670160585455759e-06, "loss": 0.182415771484375, "step": 72180 }, { "epoch": 0.6241623505200993, "grad_norm": 8.843249824699697, "learning_rate": 4.669991340443365e-06, "loss": 0.26841278076171876, "step": 72185 }, { "epoch": 0.6242055840416425, "grad_norm": 2.895878273352566, "learning_rate": 4.669822087729119e-06, "loss": 0.03760833740234375, "step": 72190 }, { "epoch": 0.6242488175631858, "grad_norm": 0.44962166309088647, "learning_rate": 4.6696528273138e-06, "loss": 0.0537628173828125, "step": 72195 }, { "epoch": 0.6242920510847291, "grad_norm": 23.371646058125837, "learning_rate": 4.66948355919819e-06, "loss": 0.30438232421875, "step": 72200 }, { "epoch": 0.6243352846062723, "grad_norm": 1.163604581889322, "learning_rate": 4.66931428338307e-06, "loss": 0.0440277099609375, "step": 72205 }, { "epoch": 0.6243785181278155, "grad_norm": 9.406116500871436, "learning_rate": 4.6691449998692195e-06, "loss": 0.220172119140625, "step": 72210 }, { "epoch": 0.6244217516493589, "grad_norm": 0.4354172033855826, "learning_rate": 4.6689757086574205e-06, "loss": 0.1984588623046875, "step": 72215 }, { "epoch": 0.6244649851709021, "grad_norm": 3.5387467729247484, "learning_rate": 4.668806409748452e-06, "loss": 0.13319091796875, "step": 72220 }, { "epoch": 0.6245082186924453, "grad_norm": 1.3705260575553266, "learning_rate": 4.668637103143097e-06, "loss": 0.04420013427734375, "step": 72225 }, { "epoch": 0.6245514522139887, "grad_norm": 1.0072171096565679, "learning_rate": 4.668467788842134e-06, "loss": 0.3560882568359375, "step": 72230 }, { "epoch": 0.6245946857355319, "grad_norm": 4.432394808587987, "learning_rate": 4.668298466846345e-06, "loss": 0.13971099853515626, "step": 72235 }, { "epoch": 0.6246379192570751, "grad_norm": 5.743110548366877, "learning_rate": 4.668129137156512e-06, "loss": 0.040556716918945315, "step": 72240 }, { "epoch": 0.6246811527786185, "grad_norm": 53.79658798379153, "learning_rate": 4.6679597997734135e-06, "loss": 0.28690567016601565, "step": 72245 }, { "epoch": 0.6247243863001617, "grad_norm": 14.436784298045335, "learning_rate": 4.667790454697833e-06, "loss": 0.1361297607421875, "step": 72250 }, { "epoch": 0.6247676198217049, "grad_norm": 4.336525305013933, "learning_rate": 4.667621101930549e-06, "loss": 0.1388251304626465, "step": 72255 }, { "epoch": 0.6248108533432483, "grad_norm": 1.3171865025477476, "learning_rate": 4.6674517414723445e-06, "loss": 0.0233978271484375, "step": 72260 }, { "epoch": 0.6248540868647915, "grad_norm": 1.1897725354594657, "learning_rate": 4.667282373324e-06, "loss": 0.06420822143554687, "step": 72265 }, { "epoch": 0.6248973203863347, "grad_norm": 9.948337941794149, "learning_rate": 4.667112997486296e-06, "loss": 0.06561393737792968, "step": 72270 }, { "epoch": 0.6249405539078781, "grad_norm": 7.0747308837341, "learning_rate": 4.666943613960014e-06, "loss": 0.17857131958007813, "step": 72275 }, { "epoch": 0.6249837874294213, "grad_norm": 3.8048533682290424, "learning_rate": 4.666774222745937e-06, "loss": 0.07599983215332032, "step": 72280 }, { "epoch": 0.6250270209509645, "grad_norm": 1.9610500960203376, "learning_rate": 4.666604823844843e-06, "loss": 0.17620849609375, "step": 72285 }, { "epoch": 0.6250702544725079, "grad_norm": 0.1619276315324704, "learning_rate": 4.666435417257515e-06, "loss": 0.13337249755859376, "step": 72290 }, { "epoch": 0.6251134879940511, "grad_norm": 18.38490720938015, "learning_rate": 4.666266002984735e-06, "loss": 0.3040771484375, "step": 72295 }, { "epoch": 0.6251567215155943, "grad_norm": 3.6242434714235783, "learning_rate": 4.666096581027283e-06, "loss": 0.24917755126953126, "step": 72300 }, { "epoch": 0.6251999550371375, "grad_norm": 0.9242673632098335, "learning_rate": 4.665927151385941e-06, "loss": 0.100958251953125, "step": 72305 }, { "epoch": 0.6252431885586809, "grad_norm": 1.8407868592030745, "learning_rate": 4.6657577140614895e-06, "loss": 0.1772674560546875, "step": 72310 }, { "epoch": 0.6252864220802241, "grad_norm": 6.77940323894799, "learning_rate": 4.665588269054711e-06, "loss": 0.0542633056640625, "step": 72315 }, { "epoch": 0.6253296556017673, "grad_norm": 19.993226137490744, "learning_rate": 4.6654188163663865e-06, "loss": 0.11573944091796876, "step": 72320 }, { "epoch": 0.6253728891233107, "grad_norm": 0.18795106080392157, "learning_rate": 4.665249355997297e-06, "loss": 0.02766265869140625, "step": 72325 }, { "epoch": 0.6254161226448539, "grad_norm": 12.355876062007368, "learning_rate": 4.6650798879482254e-06, "loss": 0.0851654052734375, "step": 72330 }, { "epoch": 0.6254593561663971, "grad_norm": 25.018060926609287, "learning_rate": 4.6649104122199525e-06, "loss": 0.17064132690429687, "step": 72335 }, { "epoch": 0.6255025896879405, "grad_norm": 34.740069672236935, "learning_rate": 4.664740928813259e-06, "loss": 0.15072021484375, "step": 72340 }, { "epoch": 0.6255458232094837, "grad_norm": 0.5240438764820312, "learning_rate": 4.664571437728928e-06, "loss": 0.06882476806640625, "step": 72345 }, { "epoch": 0.6255890567310269, "grad_norm": 2.906550113978483, "learning_rate": 4.664401938967742e-06, "loss": 0.03695831298828125, "step": 72350 }, { "epoch": 0.6256322902525703, "grad_norm": 2.9408658120622664, "learning_rate": 4.664232432530478e-06, "loss": 0.04794921875, "step": 72355 }, { "epoch": 0.6256755237741135, "grad_norm": 4.089610146925936, "learning_rate": 4.664062918417925e-06, "loss": 0.182965087890625, "step": 72360 }, { "epoch": 0.6257187572956567, "grad_norm": 3.742513271580448, "learning_rate": 4.663893396630858e-06, "loss": 0.2373321533203125, "step": 72365 }, { "epoch": 0.6257619908172001, "grad_norm": 7.356650962424453, "learning_rate": 4.663723867170062e-06, "loss": 0.5129117012023926, "step": 72370 }, { "epoch": 0.6258052243387433, "grad_norm": 1.8269547242936317, "learning_rate": 4.663554330036319e-06, "loss": 0.23744468688964843, "step": 72375 }, { "epoch": 0.6258484578602865, "grad_norm": 0.35436723556448707, "learning_rate": 4.663384785230411e-06, "loss": 0.04524078369140625, "step": 72380 }, { "epoch": 0.6258916913818298, "grad_norm": 7.484330373765928, "learning_rate": 4.66321523275312e-06, "loss": 0.278582763671875, "step": 72385 }, { "epoch": 0.6259349249033731, "grad_norm": 1.278400323521059, "learning_rate": 4.663045672605225e-06, "loss": 0.20712051391601563, "step": 72390 }, { "epoch": 0.6259781584249163, "grad_norm": 14.266960759049272, "learning_rate": 4.662876104787512e-06, "loss": 0.33465118408203126, "step": 72395 }, { "epoch": 0.6260213919464596, "grad_norm": 0.7144452777386546, "learning_rate": 4.662706529300762e-06, "loss": 0.043585205078125, "step": 72400 }, { "epoch": 0.6260646254680029, "grad_norm": 46.27793065606635, "learning_rate": 4.662536946145755e-06, "loss": 0.598358154296875, "step": 72405 }, { "epoch": 0.6261078589895461, "grad_norm": 15.591585156280315, "learning_rate": 4.662367355323273e-06, "loss": 0.2544281005859375, "step": 72410 }, { "epoch": 0.6261510925110894, "grad_norm": 0.18547200995015087, "learning_rate": 4.662197756834103e-06, "loss": 0.1194488525390625, "step": 72415 }, { "epoch": 0.6261943260326327, "grad_norm": 0.7786641932317924, "learning_rate": 4.662028150679022e-06, "loss": 0.0472381591796875, "step": 72420 }, { "epoch": 0.6262375595541759, "grad_norm": 11.811544765610723, "learning_rate": 4.661858536858814e-06, "loss": 0.30687789916992186, "step": 72425 }, { "epoch": 0.6262807930757192, "grad_norm": 8.641467006381367, "learning_rate": 4.661688915374262e-06, "loss": 0.061969757080078125, "step": 72430 }, { "epoch": 0.6263240265972625, "grad_norm": 6.445059118663331, "learning_rate": 4.661519286226147e-06, "loss": 0.09630622863769531, "step": 72435 }, { "epoch": 0.6263672601188057, "grad_norm": 18.619990785426733, "learning_rate": 4.661349649415252e-06, "loss": 0.1875908851623535, "step": 72440 }, { "epoch": 0.626410493640349, "grad_norm": 1.9934430115374366, "learning_rate": 4.661180004942359e-06, "loss": 0.17171630859375, "step": 72445 }, { "epoch": 0.6264537271618923, "grad_norm": 3.19081062302873, "learning_rate": 4.66101035280825e-06, "loss": 0.04076461791992188, "step": 72450 }, { "epoch": 0.6264969606834355, "grad_norm": 50.83965473649644, "learning_rate": 4.660840693013709e-06, "loss": 0.2598722457885742, "step": 72455 }, { "epoch": 0.6265401942049788, "grad_norm": 29.285503751340016, "learning_rate": 4.660671025559518e-06, "loss": 0.14857063293457032, "step": 72460 }, { "epoch": 0.6265834277265221, "grad_norm": 0.5402766860122004, "learning_rate": 4.660501350446458e-06, "loss": 0.08519563674926758, "step": 72465 }, { "epoch": 0.6266266612480653, "grad_norm": 28.92720640115952, "learning_rate": 4.660331667675313e-06, "loss": 0.068890380859375, "step": 72470 }, { "epoch": 0.6266698947696085, "grad_norm": 0.8789608728868894, "learning_rate": 4.660161977246865e-06, "loss": 0.0555511474609375, "step": 72475 }, { "epoch": 0.6267131282911518, "grad_norm": 20.97541000674025, "learning_rate": 4.659992279161897e-06, "loss": 0.20212860107421876, "step": 72480 }, { "epoch": 0.6267563618126951, "grad_norm": 16.211331823872246, "learning_rate": 4.65982257342119e-06, "loss": 0.17656707763671875, "step": 72485 }, { "epoch": 0.6267995953342383, "grad_norm": 1.6212832383942526, "learning_rate": 4.659652860025529e-06, "loss": 0.07218170166015625, "step": 72490 }, { "epoch": 0.6268428288557816, "grad_norm": 6.941770876676933, "learning_rate": 4.659483138975696e-06, "loss": 0.197998046875, "step": 72495 }, { "epoch": 0.6268860623773249, "grad_norm": 19.542567499654485, "learning_rate": 4.659313410272472e-06, "loss": 0.07116355895996093, "step": 72500 }, { "epoch": 0.6269292958988681, "grad_norm": 5.5701884677718105, "learning_rate": 4.6591436739166425e-06, "loss": 0.07928466796875, "step": 72505 }, { "epoch": 0.6269725294204114, "grad_norm": 0.8415297603333297, "learning_rate": 4.658973929908988e-06, "loss": 0.061944198608398435, "step": 72510 }, { "epoch": 0.6270157629419547, "grad_norm": 21.733864156209645, "learning_rate": 4.658804178250292e-06, "loss": 0.18574676513671876, "step": 72515 }, { "epoch": 0.6270589964634979, "grad_norm": 35.40582378696389, "learning_rate": 4.65863441894134e-06, "loss": 0.447601318359375, "step": 72520 }, { "epoch": 0.6271022299850412, "grad_norm": 1.325477995477988, "learning_rate": 4.65846465198291e-06, "loss": 0.23363494873046875, "step": 72525 }, { "epoch": 0.6271454635065845, "grad_norm": 0.26192485631569995, "learning_rate": 4.658294877375789e-06, "loss": 0.17976150512695313, "step": 72530 }, { "epoch": 0.6271886970281277, "grad_norm": 12.833266818707177, "learning_rate": 4.6581250951207585e-06, "loss": 0.17283782958984376, "step": 72535 }, { "epoch": 0.627231930549671, "grad_norm": 67.53348652131561, "learning_rate": 4.657955305218601e-06, "loss": 0.1933929443359375, "step": 72540 }, { "epoch": 0.6272751640712143, "grad_norm": 24.02383615363367, "learning_rate": 4.6577855076701e-06, "loss": 0.094384765625, "step": 72545 }, { "epoch": 0.6273183975927575, "grad_norm": 1.846956055349791, "learning_rate": 4.65761570247604e-06, "loss": 0.269525146484375, "step": 72550 }, { "epoch": 0.6273616311143008, "grad_norm": 28.78744301994896, "learning_rate": 4.657445889637201e-06, "loss": 0.34694061279296873, "step": 72555 }, { "epoch": 0.627404864635844, "grad_norm": 1.0666710290589458, "learning_rate": 4.65727606915437e-06, "loss": 0.02494964599609375, "step": 72560 }, { "epoch": 0.6274480981573873, "grad_norm": 6.431103850681521, "learning_rate": 4.657106241028327e-06, "loss": 0.22310104370117187, "step": 72565 }, { "epoch": 0.6274913316789306, "grad_norm": 3.667948849518206, "learning_rate": 4.656936405259856e-06, "loss": 0.08912811279296876, "step": 72570 }, { "epoch": 0.6275345652004738, "grad_norm": 10.963778817457445, "learning_rate": 4.656766561849742e-06, "loss": 0.16524658203125, "step": 72575 }, { "epoch": 0.6275777987220171, "grad_norm": 3.2329167540788672, "learning_rate": 4.656596710798766e-06, "loss": 0.054412841796875, "step": 72580 }, { "epoch": 0.6276210322435604, "grad_norm": 0.6942101245401483, "learning_rate": 4.6564268521077125e-06, "loss": 0.11970291137695313, "step": 72585 }, { "epoch": 0.6276642657651036, "grad_norm": 1.829336127892375, "learning_rate": 4.656256985777365e-06, "loss": 0.09317245483398437, "step": 72590 }, { "epoch": 0.6277074992866469, "grad_norm": 1.2513194312231855, "learning_rate": 4.656087111808507e-06, "loss": 0.15912322998046874, "step": 72595 }, { "epoch": 0.6277507328081902, "grad_norm": 5.941724630372414, "learning_rate": 4.655917230201921e-06, "loss": 0.3048736572265625, "step": 72600 }, { "epoch": 0.6277939663297334, "grad_norm": 5.251354620653585, "learning_rate": 4.655747340958391e-06, "loss": 0.0947509765625, "step": 72605 }, { "epoch": 0.6278371998512767, "grad_norm": 22.04062553116349, "learning_rate": 4.6555774440787e-06, "loss": 0.15498600006103516, "step": 72610 }, { "epoch": 0.62788043337282, "grad_norm": 0.5750180704401057, "learning_rate": 4.655407539563633e-06, "loss": 0.17638168334960938, "step": 72615 }, { "epoch": 0.6279236668943632, "grad_norm": 29.99205600730385, "learning_rate": 4.655237627413973e-06, "loss": 0.5485252380371094, "step": 72620 }, { "epoch": 0.6279669004159065, "grad_norm": 52.557313912640645, "learning_rate": 4.655067707630503e-06, "loss": 0.397393798828125, "step": 72625 }, { "epoch": 0.6280101339374498, "grad_norm": 2.251010548440045, "learning_rate": 4.654897780214007e-06, "loss": 0.0593048095703125, "step": 72630 }, { "epoch": 0.628053367458993, "grad_norm": 5.368904404485302, "learning_rate": 4.654727845165269e-06, "loss": 0.2007007598876953, "step": 72635 }, { "epoch": 0.6280966009805363, "grad_norm": 13.142812837497585, "learning_rate": 4.6545579024850715e-06, "loss": 0.263763427734375, "step": 72640 }, { "epoch": 0.6281398345020796, "grad_norm": 20.25370564423151, "learning_rate": 4.654387952174199e-06, "loss": 0.1571197509765625, "step": 72645 }, { "epoch": 0.6281830680236228, "grad_norm": 2.9961012749085367, "learning_rate": 4.654217994233437e-06, "loss": 0.172021484375, "step": 72650 }, { "epoch": 0.628226301545166, "grad_norm": 6.840893171358137, "learning_rate": 4.6540480286635664e-06, "loss": 0.22275276184082032, "step": 72655 }, { "epoch": 0.6282695350667094, "grad_norm": 39.79315113076917, "learning_rate": 4.653878055465373e-06, "loss": 0.2672920227050781, "step": 72660 }, { "epoch": 0.6283127685882526, "grad_norm": 5.06016097983416, "learning_rate": 4.65370807463964e-06, "loss": 0.31494598388671874, "step": 72665 }, { "epoch": 0.6283560021097958, "grad_norm": 14.112153746265538, "learning_rate": 4.653538086187152e-06, "loss": 0.4159526824951172, "step": 72670 }, { "epoch": 0.6283992356313391, "grad_norm": 0.5360220653091728, "learning_rate": 4.6533680901086916e-06, "loss": 0.14949951171875, "step": 72675 }, { "epoch": 0.6284424691528824, "grad_norm": 4.154914976744944, "learning_rate": 4.653198086405043e-06, "loss": 0.1273101806640625, "step": 72680 }, { "epoch": 0.6284857026744256, "grad_norm": 15.378245913066698, "learning_rate": 4.653028075076993e-06, "loss": 0.5249298095703125, "step": 72685 }, { "epoch": 0.628528936195969, "grad_norm": 2.7694008067823406, "learning_rate": 4.652858056125322e-06, "loss": 0.07305145263671875, "step": 72690 }, { "epoch": 0.6285721697175122, "grad_norm": 16.117695037362072, "learning_rate": 4.652688029550816e-06, "loss": 0.2233043670654297, "step": 72695 }, { "epoch": 0.6286154032390554, "grad_norm": 27.196823060924867, "learning_rate": 4.65251799535426e-06, "loss": 0.34730224609375, "step": 72700 }, { "epoch": 0.6286586367605987, "grad_norm": 16.073995999548938, "learning_rate": 4.652347953536435e-06, "loss": 0.19242486953735352, "step": 72705 }, { "epoch": 0.628701870282142, "grad_norm": 38.608131379654324, "learning_rate": 4.652177904098128e-06, "loss": 0.19718399047851562, "step": 72710 }, { "epoch": 0.6287451038036852, "grad_norm": 3.1395717458472836, "learning_rate": 4.652007847040124e-06, "loss": 0.13707962036132812, "step": 72715 }, { "epoch": 0.6287883373252285, "grad_norm": 1.1146533385779631, "learning_rate": 4.6518377823632045e-06, "loss": 0.04068603515625, "step": 72720 }, { "epoch": 0.6288315708467718, "grad_norm": 30.417602173235892, "learning_rate": 4.651667710068155e-06, "loss": 0.209735107421875, "step": 72725 }, { "epoch": 0.628874804368315, "grad_norm": 3.671714331695347, "learning_rate": 4.65149763015576e-06, "loss": 0.167303466796875, "step": 72730 }, { "epoch": 0.6289180378898582, "grad_norm": 2.319880519219805, "learning_rate": 4.651327542626804e-06, "loss": 0.05481414794921875, "step": 72735 }, { "epoch": 0.6289612714114016, "grad_norm": 37.121825775988945, "learning_rate": 4.651157447482072e-06, "loss": 0.24883079528808594, "step": 72740 }, { "epoch": 0.6290045049329448, "grad_norm": 9.800281663554992, "learning_rate": 4.650987344722347e-06, "loss": 0.08042221069335938, "step": 72745 }, { "epoch": 0.629047738454488, "grad_norm": 18.211975086473956, "learning_rate": 4.6508172343484135e-06, "loss": 0.1291259765625, "step": 72750 }, { "epoch": 0.6290909719760314, "grad_norm": 1.326628334970323, "learning_rate": 4.650647116361057e-06, "loss": 0.0504241943359375, "step": 72755 }, { "epoch": 0.6291342054975746, "grad_norm": 3.6426905435231753, "learning_rate": 4.650476990761064e-06, "loss": 0.08852882385253906, "step": 72760 }, { "epoch": 0.6291774390191178, "grad_norm": 1.6687409347197992, "learning_rate": 4.6503068575492144e-06, "loss": 0.24366378784179688, "step": 72765 }, { "epoch": 0.6292206725406612, "grad_norm": 2.804280135697225, "learning_rate": 4.650136716726296e-06, "loss": 0.07606048583984375, "step": 72770 }, { "epoch": 0.6292639060622044, "grad_norm": 1.5499392035079544, "learning_rate": 4.649966568293094e-06, "loss": 0.2405414581298828, "step": 72775 }, { "epoch": 0.6293071395837476, "grad_norm": 1.6255585454694608, "learning_rate": 4.6497964122503915e-06, "loss": 0.12297439575195312, "step": 72780 }, { "epoch": 0.629350373105291, "grad_norm": 26.005657952944823, "learning_rate": 4.649626248598972e-06, "loss": 0.20774993896484376, "step": 72785 }, { "epoch": 0.6293936066268342, "grad_norm": 12.609281083562406, "learning_rate": 4.6494560773396246e-06, "loss": 0.08129539489746093, "step": 72790 }, { "epoch": 0.6294368401483774, "grad_norm": 6.52964284732758, "learning_rate": 4.64928589847313e-06, "loss": 0.14706268310546874, "step": 72795 }, { "epoch": 0.6294800736699208, "grad_norm": 40.08630891820384, "learning_rate": 4.649115712000275e-06, "loss": 0.24681243896484376, "step": 72800 }, { "epoch": 0.629523307191464, "grad_norm": 10.20328448244542, "learning_rate": 4.648945517921844e-06, "loss": 0.165496826171875, "step": 72805 }, { "epoch": 0.6295665407130072, "grad_norm": 3.1929784892694215, "learning_rate": 4.648775316238622e-06, "loss": 0.0628854751586914, "step": 72810 }, { "epoch": 0.6296097742345506, "grad_norm": 11.527524757314817, "learning_rate": 4.648605106951394e-06, "loss": 0.24449625015258789, "step": 72815 }, { "epoch": 0.6296530077560938, "grad_norm": 0.9983435233196686, "learning_rate": 4.648434890060944e-06, "loss": 0.03585357666015625, "step": 72820 }, { "epoch": 0.629696241277637, "grad_norm": 11.271404885373203, "learning_rate": 4.64826466556806e-06, "loss": 0.13656005859375, "step": 72825 }, { "epoch": 0.6297394747991802, "grad_norm": 0.270002724036408, "learning_rate": 4.648094433473524e-06, "loss": 0.059835052490234374, "step": 72830 }, { "epoch": 0.6297827083207236, "grad_norm": 5.1376200678765, "learning_rate": 4.647924193778122e-06, "loss": 0.09339942932128906, "step": 72835 }, { "epoch": 0.6298259418422668, "grad_norm": 11.8464358999919, "learning_rate": 4.647753946482639e-06, "loss": 0.111553955078125, "step": 72840 }, { "epoch": 0.62986917536381, "grad_norm": 45.46390169816913, "learning_rate": 4.647583691587861e-06, "loss": 0.3226276397705078, "step": 72845 }, { "epoch": 0.6299124088853534, "grad_norm": 12.360037411371485, "learning_rate": 4.647413429094571e-06, "loss": 0.2329334259033203, "step": 72850 }, { "epoch": 0.6299556424068966, "grad_norm": 3.4911033713216866, "learning_rate": 4.647243159003557e-06, "loss": 0.07192230224609375, "step": 72855 }, { "epoch": 0.6299988759284398, "grad_norm": 16.310543649424087, "learning_rate": 4.647072881315603e-06, "loss": 0.1119537353515625, "step": 72860 }, { "epoch": 0.6300421094499832, "grad_norm": 3.1159750316218244, "learning_rate": 4.646902596031494e-06, "loss": 0.12158966064453125, "step": 72865 }, { "epoch": 0.6300853429715264, "grad_norm": 34.00658862218381, "learning_rate": 4.646732303152017e-06, "loss": 0.13615264892578124, "step": 72870 }, { "epoch": 0.6301285764930696, "grad_norm": 4.300855556981042, "learning_rate": 4.6465620026779556e-06, "loss": 0.04999046325683594, "step": 72875 }, { "epoch": 0.630171810014613, "grad_norm": 5.42012882582661, "learning_rate": 4.646391694610095e-06, "loss": 0.0725738525390625, "step": 72880 }, { "epoch": 0.6302150435361562, "grad_norm": 3.4271268472563783, "learning_rate": 4.6462213789492215e-06, "loss": 0.0385009765625, "step": 72885 }, { "epoch": 0.6302582770576994, "grad_norm": 9.842431330481986, "learning_rate": 4.646051055696122e-06, "loss": 0.055467987060546876, "step": 72890 }, { "epoch": 0.6303015105792428, "grad_norm": 2.4847352042221695, "learning_rate": 4.645880724851578e-06, "loss": 0.10238494873046874, "step": 72895 }, { "epoch": 0.630344744100786, "grad_norm": 0.6079070117176257, "learning_rate": 4.64571038641638e-06, "loss": 0.0942413330078125, "step": 72900 }, { "epoch": 0.6303879776223292, "grad_norm": 1.5512226237722289, "learning_rate": 4.645540040391309e-06, "loss": 0.196142578125, "step": 72905 }, { "epoch": 0.6304312111438725, "grad_norm": 0.39659554185299417, "learning_rate": 4.645369686777154e-06, "loss": 0.12905502319335938, "step": 72910 }, { "epoch": 0.6304744446654158, "grad_norm": 4.942569527702036, "learning_rate": 4.645199325574699e-06, "loss": 0.05400390625, "step": 72915 }, { "epoch": 0.630517678186959, "grad_norm": 17.703837238827614, "learning_rate": 4.64502895678473e-06, "loss": 0.26089935302734374, "step": 72920 }, { "epoch": 0.6305609117085023, "grad_norm": 28.73627345807239, "learning_rate": 4.644858580408033e-06, "loss": 0.27858505249023435, "step": 72925 }, { "epoch": 0.6306041452300456, "grad_norm": 9.020418177284498, "learning_rate": 4.644688196445393e-06, "loss": 0.10848541259765625, "step": 72930 }, { "epoch": 0.6306473787515888, "grad_norm": 0.3891086299308258, "learning_rate": 4.644517804897597e-06, "loss": 0.2316619873046875, "step": 72935 }, { "epoch": 0.6306906122731321, "grad_norm": 0.275293911359979, "learning_rate": 4.644347405765431e-06, "loss": 0.13135299682617188, "step": 72940 }, { "epoch": 0.6307338457946754, "grad_norm": 10.49775755935382, "learning_rate": 4.644176999049678e-06, "loss": 0.2096691131591797, "step": 72945 }, { "epoch": 0.6307770793162186, "grad_norm": 6.045568138544505, "learning_rate": 4.644006584751127e-06, "loss": 0.052996063232421876, "step": 72950 }, { "epoch": 0.6308203128377619, "grad_norm": 9.191799469871818, "learning_rate": 4.6438361628705635e-06, "loss": 0.21483345031738282, "step": 72955 }, { "epoch": 0.6308635463593052, "grad_norm": 3.988871862504563, "learning_rate": 4.643665733408772e-06, "loss": 0.252288818359375, "step": 72960 }, { "epoch": 0.6309067798808484, "grad_norm": 4.365997857476758, "learning_rate": 4.64349529636654e-06, "loss": 0.0595947265625, "step": 72965 }, { "epoch": 0.6309500134023917, "grad_norm": 5.063537003025891, "learning_rate": 4.6433248517446535e-06, "loss": 0.1469879150390625, "step": 72970 }, { "epoch": 0.630993246923935, "grad_norm": 8.865995140123466, "learning_rate": 4.643154399543897e-06, "loss": 0.25567970275878904, "step": 72975 }, { "epoch": 0.6310364804454782, "grad_norm": 2.068463312679183, "learning_rate": 4.642983939765057e-06, "loss": 0.0570709228515625, "step": 72980 }, { "epoch": 0.6310797139670214, "grad_norm": 4.727331523777294, "learning_rate": 4.6428134724089225e-06, "loss": 0.1199493408203125, "step": 72985 }, { "epoch": 0.6311229474885647, "grad_norm": 30.98845319359409, "learning_rate": 4.642642997476275e-06, "loss": 0.16135025024414062, "step": 72990 }, { "epoch": 0.631166181010108, "grad_norm": 13.12008112484613, "learning_rate": 4.642472514967904e-06, "loss": 0.047259521484375, "step": 72995 }, { "epoch": 0.6312094145316512, "grad_norm": 0.885042784579454, "learning_rate": 4.642302024884595e-06, "loss": 0.41246490478515624, "step": 73000 }, { "epoch": 0.6312526480531945, "grad_norm": 39.022588716122556, "learning_rate": 4.642131527227134e-06, "loss": 0.3107147216796875, "step": 73005 }, { "epoch": 0.6312958815747378, "grad_norm": 13.087922847397774, "learning_rate": 4.641961021996308e-06, "loss": 0.127655029296875, "step": 73010 }, { "epoch": 0.631339115096281, "grad_norm": 15.41564386390275, "learning_rate": 4.641790509192902e-06, "loss": 0.2200481414794922, "step": 73015 }, { "epoch": 0.6313823486178243, "grad_norm": 0.34651778143386985, "learning_rate": 4.641619988817704e-06, "loss": 0.2918067932128906, "step": 73020 }, { "epoch": 0.6314255821393676, "grad_norm": 0.9664670981158099, "learning_rate": 4.641449460871499e-06, "loss": 0.020013046264648438, "step": 73025 }, { "epoch": 0.6314688156609108, "grad_norm": 2.0782689628242035, "learning_rate": 4.641278925355075e-06, "loss": 0.05391845703125, "step": 73030 }, { "epoch": 0.6315120491824541, "grad_norm": 2.4540693590010427, "learning_rate": 4.641108382269217e-06, "loss": 0.035219573974609376, "step": 73035 }, { "epoch": 0.6315552827039974, "grad_norm": 11.943313673302756, "learning_rate": 4.640937831614713e-06, "loss": 0.1980712890625, "step": 73040 }, { "epoch": 0.6315985162255406, "grad_norm": 17.405114525374607, "learning_rate": 4.640767273392347e-06, "loss": 0.5714988708496094, "step": 73045 }, { "epoch": 0.6316417497470839, "grad_norm": 1.2512184452728299, "learning_rate": 4.640596707602908e-06, "loss": 0.028014373779296876, "step": 73050 }, { "epoch": 0.6316849832686272, "grad_norm": 15.620627815173796, "learning_rate": 4.640426134247183e-06, "loss": 0.4059154510498047, "step": 73055 }, { "epoch": 0.6317282167901704, "grad_norm": 2.2507967763780976, "learning_rate": 4.640255553325956e-06, "loss": 0.40847091674804686, "step": 73060 }, { "epoch": 0.6317714503117137, "grad_norm": 39.96860404301258, "learning_rate": 4.640084964840018e-06, "loss": 0.32172470092773436, "step": 73065 }, { "epoch": 0.631814683833257, "grad_norm": 7.083468578762124, "learning_rate": 4.6399143687901495e-06, "loss": 0.05097198486328125, "step": 73070 }, { "epoch": 0.6318579173548002, "grad_norm": 32.634180897855686, "learning_rate": 4.6397437651771435e-06, "loss": 0.194879150390625, "step": 73075 }, { "epoch": 0.6319011508763435, "grad_norm": 4.475262542106408, "learning_rate": 4.639573154001783e-06, "loss": 0.12809677124023439, "step": 73080 }, { "epoch": 0.6319443843978867, "grad_norm": 0.467974755666529, "learning_rate": 4.639402535264856e-06, "loss": 0.17307510375976562, "step": 73085 }, { "epoch": 0.63198761791943, "grad_norm": 27.91396334529479, "learning_rate": 4.63923190896715e-06, "loss": 0.18722114562988282, "step": 73090 }, { "epoch": 0.6320308514409733, "grad_norm": 12.452257430184314, "learning_rate": 4.6390612751094515e-06, "loss": 0.05996856689453125, "step": 73095 }, { "epoch": 0.6320740849625165, "grad_norm": 10.649647838405652, "learning_rate": 4.638890633692547e-06, "loss": 0.12464447021484375, "step": 73100 }, { "epoch": 0.6321173184840598, "grad_norm": 2.740655150636677, "learning_rate": 4.638719984717222e-06, "loss": 0.017354202270507813, "step": 73105 }, { "epoch": 0.6321605520056031, "grad_norm": 1.2454372186784533, "learning_rate": 4.638549328184267e-06, "loss": 0.06769981384277343, "step": 73110 }, { "epoch": 0.6322037855271463, "grad_norm": 9.053874479035304, "learning_rate": 4.638378664094466e-06, "loss": 0.4334503173828125, "step": 73115 }, { "epoch": 0.6322470190486896, "grad_norm": 34.890504156841445, "learning_rate": 4.638207992448609e-06, "loss": 0.21190338134765624, "step": 73120 }, { "epoch": 0.6322902525702329, "grad_norm": 0.9891849713995415, "learning_rate": 4.638037313247481e-06, "loss": 0.056049346923828125, "step": 73125 }, { "epoch": 0.6323334860917761, "grad_norm": 2.513597851047801, "learning_rate": 4.6378666264918694e-06, "loss": 0.05494537353515625, "step": 73130 }, { "epoch": 0.6323767196133194, "grad_norm": 29.41177407179236, "learning_rate": 4.637695932182562e-06, "loss": 0.19943161010742189, "step": 73135 }, { "epoch": 0.6324199531348627, "grad_norm": 17.686243443349007, "learning_rate": 4.637525230320345e-06, "loss": 0.1247039794921875, "step": 73140 }, { "epoch": 0.6324631866564059, "grad_norm": 6.220339211496853, "learning_rate": 4.637354520906006e-06, "loss": 0.053947830200195314, "step": 73145 }, { "epoch": 0.6325064201779492, "grad_norm": 30.86396956509311, "learning_rate": 4.637183803940333e-06, "loss": 0.10575637817382813, "step": 73150 }, { "epoch": 0.6325496536994925, "grad_norm": 12.70477515468389, "learning_rate": 4.637013079424114e-06, "loss": 0.10717697143554687, "step": 73155 }, { "epoch": 0.6325928872210357, "grad_norm": 16.424358986316367, "learning_rate": 4.636842347358135e-06, "loss": 0.24825286865234375, "step": 73160 }, { "epoch": 0.6326361207425789, "grad_norm": 2.9716637985642387, "learning_rate": 4.6366716077431825e-06, "loss": 0.2079010009765625, "step": 73165 }, { "epoch": 0.6326793542641223, "grad_norm": 18.857679917795483, "learning_rate": 4.636500860580046e-06, "loss": 0.18545608520507811, "step": 73170 }, { "epoch": 0.6327225877856655, "grad_norm": 22.53277245115451, "learning_rate": 4.636330105869513e-06, "loss": 0.3283935546875, "step": 73175 }, { "epoch": 0.6327658213072087, "grad_norm": 1.5889241054667467, "learning_rate": 4.636159343612369e-06, "loss": 0.19303359985351562, "step": 73180 }, { "epoch": 0.632809054828752, "grad_norm": 25.231669898969972, "learning_rate": 4.635988573809402e-06, "loss": 0.2497802734375, "step": 73185 }, { "epoch": 0.6328522883502953, "grad_norm": 2.435570959661678, "learning_rate": 4.635817796461402e-06, "loss": 0.273992919921875, "step": 73190 }, { "epoch": 0.6328955218718385, "grad_norm": 19.086554850032414, "learning_rate": 4.635647011569153e-06, "loss": 0.1429443359375, "step": 73195 }, { "epoch": 0.6329387553933818, "grad_norm": 3.964368241284076, "learning_rate": 4.635476219133446e-06, "loss": 0.18242359161376953, "step": 73200 }, { "epoch": 0.6329819889149251, "grad_norm": 9.28450592996281, "learning_rate": 4.635305419155067e-06, "loss": 0.386737060546875, "step": 73205 }, { "epoch": 0.6330252224364683, "grad_norm": 1.463526343493883, "learning_rate": 4.635134611634803e-06, "loss": 0.081396484375, "step": 73210 }, { "epoch": 0.6330684559580116, "grad_norm": 11.85385492669003, "learning_rate": 4.634963796573443e-06, "loss": 0.05062713623046875, "step": 73215 }, { "epoch": 0.6331116894795549, "grad_norm": 1.8325624858986078, "learning_rate": 4.634792973971775e-06, "loss": 0.11632843017578125, "step": 73220 }, { "epoch": 0.6331549230010981, "grad_norm": 0.4153647300466885, "learning_rate": 4.634622143830586e-06, "loss": 0.05176239013671875, "step": 73225 }, { "epoch": 0.6331981565226414, "grad_norm": 3.282387475310761, "learning_rate": 4.634451306150664e-06, "loss": 0.2034942626953125, "step": 73230 }, { "epoch": 0.6332413900441847, "grad_norm": 3.8243765962584892, "learning_rate": 4.634280460932797e-06, "loss": 0.023665618896484376, "step": 73235 }, { "epoch": 0.6332846235657279, "grad_norm": 5.217546977005335, "learning_rate": 4.634109608177773e-06, "loss": 0.11430511474609376, "step": 73240 }, { "epoch": 0.6333278570872712, "grad_norm": 14.987195201193302, "learning_rate": 4.633938747886379e-06, "loss": 0.2447174072265625, "step": 73245 }, { "epoch": 0.6333710906088145, "grad_norm": 44.018397892589654, "learning_rate": 4.633767880059406e-06, "loss": 0.3199066162109375, "step": 73250 }, { "epoch": 0.6334143241303577, "grad_norm": 17.78171451625636, "learning_rate": 4.633597004697638e-06, "loss": 0.25635986328125, "step": 73255 }, { "epoch": 0.6334575576519009, "grad_norm": 8.03577367829822, "learning_rate": 4.6334261218018655e-06, "loss": 0.13585205078125, "step": 73260 }, { "epoch": 0.6335007911734443, "grad_norm": 18.121459096304743, "learning_rate": 4.633255231372877e-06, "loss": 0.3723907470703125, "step": 73265 }, { "epoch": 0.6335440246949875, "grad_norm": 0.9986477749322462, "learning_rate": 4.6330843334114585e-06, "loss": 0.10592803955078126, "step": 73270 }, { "epoch": 0.6335872582165307, "grad_norm": 2.350220740094799, "learning_rate": 4.6329134279183995e-06, "loss": 0.064202880859375, "step": 73275 }, { "epoch": 0.6336304917380741, "grad_norm": 0.19134022885376895, "learning_rate": 4.632742514894488e-06, "loss": 0.18356170654296874, "step": 73280 }, { "epoch": 0.6336737252596173, "grad_norm": 16.438843379071855, "learning_rate": 4.632571594340513e-06, "loss": 0.14118385314941406, "step": 73285 }, { "epoch": 0.6337169587811605, "grad_norm": 5.82644297026451, "learning_rate": 4.632400666257261e-06, "loss": 0.5397171020507813, "step": 73290 }, { "epoch": 0.6337601923027039, "grad_norm": 1.8370083262858101, "learning_rate": 4.6322297306455225e-06, "loss": 0.027594757080078126, "step": 73295 }, { "epoch": 0.6338034258242471, "grad_norm": 4.063969140356677, "learning_rate": 4.632058787506084e-06, "loss": 0.231951904296875, "step": 73300 }, { "epoch": 0.6338466593457903, "grad_norm": 2.8673504599889483, "learning_rate": 4.631887836839735e-06, "loss": 0.041607666015625, "step": 73305 }, { "epoch": 0.6338898928673337, "grad_norm": 3.4695266238953386, "learning_rate": 4.631716878647263e-06, "loss": 0.06204833984375, "step": 73310 }, { "epoch": 0.6339331263888769, "grad_norm": 20.37618326329848, "learning_rate": 4.631545912929457e-06, "loss": 0.38868408203125, "step": 73315 }, { "epoch": 0.6339763599104201, "grad_norm": 1.389847818771827, "learning_rate": 4.6313749396871064e-06, "loss": 0.12081298828125, "step": 73320 }, { "epoch": 0.6340195934319635, "grad_norm": 12.866489551888694, "learning_rate": 4.6312039589209975e-06, "loss": 0.13554534912109376, "step": 73325 }, { "epoch": 0.6340628269535067, "grad_norm": 0.9760315703878264, "learning_rate": 4.631032970631921e-06, "loss": 0.04305877685546875, "step": 73330 }, { "epoch": 0.6341060604750499, "grad_norm": 1.1884784246391167, "learning_rate": 4.6308619748206635e-06, "loss": 0.0586090087890625, "step": 73335 }, { "epoch": 0.6341492939965931, "grad_norm": 3.0267125126888663, "learning_rate": 4.6306909714880154e-06, "loss": 0.11582260131835938, "step": 73340 }, { "epoch": 0.6341925275181365, "grad_norm": 57.97012069895924, "learning_rate": 4.630519960634764e-06, "loss": 0.22597503662109375, "step": 73345 }, { "epoch": 0.6342357610396797, "grad_norm": 26.612640415357035, "learning_rate": 4.6303489422616995e-06, "loss": 0.1190617561340332, "step": 73350 }, { "epoch": 0.6342789945612229, "grad_norm": 9.07510823970158, "learning_rate": 4.63017791636961e-06, "loss": 0.1108856201171875, "step": 73355 }, { "epoch": 0.6343222280827663, "grad_norm": 3.2757108333570337, "learning_rate": 4.630006882959283e-06, "loss": 0.116253662109375, "step": 73360 }, { "epoch": 0.6343654616043095, "grad_norm": 0.6059635795456554, "learning_rate": 4.629835842031508e-06, "loss": 0.07918243408203125, "step": 73365 }, { "epoch": 0.6344086951258527, "grad_norm": 0.17779846046540868, "learning_rate": 4.629664793587075e-06, "loss": 0.08547821044921874, "step": 73370 }, { "epoch": 0.6344519286473961, "grad_norm": 6.926373112628298, "learning_rate": 4.629493737626772e-06, "loss": 0.06578216552734376, "step": 73375 }, { "epoch": 0.6344951621689393, "grad_norm": 27.667204029374464, "learning_rate": 4.629322674151387e-06, "loss": 0.2132537841796875, "step": 73380 }, { "epoch": 0.6345383956904825, "grad_norm": 5.7712028143278316, "learning_rate": 4.629151603161711e-06, "loss": 0.077783203125, "step": 73385 }, { "epoch": 0.6345816292120259, "grad_norm": 1.0845115133095395, "learning_rate": 4.62898052465853e-06, "loss": 0.27181053161621094, "step": 73390 }, { "epoch": 0.6346248627335691, "grad_norm": 0.9419844181198621, "learning_rate": 4.6288094386426365e-06, "loss": 0.25663299560546876, "step": 73395 }, { "epoch": 0.6346680962551123, "grad_norm": 0.28866226313349325, "learning_rate": 4.6286383451148175e-06, "loss": 0.1354095458984375, "step": 73400 }, { "epoch": 0.6347113297766557, "grad_norm": 2.4071310153263648, "learning_rate": 4.628467244075861e-06, "loss": 0.18293533325195313, "step": 73405 }, { "epoch": 0.6347545632981989, "grad_norm": 33.330849460750564, "learning_rate": 4.628296135526559e-06, "loss": 0.12055320739746093, "step": 73410 }, { "epoch": 0.6347977968197421, "grad_norm": 2.564531556112521, "learning_rate": 4.6281250194676986e-06, "loss": 0.15427398681640625, "step": 73415 }, { "epoch": 0.6348410303412855, "grad_norm": 10.103229002223749, "learning_rate": 4.62795389590007e-06, "loss": 0.11287193298339844, "step": 73420 }, { "epoch": 0.6348842638628287, "grad_norm": 1.5061758264008835, "learning_rate": 4.627782764824461e-06, "loss": 0.38090267181396487, "step": 73425 }, { "epoch": 0.6349274973843719, "grad_norm": 4.702012460607804, "learning_rate": 4.627611626241662e-06, "loss": 0.0402587890625, "step": 73430 }, { "epoch": 0.6349707309059152, "grad_norm": 2.7948433453594963, "learning_rate": 4.627440480152463e-06, "loss": 0.0486053466796875, "step": 73435 }, { "epoch": 0.6350139644274585, "grad_norm": 13.261363173912937, "learning_rate": 4.6272693265576505e-06, "loss": 0.06224822998046875, "step": 73440 }, { "epoch": 0.6350571979490017, "grad_norm": 14.947584674445622, "learning_rate": 4.627098165458016e-06, "loss": 0.06996917724609375, "step": 73445 }, { "epoch": 0.635100431470545, "grad_norm": 1.439982879096766, "learning_rate": 4.626926996854349e-06, "loss": 0.2915641784667969, "step": 73450 }, { "epoch": 0.6351436649920883, "grad_norm": 6.243833111830772, "learning_rate": 4.626755820747439e-06, "loss": 0.10009918212890626, "step": 73455 }, { "epoch": 0.6351868985136315, "grad_norm": 0.5286546106761199, "learning_rate": 4.626584637138075e-06, "loss": 0.0173828125, "step": 73460 }, { "epoch": 0.6352301320351748, "grad_norm": 5.892540232259591, "learning_rate": 4.626413446027046e-06, "loss": 0.09618682861328125, "step": 73465 }, { "epoch": 0.6352733655567181, "grad_norm": 0.9991340892572853, "learning_rate": 4.626242247415141e-06, "loss": 0.0515045166015625, "step": 73470 }, { "epoch": 0.6353165990782613, "grad_norm": 14.626090104013498, "learning_rate": 4.626071041303151e-06, "loss": 0.3661346435546875, "step": 73475 }, { "epoch": 0.6353598325998046, "grad_norm": 18.58839594632303, "learning_rate": 4.625899827691866e-06, "loss": 0.13644256591796874, "step": 73480 }, { "epoch": 0.6354030661213479, "grad_norm": 3.280821358450475, "learning_rate": 4.625728606582073e-06, "loss": 0.09921722412109375, "step": 73485 }, { "epoch": 0.6354462996428911, "grad_norm": 6.249670098465486, "learning_rate": 4.625557377974565e-06, "loss": 0.22619667053222656, "step": 73490 }, { "epoch": 0.6354895331644344, "grad_norm": 29.341468771255215, "learning_rate": 4.625386141870129e-06, "loss": 0.2490142822265625, "step": 73495 }, { "epoch": 0.6355327666859777, "grad_norm": 3.3936274003133855, "learning_rate": 4.625214898269555e-06, "loss": 0.4287078857421875, "step": 73500 }, { "epoch": 0.6355760002075209, "grad_norm": 13.057685704487948, "learning_rate": 4.625043647173635e-06, "loss": 0.16704940795898438, "step": 73505 }, { "epoch": 0.6356192337290641, "grad_norm": 12.668064621170718, "learning_rate": 4.6248723885831565e-06, "loss": 0.06367416381835937, "step": 73510 }, { "epoch": 0.6356624672506074, "grad_norm": 1.3544389562749488, "learning_rate": 4.6247011224989105e-06, "loss": 0.22695846557617189, "step": 73515 }, { "epoch": 0.6357057007721507, "grad_norm": 2.2129874916100185, "learning_rate": 4.624529848921686e-06, "loss": 0.0701263427734375, "step": 73520 }, { "epoch": 0.635748934293694, "grad_norm": 3.515470102477701, "learning_rate": 4.624358567852274e-06, "loss": 0.036936187744140626, "step": 73525 }, { "epoch": 0.6357921678152372, "grad_norm": 2.077129620744728, "learning_rate": 4.624187279291464e-06, "loss": 0.09942245483398438, "step": 73530 }, { "epoch": 0.6358354013367805, "grad_norm": 1.4102577784292594, "learning_rate": 4.624015983240044e-06, "loss": 0.3118896484375, "step": 73535 }, { "epoch": 0.6358786348583237, "grad_norm": 4.448308223284196, "learning_rate": 4.623844679698808e-06, "loss": 0.14227676391601562, "step": 73540 }, { "epoch": 0.635921868379867, "grad_norm": 0.291860403363193, "learning_rate": 4.623673368668543e-06, "loss": 0.053258514404296874, "step": 73545 }, { "epoch": 0.6359651019014103, "grad_norm": 17.446972491625157, "learning_rate": 4.6235020501500395e-06, "loss": 0.3575897216796875, "step": 73550 }, { "epoch": 0.6360083354229535, "grad_norm": 9.102298486989957, "learning_rate": 4.623330724144088e-06, "loss": 0.3917694091796875, "step": 73555 }, { "epoch": 0.6360515689444968, "grad_norm": 0.8680617845075123, "learning_rate": 4.62315939065148e-06, "loss": 0.1655498504638672, "step": 73560 }, { "epoch": 0.6360948024660401, "grad_norm": 0.4792586826141573, "learning_rate": 4.622988049673002e-06, "loss": 0.25936546325683596, "step": 73565 }, { "epoch": 0.6361380359875833, "grad_norm": 13.96379153173553, "learning_rate": 4.622816701209449e-06, "loss": 0.26243896484375, "step": 73570 }, { "epoch": 0.6361812695091266, "grad_norm": 0.31883890680561683, "learning_rate": 4.622645345261607e-06, "loss": 0.25216522216796877, "step": 73575 }, { "epoch": 0.6362245030306699, "grad_norm": 7.689249541323969, "learning_rate": 4.622473981830268e-06, "loss": 0.24454803466796876, "step": 73580 }, { "epoch": 0.6362677365522131, "grad_norm": 1.2223477780499172, "learning_rate": 4.622302610916224e-06, "loss": 0.21002197265625, "step": 73585 }, { "epoch": 0.6363109700737564, "grad_norm": 24.203382128129224, "learning_rate": 4.622131232520263e-06, "loss": 0.15887298583984374, "step": 73590 }, { "epoch": 0.6363542035952997, "grad_norm": 3.8046682456849434, "learning_rate": 4.621959846643175e-06, "loss": 0.11286964416503906, "step": 73595 }, { "epoch": 0.6363974371168429, "grad_norm": 49.91309096482506, "learning_rate": 4.6217884532857515e-06, "loss": 0.3956329345703125, "step": 73600 }, { "epoch": 0.6364406706383862, "grad_norm": 62.45958547746522, "learning_rate": 4.621617052448785e-06, "loss": 0.2956264495849609, "step": 73605 }, { "epoch": 0.6364839041599294, "grad_norm": 8.07492921297128, "learning_rate": 4.621445644133062e-06, "loss": 0.105743408203125, "step": 73610 }, { "epoch": 0.6365271376814727, "grad_norm": 4.6069443180516405, "learning_rate": 4.621274228339375e-06, "loss": 0.17231597900390624, "step": 73615 }, { "epoch": 0.636570371203016, "grad_norm": 26.33878775293894, "learning_rate": 4.621102805068515e-06, "loss": 0.17017974853515624, "step": 73620 }, { "epoch": 0.6366136047245592, "grad_norm": 28.052209010910953, "learning_rate": 4.6209313743212734e-06, "loss": 0.12615318298339845, "step": 73625 }, { "epoch": 0.6366568382461025, "grad_norm": 0.863511910820279, "learning_rate": 4.620759936098437e-06, "loss": 0.18549041748046874, "step": 73630 }, { "epoch": 0.6367000717676458, "grad_norm": 6.767912741866478, "learning_rate": 4.620588490400801e-06, "loss": 0.16337127685546876, "step": 73635 }, { "epoch": 0.636743305289189, "grad_norm": 1.0099760301580694, "learning_rate": 4.6204170372291535e-06, "loss": 0.13145751953125, "step": 73640 }, { "epoch": 0.6367865388107323, "grad_norm": 19.375881188780856, "learning_rate": 4.6202455765842854e-06, "loss": 0.10125961303710937, "step": 73645 }, { "epoch": 0.6368297723322756, "grad_norm": 2.5768914577735402, "learning_rate": 4.6200741084669885e-06, "loss": 0.0887298583984375, "step": 73650 }, { "epoch": 0.6368730058538188, "grad_norm": 9.938053341809166, "learning_rate": 4.619902632878053e-06, "loss": 0.05966110229492187, "step": 73655 }, { "epoch": 0.6369162393753621, "grad_norm": 39.33786067039568, "learning_rate": 4.619731149818269e-06, "loss": 0.4162841796875, "step": 73660 }, { "epoch": 0.6369594728969054, "grad_norm": 103.10205655707715, "learning_rate": 4.619559659288428e-06, "loss": 0.7772087097167969, "step": 73665 }, { "epoch": 0.6370027064184486, "grad_norm": 5.147079199995623, "learning_rate": 4.619388161289322e-06, "loss": 0.19932861328125, "step": 73670 }, { "epoch": 0.6370459399399919, "grad_norm": 26.265727923197606, "learning_rate": 4.61921665582174e-06, "loss": 0.1382354736328125, "step": 73675 }, { "epoch": 0.6370891734615352, "grad_norm": 30.759715723149718, "learning_rate": 4.619045142886475e-06, "loss": 0.37249755859375, "step": 73680 }, { "epoch": 0.6371324069830784, "grad_norm": 0.5798786331422464, "learning_rate": 4.618873622484315e-06, "loss": 0.12337493896484375, "step": 73685 }, { "epoch": 0.6371756405046216, "grad_norm": 10.432549271691837, "learning_rate": 4.618702094616054e-06, "loss": 0.29149169921875, "step": 73690 }, { "epoch": 0.637218874026165, "grad_norm": 28.896987600468883, "learning_rate": 4.618530559282482e-06, "loss": 0.18388290405273439, "step": 73695 }, { "epoch": 0.6372621075477082, "grad_norm": 0.6121304399487201, "learning_rate": 4.6183590164843894e-06, "loss": 0.10250740051269532, "step": 73700 }, { "epoch": 0.6373053410692514, "grad_norm": 1.5508096439083539, "learning_rate": 4.61818746622257e-06, "loss": 0.283428955078125, "step": 73705 }, { "epoch": 0.6373485745907947, "grad_norm": 1.199649254199831, "learning_rate": 4.618015908497811e-06, "loss": 0.05492706298828125, "step": 73710 }, { "epoch": 0.637391808112338, "grad_norm": 3.1252780258837043, "learning_rate": 4.617844343310906e-06, "loss": 0.25127449035644533, "step": 73715 }, { "epoch": 0.6374350416338812, "grad_norm": 29.317880062939263, "learning_rate": 4.617672770662646e-06, "loss": 0.29482421875, "step": 73720 }, { "epoch": 0.6374782751554245, "grad_norm": 0.5607618318054434, "learning_rate": 4.617501190553822e-06, "loss": 0.506109619140625, "step": 73725 }, { "epoch": 0.6375215086769678, "grad_norm": 58.84917029660644, "learning_rate": 4.617329602985226e-06, "loss": 0.6729568481445313, "step": 73730 }, { "epoch": 0.637564742198511, "grad_norm": 8.953717815243342, "learning_rate": 4.617158007957648e-06, "loss": 0.1438507080078125, "step": 73735 }, { "epoch": 0.6376079757200543, "grad_norm": 2.067251751061946, "learning_rate": 4.61698640547188e-06, "loss": 0.0503387451171875, "step": 73740 }, { "epoch": 0.6376512092415976, "grad_norm": 1.9903791521285066, "learning_rate": 4.616814795528714e-06, "loss": 0.10449371337890626, "step": 73745 }, { "epoch": 0.6376944427631408, "grad_norm": 2.088442529678833, "learning_rate": 4.616643178128942e-06, "loss": 0.166143798828125, "step": 73750 }, { "epoch": 0.6377376762846841, "grad_norm": 7.099687586937969, "learning_rate": 4.616471553273352e-06, "loss": 0.17095947265625, "step": 73755 }, { "epoch": 0.6377809098062274, "grad_norm": 1.6617055905226934, "learning_rate": 4.616299920962741e-06, "loss": 0.03905410766601562, "step": 73760 }, { "epoch": 0.6378241433277706, "grad_norm": 0.17411917039981267, "learning_rate": 4.616128281197896e-06, "loss": 0.18738937377929688, "step": 73765 }, { "epoch": 0.6378673768493139, "grad_norm": 0.5643278003185719, "learning_rate": 4.61595663397961e-06, "loss": 0.18669891357421875, "step": 73770 }, { "epoch": 0.6379106103708572, "grad_norm": 2.3081572331770532, "learning_rate": 4.6157849793086755e-06, "loss": 0.3494964599609375, "step": 73775 }, { "epoch": 0.6379538438924004, "grad_norm": 22.491247328361325, "learning_rate": 4.6156133171858825e-06, "loss": 0.24088134765625, "step": 73780 }, { "epoch": 0.6379970774139436, "grad_norm": 2.422794388602679, "learning_rate": 4.615441647612025e-06, "loss": 0.1484375, "step": 73785 }, { "epoch": 0.638040310935487, "grad_norm": 15.848836220779408, "learning_rate": 4.615269970587892e-06, "loss": 0.19511566162109376, "step": 73790 }, { "epoch": 0.6380835444570302, "grad_norm": 18.530804516044757, "learning_rate": 4.6150982861142775e-06, "loss": 0.2775531768798828, "step": 73795 }, { "epoch": 0.6381267779785734, "grad_norm": 9.25465103778052, "learning_rate": 4.614926594191973e-06, "loss": 0.06426258087158203, "step": 73800 }, { "epoch": 0.6381700115001168, "grad_norm": 0.918705392985054, "learning_rate": 4.614754894821768e-06, "loss": 0.08963165283203126, "step": 73805 }, { "epoch": 0.63821324502166, "grad_norm": 4.6836198565859855, "learning_rate": 4.614583188004457e-06, "loss": 0.22214202880859374, "step": 73810 }, { "epoch": 0.6382564785432032, "grad_norm": 4.402001675708031, "learning_rate": 4.614411473740832e-06, "loss": 0.100531005859375, "step": 73815 }, { "epoch": 0.6382997120647466, "grad_norm": 4.968776055367012, "learning_rate": 4.614239752031682e-06, "loss": 0.20607528686523438, "step": 73820 }, { "epoch": 0.6383429455862898, "grad_norm": 3.397065026145923, "learning_rate": 4.614068022877802e-06, "loss": 0.065338134765625, "step": 73825 }, { "epoch": 0.638386179107833, "grad_norm": 10.8294863407646, "learning_rate": 4.613896286279983e-06, "loss": 0.17059478759765626, "step": 73830 }, { "epoch": 0.6384294126293764, "grad_norm": 15.250047270436665, "learning_rate": 4.613724542239016e-06, "loss": 0.103631591796875, "step": 73835 }, { "epoch": 0.6384726461509196, "grad_norm": 1.7241265086314583, "learning_rate": 4.613552790755695e-06, "loss": 0.18320770263671876, "step": 73840 }, { "epoch": 0.6385158796724628, "grad_norm": 28.656104500260078, "learning_rate": 4.613381031830811e-06, "loss": 0.3641510009765625, "step": 73845 }, { "epoch": 0.6385591131940062, "grad_norm": 189.1435942092042, "learning_rate": 4.613209265465156e-06, "loss": 0.15585479736328126, "step": 73850 }, { "epoch": 0.6386023467155494, "grad_norm": 2.8527320627940385, "learning_rate": 4.613037491659522e-06, "loss": 0.2482269287109375, "step": 73855 }, { "epoch": 0.6386455802370926, "grad_norm": 0.5943741454866135, "learning_rate": 4.612865710414703e-06, "loss": 0.26182861328125, "step": 73860 }, { "epoch": 0.6386888137586358, "grad_norm": 19.199807449830335, "learning_rate": 4.6126939217314885e-06, "loss": 0.081298828125, "step": 73865 }, { "epoch": 0.6387320472801792, "grad_norm": 6.309282535103507, "learning_rate": 4.612522125610672e-06, "loss": 0.04735107421875, "step": 73870 }, { "epoch": 0.6387752808017224, "grad_norm": 25.235523509838675, "learning_rate": 4.612350322053047e-06, "loss": 0.40810546875, "step": 73875 }, { "epoch": 0.6388185143232656, "grad_norm": 1.573155001972207, "learning_rate": 4.612178511059404e-06, "loss": 0.06175155639648437, "step": 73880 }, { "epoch": 0.638861747844809, "grad_norm": 1.9671721618439941, "learning_rate": 4.612006692630537e-06, "loss": 0.04042205810546875, "step": 73885 }, { "epoch": 0.6389049813663522, "grad_norm": 18.987832719581974, "learning_rate": 4.611834866767236e-06, "loss": 0.257061767578125, "step": 73890 }, { "epoch": 0.6389482148878954, "grad_norm": 0.3258991243791618, "learning_rate": 4.611663033470297e-06, "loss": 0.07346229553222657, "step": 73895 }, { "epoch": 0.6389914484094388, "grad_norm": 16.445239837843825, "learning_rate": 4.611491192740509e-06, "loss": 0.07132949829101562, "step": 73900 }, { "epoch": 0.639034681930982, "grad_norm": 5.7957900513005525, "learning_rate": 4.611319344578668e-06, "loss": 0.16121482849121094, "step": 73905 }, { "epoch": 0.6390779154525252, "grad_norm": 12.791837243138566, "learning_rate": 4.611147488985562e-06, "loss": 0.11405506134033203, "step": 73910 }, { "epoch": 0.6391211489740686, "grad_norm": 5.584480061911928, "learning_rate": 4.610975625961987e-06, "loss": 0.0244110107421875, "step": 73915 }, { "epoch": 0.6391643824956118, "grad_norm": 8.93373205397205, "learning_rate": 4.610803755508735e-06, "loss": 0.08374862670898438, "step": 73920 }, { "epoch": 0.639207616017155, "grad_norm": 2.388766484974075, "learning_rate": 4.610631877626598e-06, "loss": 0.1804290771484375, "step": 73925 }, { "epoch": 0.6392508495386984, "grad_norm": 16.446933403303007, "learning_rate": 4.61045999231637e-06, "loss": 0.14843597412109374, "step": 73930 }, { "epoch": 0.6392940830602416, "grad_norm": 5.295984646359416, "learning_rate": 4.610288099578843e-06, "loss": 0.23493690490722657, "step": 73935 }, { "epoch": 0.6393373165817848, "grad_norm": 11.285302838799184, "learning_rate": 4.610116199414808e-06, "loss": 0.137969970703125, "step": 73940 }, { "epoch": 0.6393805501033282, "grad_norm": 5.737057574458202, "learning_rate": 4.6099442918250604e-06, "loss": 0.12243499755859374, "step": 73945 }, { "epoch": 0.6394237836248714, "grad_norm": 1.681938147744449, "learning_rate": 4.609772376810392e-06, "loss": 0.1863574981689453, "step": 73950 }, { "epoch": 0.6394670171464146, "grad_norm": 6.283549524944497, "learning_rate": 4.609600454371596e-06, "loss": 0.0981719970703125, "step": 73955 }, { "epoch": 0.6395102506679579, "grad_norm": 1.799247960275507, "learning_rate": 4.609428524509465e-06, "loss": 0.093377685546875, "step": 73960 }, { "epoch": 0.6395534841895012, "grad_norm": 8.214447820423487, "learning_rate": 4.60925658722479e-06, "loss": 0.10438919067382812, "step": 73965 }, { "epoch": 0.6395967177110444, "grad_norm": 0.13672002881628117, "learning_rate": 4.609084642518368e-06, "loss": 0.13007965087890624, "step": 73970 }, { "epoch": 0.6396399512325877, "grad_norm": 5.977618937554442, "learning_rate": 4.608912690390988e-06, "loss": 0.1655487060546875, "step": 73975 }, { "epoch": 0.639683184754131, "grad_norm": 1.397979583072532, "learning_rate": 4.6087407308434455e-06, "loss": 0.12087974548339844, "step": 73980 }, { "epoch": 0.6397264182756742, "grad_norm": 0.2465218580372998, "learning_rate": 4.608568763876534e-06, "loss": 0.05202932357788086, "step": 73985 }, { "epoch": 0.6397696517972175, "grad_norm": 0.8234742550984498, "learning_rate": 4.608396789491045e-06, "loss": 0.03896331787109375, "step": 73990 }, { "epoch": 0.6398128853187608, "grad_norm": 2.017957915277597, "learning_rate": 4.6082248076877705e-06, "loss": 0.13271141052246094, "step": 73995 }, { "epoch": 0.639856118840304, "grad_norm": 0.8882997457330047, "learning_rate": 4.608052818467507e-06, "loss": 0.14520645141601562, "step": 74000 }, { "epoch": 0.6398993523618473, "grad_norm": 29.87836001365475, "learning_rate": 4.607880821831046e-06, "loss": 0.36897735595703124, "step": 74005 }, { "epoch": 0.6399425858833906, "grad_norm": 4.355386143550085, "learning_rate": 4.60770881777918e-06, "loss": 0.0562286376953125, "step": 74010 }, { "epoch": 0.6399858194049338, "grad_norm": 26.54860112559389, "learning_rate": 4.607536806312704e-06, "loss": 0.10748443603515626, "step": 74015 }, { "epoch": 0.640029052926477, "grad_norm": 0.9938643287257753, "learning_rate": 4.607364787432409e-06, "loss": 0.08958511352539063, "step": 74020 }, { "epoch": 0.6400722864480204, "grad_norm": 30.98810545863923, "learning_rate": 4.6071927611390904e-06, "loss": 0.4216339111328125, "step": 74025 }, { "epoch": 0.6401155199695636, "grad_norm": 0.9759882723444416, "learning_rate": 4.607020727433541e-06, "loss": 0.1329345703125, "step": 74030 }, { "epoch": 0.6401587534911068, "grad_norm": 11.822282781717997, "learning_rate": 4.6068486863165535e-06, "loss": 0.1832916259765625, "step": 74035 }, { "epoch": 0.6402019870126501, "grad_norm": 7.008883881245935, "learning_rate": 4.606676637788922e-06, "loss": 0.08012847900390625, "step": 74040 }, { "epoch": 0.6402452205341934, "grad_norm": 2.2266687736149766, "learning_rate": 4.6065045818514395e-06, "loss": 0.047715377807617185, "step": 74045 }, { "epoch": 0.6402884540557366, "grad_norm": 3.632680138844389, "learning_rate": 4.6063325185049e-06, "loss": 0.037149810791015626, "step": 74050 }, { "epoch": 0.6403316875772799, "grad_norm": 2.512424162100486, "learning_rate": 4.606160447750096e-06, "loss": 0.11000900268554688, "step": 74055 }, { "epoch": 0.6403749210988232, "grad_norm": 4.234452574116374, "learning_rate": 4.605988369587823e-06, "loss": 0.03324127197265625, "step": 74060 }, { "epoch": 0.6404181546203664, "grad_norm": 8.66857606551382, "learning_rate": 4.605816284018874e-06, "loss": 0.176312255859375, "step": 74065 }, { "epoch": 0.6404613881419097, "grad_norm": 0.2654776500265815, "learning_rate": 4.605644191044042e-06, "loss": 0.45424880981445315, "step": 74070 }, { "epoch": 0.640504621663453, "grad_norm": 4.117843240539004, "learning_rate": 4.60547209066412e-06, "loss": 0.03013153076171875, "step": 74075 }, { "epoch": 0.6405478551849962, "grad_norm": 0.15968708080361754, "learning_rate": 4.605299982879903e-06, "loss": 0.19935264587402343, "step": 74080 }, { "epoch": 0.6405910887065395, "grad_norm": 10.418952327771706, "learning_rate": 4.605127867692184e-06, "loss": 0.07493782043457031, "step": 74085 }, { "epoch": 0.6406343222280828, "grad_norm": 19.02765860487868, "learning_rate": 4.604955745101757e-06, "loss": 0.0921173095703125, "step": 74090 }, { "epoch": 0.640677555749626, "grad_norm": 2.507621997139112, "learning_rate": 4.604783615109418e-06, "loss": 0.13766708374023437, "step": 74095 }, { "epoch": 0.6407207892711693, "grad_norm": 0.12169706257396254, "learning_rate": 4.604611477715957e-06, "loss": 0.34311904907226565, "step": 74100 }, { "epoch": 0.6407640227927126, "grad_norm": 10.11415683225373, "learning_rate": 4.604439332922169e-06, "loss": 0.138641357421875, "step": 74105 }, { "epoch": 0.6408072563142558, "grad_norm": 1.6006941351740085, "learning_rate": 4.60426718072885e-06, "loss": 0.2416595458984375, "step": 74110 }, { "epoch": 0.6408504898357991, "grad_norm": 1.2251342922627901, "learning_rate": 4.604095021136792e-06, "loss": 0.0476715087890625, "step": 74115 }, { "epoch": 0.6408937233573424, "grad_norm": 11.716549560181056, "learning_rate": 4.6039228541467885e-06, "loss": 0.4826904296875, "step": 74120 }, { "epoch": 0.6409369568788856, "grad_norm": 3.543546519976372, "learning_rate": 4.6037506797596354e-06, "loss": 0.1261016845703125, "step": 74125 }, { "epoch": 0.6409801904004289, "grad_norm": 2.6178490592539454, "learning_rate": 4.603578497976126e-06, "loss": 0.31849365234375, "step": 74130 }, { "epoch": 0.6410234239219721, "grad_norm": 22.449286085783907, "learning_rate": 4.603406308797055e-06, "loss": 0.16772689819335937, "step": 74135 }, { "epoch": 0.6410666574435154, "grad_norm": 1.2839644690871708, "learning_rate": 4.603234112223214e-06, "loss": 0.1423248291015625, "step": 74140 }, { "epoch": 0.6411098909650587, "grad_norm": 1.799600393838962, "learning_rate": 4.6030619082554e-06, "loss": 0.038959503173828125, "step": 74145 }, { "epoch": 0.6411531244866019, "grad_norm": 5.363835593582387, "learning_rate": 4.602889696894407e-06, "loss": 0.09333000183105469, "step": 74150 }, { "epoch": 0.6411963580081452, "grad_norm": 0.3111166615608293, "learning_rate": 4.602717478141027e-06, "loss": 0.1097412109375, "step": 74155 }, { "epoch": 0.6412395915296885, "grad_norm": 27.25373878201681, "learning_rate": 4.602545251996056e-06, "loss": 0.12701950073242188, "step": 74160 }, { "epoch": 0.6412828250512317, "grad_norm": 3.6513790825955543, "learning_rate": 4.602373018460287e-06, "loss": 0.416070556640625, "step": 74165 }, { "epoch": 0.641326058572775, "grad_norm": 45.031001938078916, "learning_rate": 4.602200777534516e-06, "loss": 0.5077552795410156, "step": 74170 }, { "epoch": 0.6413692920943183, "grad_norm": 32.25576161974131, "learning_rate": 4.602028529219537e-06, "loss": 0.19942703247070312, "step": 74175 }, { "epoch": 0.6414125256158615, "grad_norm": 6.717105928187041, "learning_rate": 4.601856273516145e-06, "loss": 0.10433197021484375, "step": 74180 }, { "epoch": 0.6414557591374048, "grad_norm": 4.713331871223265, "learning_rate": 4.601684010425131e-06, "loss": 0.09390411376953126, "step": 74185 }, { "epoch": 0.641498992658948, "grad_norm": 4.519374725585295, "learning_rate": 4.601511739947293e-06, "loss": 0.19206771850585938, "step": 74190 }, { "epoch": 0.6415422261804913, "grad_norm": 11.790380505008683, "learning_rate": 4.601339462083425e-06, "loss": 0.193719482421875, "step": 74195 }, { "epoch": 0.6415854597020346, "grad_norm": 5.229423307156536, "learning_rate": 4.6011671768343194e-06, "loss": 0.24835052490234374, "step": 74200 }, { "epoch": 0.6416286932235779, "grad_norm": 3.6740915368321154, "learning_rate": 4.600994884200773e-06, "loss": 0.18375701904296876, "step": 74205 }, { "epoch": 0.6416719267451211, "grad_norm": 1.2115423669578462, "learning_rate": 4.600822584183581e-06, "loss": 0.40708160400390625, "step": 74210 }, { "epoch": 0.6417151602666643, "grad_norm": 3.5172197007295583, "learning_rate": 4.600650276783535e-06, "loss": 0.08717327117919922, "step": 74215 }, { "epoch": 0.6417583937882076, "grad_norm": 0.09590306111383766, "learning_rate": 4.600477962001432e-06, "loss": 0.06719493865966797, "step": 74220 }, { "epoch": 0.6418016273097509, "grad_norm": 1.696834859392481, "learning_rate": 4.600305639838065e-06, "loss": 0.12809600830078124, "step": 74225 }, { "epoch": 0.6418448608312941, "grad_norm": 3.5304957745734953, "learning_rate": 4.6001333102942314e-06, "loss": 0.05395660400390625, "step": 74230 }, { "epoch": 0.6418880943528374, "grad_norm": 0.8965776513418446, "learning_rate": 4.599960973370723e-06, "loss": 0.16446418762207032, "step": 74235 }, { "epoch": 0.6419313278743807, "grad_norm": 0.254927710478059, "learning_rate": 4.599788629068337e-06, "loss": 0.0937164306640625, "step": 74240 }, { "epoch": 0.6419745613959239, "grad_norm": 27.066178153353615, "learning_rate": 4.599616277387867e-06, "loss": 0.16227455139160157, "step": 74245 }, { "epoch": 0.6420177949174672, "grad_norm": 10.067114868087046, "learning_rate": 4.599443918330107e-06, "loss": 0.2726264953613281, "step": 74250 }, { "epoch": 0.6420610284390105, "grad_norm": 1.9451696223608568, "learning_rate": 4.599271551895854e-06, "loss": 0.11338567733764648, "step": 74255 }, { "epoch": 0.6421042619605537, "grad_norm": 0.8316348211716558, "learning_rate": 4.599099178085903e-06, "loss": 0.10381660461425782, "step": 74260 }, { "epoch": 0.642147495482097, "grad_norm": 5.8179948277417, "learning_rate": 4.598926796901045e-06, "loss": 0.13802947998046874, "step": 74265 }, { "epoch": 0.6421907290036403, "grad_norm": 2.0215088998644895, "learning_rate": 4.59875440834208e-06, "loss": 0.9203479766845704, "step": 74270 }, { "epoch": 0.6422339625251835, "grad_norm": 15.282754236888785, "learning_rate": 4.598582012409801e-06, "loss": 0.168084716796875, "step": 74275 }, { "epoch": 0.6422771960467268, "grad_norm": 2.0112006472977106, "learning_rate": 4.598409609105002e-06, "loss": 0.12069129943847656, "step": 74280 }, { "epoch": 0.6423204295682701, "grad_norm": 17.984275021613147, "learning_rate": 4.598237198428479e-06, "loss": 0.10660247802734375, "step": 74285 }, { "epoch": 0.6423636630898133, "grad_norm": 5.012769901091029, "learning_rate": 4.598064780381029e-06, "loss": 0.2563484191894531, "step": 74290 }, { "epoch": 0.6424068966113565, "grad_norm": 7.733704346849398, "learning_rate": 4.5978923549634446e-06, "loss": 0.11615943908691406, "step": 74295 }, { "epoch": 0.6424501301328999, "grad_norm": 28.965749705289735, "learning_rate": 4.597719922176521e-06, "loss": 0.09712944030761719, "step": 74300 }, { "epoch": 0.6424933636544431, "grad_norm": 3.802541918628115, "learning_rate": 4.597547482021054e-06, "loss": 0.0669769287109375, "step": 74305 }, { "epoch": 0.6425365971759863, "grad_norm": 30.537375678387605, "learning_rate": 4.597375034497841e-06, "loss": 0.38060569763183594, "step": 74310 }, { "epoch": 0.6425798306975297, "grad_norm": 0.6382260774481742, "learning_rate": 4.597202579607674e-06, "loss": 0.05013427734375, "step": 74315 }, { "epoch": 0.6426230642190729, "grad_norm": 2.315809521079082, "learning_rate": 4.59703011735135e-06, "loss": 0.05226821899414062, "step": 74320 }, { "epoch": 0.6426662977406161, "grad_norm": 13.65193734709583, "learning_rate": 4.596857647729666e-06, "loss": 0.16170425415039064, "step": 74325 }, { "epoch": 0.6427095312621595, "grad_norm": 25.252659767926655, "learning_rate": 4.5966851707434135e-06, "loss": 0.4862091064453125, "step": 74330 }, { "epoch": 0.6427527647837027, "grad_norm": 1.4956916264847229, "learning_rate": 4.59651268639339e-06, "loss": 0.039776611328125, "step": 74335 }, { "epoch": 0.6427959983052459, "grad_norm": 4.680757437097842, "learning_rate": 4.596340194680393e-06, "loss": 0.2208160400390625, "step": 74340 }, { "epoch": 0.6428392318267893, "grad_norm": 9.366277142068778, "learning_rate": 4.596167695605215e-06, "loss": 0.17984619140625, "step": 74345 }, { "epoch": 0.6428824653483325, "grad_norm": 6.072155616194733, "learning_rate": 4.595995189168653e-06, "loss": 0.0977020263671875, "step": 74350 }, { "epoch": 0.6429256988698757, "grad_norm": 7.997170817085685, "learning_rate": 4.5958226753715005e-06, "loss": 0.1418010711669922, "step": 74355 }, { "epoch": 0.6429689323914191, "grad_norm": 19.191490968899817, "learning_rate": 4.595650154214556e-06, "loss": 0.15846023559570313, "step": 74360 }, { "epoch": 0.6430121659129623, "grad_norm": 2.7412522111929634, "learning_rate": 4.595477625698614e-06, "loss": 0.05455360412597656, "step": 74365 }, { "epoch": 0.6430553994345055, "grad_norm": 16.676815158405994, "learning_rate": 4.5953050898244705e-06, "loss": 0.0827423095703125, "step": 74370 }, { "epoch": 0.6430986329560489, "grad_norm": 0.6320785874748805, "learning_rate": 4.59513254659292e-06, "loss": 0.056103515625, "step": 74375 }, { "epoch": 0.6431418664775921, "grad_norm": 18.858661949667933, "learning_rate": 4.59495999600476e-06, "loss": 0.049957275390625, "step": 74380 }, { "epoch": 0.6431850999991353, "grad_norm": 21.98112029792371, "learning_rate": 4.5947874380607865e-06, "loss": 0.31192855834960936, "step": 74385 }, { "epoch": 0.6432283335206785, "grad_norm": 0.696298735480143, "learning_rate": 4.5946148727617915e-06, "loss": 0.2406707763671875, "step": 74390 }, { "epoch": 0.6432715670422219, "grad_norm": 6.996521278397048, "learning_rate": 4.5944423001085745e-06, "loss": 0.041588211059570314, "step": 74395 }, { "epoch": 0.6433148005637651, "grad_norm": 6.116934102313834, "learning_rate": 4.5942697201019324e-06, "loss": 0.0589599609375, "step": 74400 }, { "epoch": 0.6433580340853083, "grad_norm": 51.40308797206611, "learning_rate": 4.594097132742657e-06, "loss": 0.29633102416992185, "step": 74405 }, { "epoch": 0.6434012676068517, "grad_norm": 5.3727471693810855, "learning_rate": 4.593924538031548e-06, "loss": 0.025933837890625, "step": 74410 }, { "epoch": 0.6434445011283949, "grad_norm": 13.996108585966207, "learning_rate": 4.593751935969399e-06, "loss": 0.09443511962890624, "step": 74415 }, { "epoch": 0.6434877346499381, "grad_norm": 7.634629391365439, "learning_rate": 4.593579326557007e-06, "loss": 0.17903003692626954, "step": 74420 }, { "epoch": 0.6435309681714815, "grad_norm": 4.218707560703077, "learning_rate": 4.593406709795168e-06, "loss": 0.078204345703125, "step": 74425 }, { "epoch": 0.6435742016930247, "grad_norm": 4.402823914142428, "learning_rate": 4.593234085684679e-06, "loss": 0.2558074951171875, "step": 74430 }, { "epoch": 0.6436174352145679, "grad_norm": 13.450865614943403, "learning_rate": 4.593061454226333e-06, "loss": 0.1549163818359375, "step": 74435 }, { "epoch": 0.6436606687361113, "grad_norm": 6.860739197479317, "learning_rate": 4.59288881542093e-06, "loss": 0.100762939453125, "step": 74440 }, { "epoch": 0.6437039022576545, "grad_norm": 6.737647644979468, "learning_rate": 4.592716169269265e-06, "loss": 0.09935150146484376, "step": 74445 }, { "epoch": 0.6437471357791977, "grad_norm": 6.410204880947861, "learning_rate": 4.5925435157721324e-06, "loss": 0.139483642578125, "step": 74450 }, { "epoch": 0.6437903693007411, "grad_norm": 0.8480184711500961, "learning_rate": 4.59237085493033e-06, "loss": 0.1345458984375, "step": 74455 }, { "epoch": 0.6438336028222843, "grad_norm": 22.929472341834405, "learning_rate": 4.5921981867446544e-06, "loss": 0.1183868408203125, "step": 74460 }, { "epoch": 0.6438768363438275, "grad_norm": 48.44218156128871, "learning_rate": 4.592025511215902e-06, "loss": 0.22169189453125, "step": 74465 }, { "epoch": 0.6439200698653708, "grad_norm": 2.849396468494553, "learning_rate": 4.591852828344869e-06, "loss": 0.13031005859375, "step": 74470 }, { "epoch": 0.6439633033869141, "grad_norm": 0.9746994884009736, "learning_rate": 4.591680138132349e-06, "loss": 0.139141845703125, "step": 74475 }, { "epoch": 0.6440065369084573, "grad_norm": 7.605286279525541, "learning_rate": 4.5915074405791426e-06, "loss": 0.26363525390625, "step": 74480 }, { "epoch": 0.6440497704300006, "grad_norm": 12.64544235183988, "learning_rate": 4.591334735686043e-06, "loss": 0.10403633117675781, "step": 74485 }, { "epoch": 0.6440930039515439, "grad_norm": 25.198360552322505, "learning_rate": 4.591162023453851e-06, "loss": 0.269488525390625, "step": 74490 }, { "epoch": 0.6441362374730871, "grad_norm": 2.8504996200147863, "learning_rate": 4.5909893038833575e-06, "loss": 0.04682235717773438, "step": 74495 }, { "epoch": 0.6441794709946304, "grad_norm": 4.466167403707793, "learning_rate": 4.590816576975364e-06, "loss": 0.2956024169921875, "step": 74500 }, { "epoch": 0.6442227045161737, "grad_norm": 3.9241882025480455, "learning_rate": 4.590643842730663e-06, "loss": 0.360498046875, "step": 74505 }, { "epoch": 0.6442659380377169, "grad_norm": 12.747871411662235, "learning_rate": 4.590471101150055e-06, "loss": 0.22486572265625, "step": 74510 }, { "epoch": 0.6443091715592602, "grad_norm": 0.5052523841896406, "learning_rate": 4.590298352234334e-06, "loss": 0.02541351318359375, "step": 74515 }, { "epoch": 0.6443524050808035, "grad_norm": 0.595895678515646, "learning_rate": 4.590125595984297e-06, "loss": 0.1314830780029297, "step": 74520 }, { "epoch": 0.6443956386023467, "grad_norm": 16.071010476658625, "learning_rate": 4.589952832400742e-06, "loss": 0.17975540161132814, "step": 74525 }, { "epoch": 0.64443887212389, "grad_norm": 19.28851879048348, "learning_rate": 4.589780061484465e-06, "loss": 0.2184429168701172, "step": 74530 }, { "epoch": 0.6444821056454333, "grad_norm": 2.413452000812654, "learning_rate": 4.589607283236263e-06, "loss": 0.09564628601074218, "step": 74535 }, { "epoch": 0.6445253391669765, "grad_norm": 21.27143260025927, "learning_rate": 4.589434497656932e-06, "loss": 0.5987815856933594, "step": 74540 }, { "epoch": 0.6445685726885197, "grad_norm": 27.083209400488176, "learning_rate": 4.58926170474727e-06, "loss": 0.14036407470703124, "step": 74545 }, { "epoch": 0.6446118062100631, "grad_norm": 11.53720274487668, "learning_rate": 4.5890889045080735e-06, "loss": 0.088128662109375, "step": 74550 }, { "epoch": 0.6446550397316063, "grad_norm": 38.37568730679178, "learning_rate": 4.58891609694014e-06, "loss": 0.2641314506530762, "step": 74555 }, { "epoch": 0.6446982732531495, "grad_norm": 11.671497904684163, "learning_rate": 4.588743282044264e-06, "loss": 0.20264129638671874, "step": 74560 }, { "epoch": 0.6447415067746928, "grad_norm": 11.690458822658757, "learning_rate": 4.588570459821246e-06, "loss": 0.09694900512695312, "step": 74565 }, { "epoch": 0.6447847402962361, "grad_norm": 5.003542565572646, "learning_rate": 4.588397630271881e-06, "loss": 0.1161764144897461, "step": 74570 }, { "epoch": 0.6448279738177793, "grad_norm": 0.40092314162733506, "learning_rate": 4.588224793396966e-06, "loss": 0.1782459259033203, "step": 74575 }, { "epoch": 0.6448712073393226, "grad_norm": 3.421280992549283, "learning_rate": 4.5880519491973e-06, "loss": 0.10012435913085938, "step": 74580 }, { "epoch": 0.6449144408608659, "grad_norm": 35.904387403069755, "learning_rate": 4.587879097673677e-06, "loss": 0.4006011962890625, "step": 74585 }, { "epoch": 0.6449576743824091, "grad_norm": 1.714192434513268, "learning_rate": 4.5877062388268965e-06, "loss": 0.4263916015625, "step": 74590 }, { "epoch": 0.6450009079039524, "grad_norm": 13.493087917072309, "learning_rate": 4.587533372657755e-06, "loss": 0.100396728515625, "step": 74595 }, { "epoch": 0.6450441414254957, "grad_norm": 18.504389736467555, "learning_rate": 4.58736049916705e-06, "loss": 0.152203369140625, "step": 74600 }, { "epoch": 0.6450873749470389, "grad_norm": 3.8830554273603997, "learning_rate": 4.587187618355579e-06, "loss": 0.05614166259765625, "step": 74605 }, { "epoch": 0.6451306084685822, "grad_norm": 6.356703315127631, "learning_rate": 4.587014730224139e-06, "loss": 0.150634765625, "step": 74610 }, { "epoch": 0.6451738419901255, "grad_norm": 8.595039616694594, "learning_rate": 4.586841834773526e-06, "loss": 0.0656951904296875, "step": 74615 }, { "epoch": 0.6452170755116687, "grad_norm": 13.37423887253168, "learning_rate": 4.58666893200454e-06, "loss": 0.20432281494140625, "step": 74620 }, { "epoch": 0.645260309033212, "grad_norm": 72.5923779529411, "learning_rate": 4.586496021917977e-06, "loss": 0.20216064453125, "step": 74625 }, { "epoch": 0.6453035425547553, "grad_norm": 11.998369811532177, "learning_rate": 4.586323104514634e-06, "loss": 0.3512767791748047, "step": 74630 }, { "epoch": 0.6453467760762985, "grad_norm": 3.536121768185365, "learning_rate": 4.58615017979531e-06, "loss": 0.1511322021484375, "step": 74635 }, { "epoch": 0.6453900095978418, "grad_norm": 5.324719121155961, "learning_rate": 4.585977247760801e-06, "loss": 0.30940399169921873, "step": 74640 }, { "epoch": 0.645433243119385, "grad_norm": 12.66175344059472, "learning_rate": 4.585804308411905e-06, "loss": 0.420343017578125, "step": 74645 }, { "epoch": 0.6454764766409283, "grad_norm": 5.991174354583864, "learning_rate": 4.585631361749419e-06, "loss": 0.16414909362792968, "step": 74650 }, { "epoch": 0.6455197101624716, "grad_norm": 0.8564578133733907, "learning_rate": 4.5854584077741415e-06, "loss": 0.13811798095703126, "step": 74655 }, { "epoch": 0.6455629436840148, "grad_norm": 1.21140828779813, "learning_rate": 4.585285446486871e-06, "loss": 0.2278651237487793, "step": 74660 }, { "epoch": 0.6456061772055581, "grad_norm": 1.9195378768166285, "learning_rate": 4.585112477888403e-06, "loss": 0.06654129028320313, "step": 74665 }, { "epoch": 0.6456494107271014, "grad_norm": 9.572999633770658, "learning_rate": 4.584939501979536e-06, "loss": 0.09894561767578125, "step": 74670 }, { "epoch": 0.6456926442486446, "grad_norm": 65.98380648754085, "learning_rate": 4.58476651876107e-06, "loss": 0.06137847900390625, "step": 74675 }, { "epoch": 0.6457358777701879, "grad_norm": 55.44651415877992, "learning_rate": 4.584593528233798e-06, "loss": 0.3770343780517578, "step": 74680 }, { "epoch": 0.6457791112917312, "grad_norm": 55.53290982236381, "learning_rate": 4.5844205303985225e-06, "loss": 0.20182952880859376, "step": 74685 }, { "epoch": 0.6458223448132744, "grad_norm": 14.474191123133272, "learning_rate": 4.58424752525604e-06, "loss": 0.2345611572265625, "step": 74690 }, { "epoch": 0.6458655783348177, "grad_norm": 10.956675560541722, "learning_rate": 4.584074512807147e-06, "loss": 0.19451370239257812, "step": 74695 }, { "epoch": 0.645908811856361, "grad_norm": 1.4174918675107464, "learning_rate": 4.583901493052642e-06, "loss": 0.06574172973632812, "step": 74700 }, { "epoch": 0.6459520453779042, "grad_norm": 28.05259234468206, "learning_rate": 4.583728465993323e-06, "loss": 0.13361282348632814, "step": 74705 }, { "epoch": 0.6459952788994475, "grad_norm": 3.626803276458315, "learning_rate": 4.5835554316299895e-06, "loss": 0.12046051025390625, "step": 74710 }, { "epoch": 0.6460385124209908, "grad_norm": 11.634477223352423, "learning_rate": 4.583382389963438e-06, "loss": 0.1469738006591797, "step": 74715 }, { "epoch": 0.646081745942534, "grad_norm": 5.356729884503018, "learning_rate": 4.583209340994466e-06, "loss": 0.11362457275390625, "step": 74720 }, { "epoch": 0.6461249794640773, "grad_norm": 19.997829727276546, "learning_rate": 4.583036284723873e-06, "loss": 0.3137825012207031, "step": 74725 }, { "epoch": 0.6461682129856205, "grad_norm": 13.252082362856846, "learning_rate": 4.582863221152456e-06, "loss": 0.12164535522460937, "step": 74730 }, { "epoch": 0.6462114465071638, "grad_norm": 1.0097353979228014, "learning_rate": 4.582690150281015e-06, "loss": 0.2025623321533203, "step": 74735 }, { "epoch": 0.646254680028707, "grad_norm": 21.51967923009965, "learning_rate": 4.582517072110346e-06, "loss": 0.13208847045898436, "step": 74740 }, { "epoch": 0.6462979135502503, "grad_norm": 0.184861954281375, "learning_rate": 4.582343986641248e-06, "loss": 0.02713470458984375, "step": 74745 }, { "epoch": 0.6463411470717936, "grad_norm": 1.1261141735064155, "learning_rate": 4.58217089387452e-06, "loss": 0.2669258117675781, "step": 74750 }, { "epoch": 0.6463843805933368, "grad_norm": 34.073126010682984, "learning_rate": 4.581997793810959e-06, "loss": 0.433685302734375, "step": 74755 }, { "epoch": 0.6464276141148801, "grad_norm": 1.1922085612292457, "learning_rate": 4.581824686451363e-06, "loss": 0.03560028076171875, "step": 74760 }, { "epoch": 0.6464708476364234, "grad_norm": 5.892631971693643, "learning_rate": 4.581651571796534e-06, "loss": 0.16256103515625, "step": 74765 }, { "epoch": 0.6465140811579666, "grad_norm": 0.4234476991838321, "learning_rate": 4.581478449847265e-06, "loss": 0.128851318359375, "step": 74770 }, { "epoch": 0.6465573146795099, "grad_norm": 12.341321059622535, "learning_rate": 4.581305320604359e-06, "loss": 0.248297119140625, "step": 74775 }, { "epoch": 0.6466005482010532, "grad_norm": 0.19119726764611064, "learning_rate": 4.581132184068612e-06, "loss": 0.45320968627929686, "step": 74780 }, { "epoch": 0.6466437817225964, "grad_norm": 6.78406812456177, "learning_rate": 4.580959040240823e-06, "loss": 0.11596298217773438, "step": 74785 }, { "epoch": 0.6466870152441397, "grad_norm": 28.630281686222858, "learning_rate": 4.58078588912179e-06, "loss": 0.24013671875, "step": 74790 }, { "epoch": 0.646730248765683, "grad_norm": 1.4859158866752817, "learning_rate": 4.580612730712313e-06, "loss": 0.3124603271484375, "step": 74795 }, { "epoch": 0.6467734822872262, "grad_norm": 4.9569581014162605, "learning_rate": 4.58043956501319e-06, "loss": 0.08203620910644531, "step": 74800 }, { "epoch": 0.6468167158087695, "grad_norm": 2.2899120354293996, "learning_rate": 4.580266392025218e-06, "loss": 0.08788986206054687, "step": 74805 }, { "epoch": 0.6468599493303128, "grad_norm": 19.00269899959483, "learning_rate": 4.5800932117491986e-06, "loss": 0.07432022094726562, "step": 74810 }, { "epoch": 0.646903182851856, "grad_norm": 15.680792018201052, "learning_rate": 4.579920024185928e-06, "loss": 0.3112518310546875, "step": 74815 }, { "epoch": 0.6469464163733992, "grad_norm": 18.17753583178992, "learning_rate": 4.579746829336206e-06, "loss": 0.13575267791748047, "step": 74820 }, { "epoch": 0.6469896498949426, "grad_norm": 13.321468211263557, "learning_rate": 4.579573627200831e-06, "loss": 0.33111419677734377, "step": 74825 }, { "epoch": 0.6470328834164858, "grad_norm": 1.6007176271529535, "learning_rate": 4.5794004177806026e-06, "loss": 0.1434844970703125, "step": 74830 }, { "epoch": 0.647076116938029, "grad_norm": 11.566504087933454, "learning_rate": 4.5792272010763185e-06, "loss": 0.25000152587890623, "step": 74835 }, { "epoch": 0.6471193504595724, "grad_norm": 9.677385716887036, "learning_rate": 4.579053977088778e-06, "loss": 0.16523895263671876, "step": 74840 }, { "epoch": 0.6471625839811156, "grad_norm": 1.2446285389453948, "learning_rate": 4.57888074581878e-06, "loss": 0.0876556396484375, "step": 74845 }, { "epoch": 0.6472058175026588, "grad_norm": 3.6894402316717025, "learning_rate": 4.578707507267125e-06, "loss": 0.1737579345703125, "step": 74850 }, { "epoch": 0.6472490510242022, "grad_norm": 8.472884388214542, "learning_rate": 4.578534261434609e-06, "loss": 0.03208160400390625, "step": 74855 }, { "epoch": 0.6472922845457454, "grad_norm": 7.885304698391263, "learning_rate": 4.578361008322033e-06, "loss": 0.15125579833984376, "step": 74860 }, { "epoch": 0.6473355180672886, "grad_norm": 21.712960373229276, "learning_rate": 4.578187747930196e-06, "loss": 0.4804420471191406, "step": 74865 }, { "epoch": 0.647378751588832, "grad_norm": 12.402119900041043, "learning_rate": 4.578014480259894e-06, "loss": 0.44145889282226564, "step": 74870 }, { "epoch": 0.6474219851103752, "grad_norm": 3.0751031570991, "learning_rate": 4.577841205311932e-06, "loss": 0.07380218505859375, "step": 74875 }, { "epoch": 0.6474652186319184, "grad_norm": 2.656275178013226, "learning_rate": 4.577667923087104e-06, "loss": 0.07314682006835938, "step": 74880 }, { "epoch": 0.6475084521534618, "grad_norm": 0.13507602388175444, "learning_rate": 4.577494633586211e-06, "loss": 0.19590072631835936, "step": 74885 }, { "epoch": 0.647551685675005, "grad_norm": 20.81865816179239, "learning_rate": 4.577321336810052e-06, "loss": 0.18535995483398438, "step": 74890 }, { "epoch": 0.6475949191965482, "grad_norm": 2.5504395407167593, "learning_rate": 4.577148032759427e-06, "loss": 0.034346389770507815, "step": 74895 }, { "epoch": 0.6476381527180916, "grad_norm": 13.918557344656557, "learning_rate": 4.576974721435134e-06, "loss": 0.05576400756835938, "step": 74900 }, { "epoch": 0.6476813862396348, "grad_norm": 15.153539559353943, "learning_rate": 4.576801402837974e-06, "loss": 0.22513427734375, "step": 74905 }, { "epoch": 0.647724619761178, "grad_norm": 1.036132165960198, "learning_rate": 4.576628076968744e-06, "loss": 0.060558319091796875, "step": 74910 }, { "epoch": 0.6477678532827212, "grad_norm": 7.38476179035454, "learning_rate": 4.576454743828247e-06, "loss": 0.04070587158203125, "step": 74915 }, { "epoch": 0.6478110868042646, "grad_norm": 2.156518860906239, "learning_rate": 4.576281403417277e-06, "loss": 0.1803863525390625, "step": 74920 }, { "epoch": 0.6478543203258078, "grad_norm": 4.977111185405864, "learning_rate": 4.576108055736638e-06, "loss": 0.09898796081542968, "step": 74925 }, { "epoch": 0.647897553847351, "grad_norm": 10.14787313009605, "learning_rate": 4.575934700787127e-06, "loss": 0.20294647216796874, "step": 74930 }, { "epoch": 0.6479407873688944, "grad_norm": 7.86047571520432, "learning_rate": 4.575761338569545e-06, "loss": 0.07872238159179687, "step": 74935 }, { "epoch": 0.6479840208904376, "grad_norm": 1.30693863101227, "learning_rate": 4.575587969084691e-06, "loss": 0.1471527099609375, "step": 74940 }, { "epoch": 0.6480272544119808, "grad_norm": 15.36019365591339, "learning_rate": 4.575414592333366e-06, "loss": 0.1079376220703125, "step": 74945 }, { "epoch": 0.6480704879335242, "grad_norm": 9.759088971388998, "learning_rate": 4.575241208316365e-06, "loss": 0.2800590515136719, "step": 74950 }, { "epoch": 0.6481137214550674, "grad_norm": 19.9626835663502, "learning_rate": 4.575067817034492e-06, "loss": 0.126043701171875, "step": 74955 }, { "epoch": 0.6481569549766106, "grad_norm": 0.6207569790120292, "learning_rate": 4.5748944184885465e-06, "loss": 0.17135887145996093, "step": 74960 }, { "epoch": 0.648200188498154, "grad_norm": 1.3138453810261208, "learning_rate": 4.5747210126793255e-06, "loss": 0.06912574768066407, "step": 74965 }, { "epoch": 0.6482434220196972, "grad_norm": 35.40521183192663, "learning_rate": 4.574547599607632e-06, "loss": 0.2335479736328125, "step": 74970 }, { "epoch": 0.6482866555412404, "grad_norm": 6.9593126169001325, "learning_rate": 4.574374179274263e-06, "loss": 0.08271484375, "step": 74975 }, { "epoch": 0.6483298890627838, "grad_norm": 8.561268452219698, "learning_rate": 4.574200751680019e-06, "loss": 0.04227180480957031, "step": 74980 }, { "epoch": 0.648373122584327, "grad_norm": 17.04089513639992, "learning_rate": 4.574027316825701e-06, "loss": 0.07725982666015625, "step": 74985 }, { "epoch": 0.6484163561058702, "grad_norm": 1.162718720195107, "learning_rate": 4.573853874712109e-06, "loss": 0.09673309326171875, "step": 74990 }, { "epoch": 0.6484595896274135, "grad_norm": 7.255615664636072, "learning_rate": 4.573680425340041e-06, "loss": 0.28111801147460935, "step": 74995 }, { "epoch": 0.6485028231489568, "grad_norm": 0.38868281963818807, "learning_rate": 4.5735069687102975e-06, "loss": 0.022150802612304687, "step": 75000 }, { "epoch": 0.6485460566705, "grad_norm": 0.6262941667301577, "learning_rate": 4.573333504823679e-06, "loss": 0.04300117492675781, "step": 75005 }, { "epoch": 0.6485892901920433, "grad_norm": 39.274881269961924, "learning_rate": 4.5731600336809856e-06, "loss": 0.8635971069335937, "step": 75010 }, { "epoch": 0.6486325237135866, "grad_norm": 3.5872633494434294, "learning_rate": 4.572986555283017e-06, "loss": 0.08537063598632813, "step": 75015 }, { "epoch": 0.6486757572351298, "grad_norm": 13.769916112644967, "learning_rate": 4.572813069630574e-06, "loss": 0.06451263427734374, "step": 75020 }, { "epoch": 0.648718990756673, "grad_norm": 1.4094983508997294, "learning_rate": 4.572639576724456e-06, "loss": 0.025945281982421874, "step": 75025 }, { "epoch": 0.6487622242782164, "grad_norm": 5.098666111271238, "learning_rate": 4.572466076565462e-06, "loss": 0.073944091796875, "step": 75030 }, { "epoch": 0.6488054577997596, "grad_norm": 17.531544209956692, "learning_rate": 4.572292569154395e-06, "loss": 0.25752410888671873, "step": 75035 }, { "epoch": 0.6488486913213029, "grad_norm": 0.8367877691047936, "learning_rate": 4.572119054492053e-06, "loss": 0.19330291748046874, "step": 75040 }, { "epoch": 0.6488919248428462, "grad_norm": 0.3406825451204203, "learning_rate": 4.571945532579236e-06, "loss": 0.239508056640625, "step": 75045 }, { "epoch": 0.6489351583643894, "grad_norm": 3.1152273554325958, "learning_rate": 4.571772003416746e-06, "loss": 0.097149658203125, "step": 75050 }, { "epoch": 0.6489783918859326, "grad_norm": 1.0248749083541833, "learning_rate": 4.571598467005383e-06, "loss": 0.04582481384277344, "step": 75055 }, { "epoch": 0.649021625407476, "grad_norm": 18.06846625046443, "learning_rate": 4.571424923345946e-06, "loss": 0.06775474548339844, "step": 75060 }, { "epoch": 0.6490648589290192, "grad_norm": 23.76961368038139, "learning_rate": 4.5712513724392365e-06, "loss": 0.2817953109741211, "step": 75065 }, { "epoch": 0.6491080924505624, "grad_norm": 2.101260738176022, "learning_rate": 4.5710778142860536e-06, "loss": 0.12809486389160157, "step": 75070 }, { "epoch": 0.6491513259721058, "grad_norm": 36.53473254234812, "learning_rate": 4.570904248887199e-06, "loss": 0.18823928833007814, "step": 75075 }, { "epoch": 0.649194559493649, "grad_norm": 1.4461995671424308, "learning_rate": 4.570730676243473e-06, "loss": 0.36910400390625, "step": 75080 }, { "epoch": 0.6492377930151922, "grad_norm": 17.241012369662847, "learning_rate": 4.570557096355676e-06, "loss": 0.134918212890625, "step": 75085 }, { "epoch": 0.6492810265367355, "grad_norm": 0.6169352951495357, "learning_rate": 4.570383509224609e-06, "loss": 0.17074928283691407, "step": 75090 }, { "epoch": 0.6493242600582788, "grad_norm": 1.0659172896335576, "learning_rate": 4.570209914851071e-06, "loss": 0.16025848388671876, "step": 75095 }, { "epoch": 0.649367493579822, "grad_norm": 0.625523632158434, "learning_rate": 4.570036313235864e-06, "loss": 0.17475242614746095, "step": 75100 }, { "epoch": 0.6494107271013653, "grad_norm": 3.4914964135367925, "learning_rate": 4.569862704379788e-06, "loss": 0.3175640106201172, "step": 75105 }, { "epoch": 0.6494539606229086, "grad_norm": 2.644487353422937, "learning_rate": 4.569689088283644e-06, "loss": 0.0515411376953125, "step": 75110 }, { "epoch": 0.6494971941444518, "grad_norm": 27.573565961581764, "learning_rate": 4.569515464948233e-06, "loss": 0.10293426513671874, "step": 75115 }, { "epoch": 0.6495404276659951, "grad_norm": 1.0902409842427925, "learning_rate": 4.569341834374356e-06, "loss": 0.055892562866210936, "step": 75120 }, { "epoch": 0.6495836611875384, "grad_norm": 1.5406290568413346, "learning_rate": 4.5691681965628115e-06, "loss": 0.138055419921875, "step": 75125 }, { "epoch": 0.6496268947090816, "grad_norm": 0.032788288898967614, "learning_rate": 4.568994551514402e-06, "loss": 0.0893585205078125, "step": 75130 }, { "epoch": 0.6496701282306249, "grad_norm": 3.4348537188277044, "learning_rate": 4.5688208992299295e-06, "loss": 0.21505126953125, "step": 75135 }, { "epoch": 0.6497133617521682, "grad_norm": 6.150180335488286, "learning_rate": 4.568647239710194e-06, "loss": 0.1860872268676758, "step": 75140 }, { "epoch": 0.6497565952737114, "grad_norm": 13.721432590888822, "learning_rate": 4.568473572955995e-06, "loss": 0.2760894775390625, "step": 75145 }, { "epoch": 0.6497998287952547, "grad_norm": 5.766079800456218, "learning_rate": 4.568299898968134e-06, "loss": 0.0689208984375, "step": 75150 }, { "epoch": 0.649843062316798, "grad_norm": 7.631035968214457, "learning_rate": 4.568126217747413e-06, "loss": 0.33055877685546875, "step": 75155 }, { "epoch": 0.6498862958383412, "grad_norm": 1.1857794109033575, "learning_rate": 4.567952529294633e-06, "loss": 0.2079315185546875, "step": 75160 }, { "epoch": 0.6499295293598845, "grad_norm": 8.064031072853844, "learning_rate": 4.567778833610594e-06, "loss": 0.09006271362304688, "step": 75165 }, { "epoch": 0.6499727628814277, "grad_norm": 1.1937345140375053, "learning_rate": 4.567605130696098e-06, "loss": 0.09777412414550782, "step": 75170 }, { "epoch": 0.650015996402971, "grad_norm": 6.19224808563283, "learning_rate": 4.567431420551945e-06, "loss": 0.3125244140625, "step": 75175 }, { "epoch": 0.6500592299245143, "grad_norm": 7.919126456779183, "learning_rate": 4.567257703178936e-06, "loss": 0.18207244873046874, "step": 75180 }, { "epoch": 0.6501024634460575, "grad_norm": 12.520588742989334, "learning_rate": 4.5670839785778755e-06, "loss": 0.36324920654296877, "step": 75185 }, { "epoch": 0.6501456969676008, "grad_norm": 6.847591508878233, "learning_rate": 4.566910246749559e-06, "loss": 0.23040771484375, "step": 75190 }, { "epoch": 0.6501889304891441, "grad_norm": 23.70384146968068, "learning_rate": 4.566736507694793e-06, "loss": 0.2184051513671875, "step": 75195 }, { "epoch": 0.6502321640106873, "grad_norm": 17.693726166631272, "learning_rate": 4.5665627614143765e-06, "loss": 0.14890213012695314, "step": 75200 }, { "epoch": 0.6502753975322306, "grad_norm": 30.030665340096938, "learning_rate": 4.566389007909111e-06, "loss": 0.14877777099609374, "step": 75205 }, { "epoch": 0.6503186310537739, "grad_norm": 13.51745339714628, "learning_rate": 4.566215247179797e-06, "loss": 0.22025642395019532, "step": 75210 }, { "epoch": 0.6503618645753171, "grad_norm": 15.01292728600095, "learning_rate": 4.566041479227237e-06, "loss": 0.07257919311523438, "step": 75215 }, { "epoch": 0.6504050980968604, "grad_norm": 3.9452223828371147, "learning_rate": 4.565867704052232e-06, "loss": 0.21024932861328124, "step": 75220 }, { "epoch": 0.6504483316184037, "grad_norm": 7.68174888235615, "learning_rate": 4.5656939216555845e-06, "loss": 0.08846511840820312, "step": 75225 }, { "epoch": 0.6504915651399469, "grad_norm": 0.9698893464407072, "learning_rate": 4.565520132038094e-06, "loss": 0.07454185485839844, "step": 75230 }, { "epoch": 0.6505347986614902, "grad_norm": 6.776140513484127, "learning_rate": 4.565346335200563e-06, "loss": 0.10995330810546874, "step": 75235 }, { "epoch": 0.6505780321830335, "grad_norm": 11.844249273137427, "learning_rate": 4.565172531143793e-06, "loss": 0.22312164306640625, "step": 75240 }, { "epoch": 0.6506212657045767, "grad_norm": 2.3090294969388463, "learning_rate": 4.564998719868585e-06, "loss": 0.270782470703125, "step": 75245 }, { "epoch": 0.65066449922612, "grad_norm": 20.938582430409507, "learning_rate": 4.5648249013757425e-06, "loss": 0.0767852783203125, "step": 75250 }, { "epoch": 0.6507077327476632, "grad_norm": 12.531277081918065, "learning_rate": 4.564651075666065e-06, "loss": 0.2094696044921875, "step": 75255 }, { "epoch": 0.6507509662692065, "grad_norm": 13.290389923914617, "learning_rate": 4.564477242740355e-06, "loss": 0.09093093872070312, "step": 75260 }, { "epoch": 0.6507941997907497, "grad_norm": 5.035278441809691, "learning_rate": 4.5643034025994155e-06, "loss": 0.10399856567382812, "step": 75265 }, { "epoch": 0.650837433312293, "grad_norm": 2.2139564430568726, "learning_rate": 4.564129555244046e-06, "loss": 0.20756149291992188, "step": 75270 }, { "epoch": 0.6508806668338363, "grad_norm": 4.828569410420537, "learning_rate": 4.5639557006750485e-06, "loss": 0.45070343017578124, "step": 75275 }, { "epoch": 0.6509239003553795, "grad_norm": 26.234021850148988, "learning_rate": 4.5637818388932265e-06, "loss": 0.2432159423828125, "step": 75280 }, { "epoch": 0.6509671338769228, "grad_norm": 40.24315910637952, "learning_rate": 4.56360796989938e-06, "loss": 0.27427520751953127, "step": 75285 }, { "epoch": 0.6510103673984661, "grad_norm": 3.9430169447995826, "learning_rate": 4.563434093694313e-06, "loss": 0.09317398071289062, "step": 75290 }, { "epoch": 0.6510536009200093, "grad_norm": 4.117518459986426, "learning_rate": 4.563260210278825e-06, "loss": 0.15433578491210936, "step": 75295 }, { "epoch": 0.6510968344415526, "grad_norm": 23.1447719708329, "learning_rate": 4.5630863196537184e-06, "loss": 0.177392578125, "step": 75300 }, { "epoch": 0.6511400679630959, "grad_norm": 1.4526027407706887, "learning_rate": 4.5629124218197975e-06, "loss": 0.05306396484375, "step": 75305 }, { "epoch": 0.6511833014846391, "grad_norm": 7.733897414255729, "learning_rate": 4.5627385167778625e-06, "loss": 0.11132278442382812, "step": 75310 }, { "epoch": 0.6512265350061824, "grad_norm": 0.216855258057323, "learning_rate": 4.562564604528714e-06, "loss": 0.07626113891601563, "step": 75315 }, { "epoch": 0.6512697685277257, "grad_norm": 5.636299787353812, "learning_rate": 4.562390685073157e-06, "loss": 0.1080657958984375, "step": 75320 }, { "epoch": 0.6513130020492689, "grad_norm": 33.114434687163296, "learning_rate": 4.562216758411991e-06, "loss": 0.384771728515625, "step": 75325 }, { "epoch": 0.6513562355708122, "grad_norm": 0.7879955668205855, "learning_rate": 4.56204282454602e-06, "loss": 0.17141036987304686, "step": 75330 }, { "epoch": 0.6513994690923555, "grad_norm": 5.702600435796597, "learning_rate": 4.5618688834760454e-06, "loss": 0.0827972412109375, "step": 75335 }, { "epoch": 0.6514427026138987, "grad_norm": 20.72477746560003, "learning_rate": 4.56169493520287e-06, "loss": 0.0899566650390625, "step": 75340 }, { "epoch": 0.6514859361354419, "grad_norm": 8.418164373312907, "learning_rate": 4.561520979727296e-06, "loss": 0.2607889175415039, "step": 75345 }, { "epoch": 0.6515291696569853, "grad_norm": 22.04646814713398, "learning_rate": 4.561347017050124e-06, "loss": 0.0970123291015625, "step": 75350 }, { "epoch": 0.6515724031785285, "grad_norm": 0.4768371585545882, "learning_rate": 4.5611730471721575e-06, "loss": 0.05951995849609375, "step": 75355 }, { "epoch": 0.6516156367000717, "grad_norm": 0.8886120572195966, "learning_rate": 4.5609990700942e-06, "loss": 0.026635360717773438, "step": 75360 }, { "epoch": 0.6516588702216151, "grad_norm": 20.199029681135723, "learning_rate": 4.560825085817052e-06, "loss": 0.23103675842285157, "step": 75365 }, { "epoch": 0.6517021037431583, "grad_norm": 3.3542025102224406, "learning_rate": 4.560651094341516e-06, "loss": 0.1380340576171875, "step": 75370 }, { "epoch": 0.6517453372647015, "grad_norm": 2.2049819647963567, "learning_rate": 4.5604770956683965e-06, "loss": 0.07642440795898438, "step": 75375 }, { "epoch": 0.6517885707862449, "grad_norm": 0.9868805300236088, "learning_rate": 4.560303089798494e-06, "loss": 0.15608139038085939, "step": 75380 }, { "epoch": 0.6518318043077881, "grad_norm": 12.428058325255043, "learning_rate": 4.5601290767326104e-06, "loss": 0.04861373901367187, "step": 75385 }, { "epoch": 0.6518750378293313, "grad_norm": 1.1986540480255408, "learning_rate": 4.559955056471551e-06, "loss": 0.13717060089111327, "step": 75390 }, { "epoch": 0.6519182713508747, "grad_norm": 13.397164096868092, "learning_rate": 4.559781029016115e-06, "loss": 0.5453178405761718, "step": 75395 }, { "epoch": 0.6519615048724179, "grad_norm": 0.5365334973976157, "learning_rate": 4.559606994367108e-06, "loss": 0.12396430969238281, "step": 75400 }, { "epoch": 0.6520047383939611, "grad_norm": 1.5682725967216906, "learning_rate": 4.5594329525253315e-06, "loss": 0.0372161865234375, "step": 75405 }, { "epoch": 0.6520479719155045, "grad_norm": 0.29144291596846594, "learning_rate": 4.559258903491588e-06, "loss": 0.16330337524414062, "step": 75410 }, { "epoch": 0.6520912054370477, "grad_norm": 13.48364879275411, "learning_rate": 4.559084847266679e-06, "loss": 0.16486053466796874, "step": 75415 }, { "epoch": 0.6521344389585909, "grad_norm": 15.656174437181644, "learning_rate": 4.558910783851409e-06, "loss": 0.08248672485351563, "step": 75420 }, { "epoch": 0.6521776724801343, "grad_norm": 9.119349126698163, "learning_rate": 4.55873671324658e-06, "loss": 0.10410423278808593, "step": 75425 }, { "epoch": 0.6522209060016775, "grad_norm": 45.803420065090755, "learning_rate": 4.558562635452996e-06, "loss": 0.2818733215332031, "step": 75430 }, { "epoch": 0.6522641395232207, "grad_norm": 5.137632572609606, "learning_rate": 4.558388550471458e-06, "loss": 0.09641876220703124, "step": 75435 }, { "epoch": 0.6523073730447639, "grad_norm": 34.454919601731866, "learning_rate": 4.55821445830277e-06, "loss": 0.22740573883056642, "step": 75440 }, { "epoch": 0.6523506065663073, "grad_norm": 12.075498677609112, "learning_rate": 4.5580403589477345e-06, "loss": 0.10547103881835937, "step": 75445 }, { "epoch": 0.6523938400878505, "grad_norm": 0.16175841587411832, "learning_rate": 4.557866252407155e-06, "loss": 0.015883636474609376, "step": 75450 }, { "epoch": 0.6524370736093937, "grad_norm": 8.254015482714697, "learning_rate": 4.557692138681834e-06, "loss": 0.2392974853515625, "step": 75455 }, { "epoch": 0.6524803071309371, "grad_norm": 2.0479224684177004, "learning_rate": 4.557518017772574e-06, "loss": 0.08755874633789062, "step": 75460 }, { "epoch": 0.6525235406524803, "grad_norm": 18.8605161761929, "learning_rate": 4.557343889680178e-06, "loss": 0.36229190826416013, "step": 75465 }, { "epoch": 0.6525667741740235, "grad_norm": 25.903911221084925, "learning_rate": 4.5571697544054505e-06, "loss": 0.29275970458984374, "step": 75470 }, { "epoch": 0.6526100076955669, "grad_norm": 3.060151526553671, "learning_rate": 4.556995611949193e-06, "loss": 0.17662200927734376, "step": 75475 }, { "epoch": 0.6526532412171101, "grad_norm": 0.9573005883090135, "learning_rate": 4.55682146231221e-06, "loss": 0.08346748352050781, "step": 75480 }, { "epoch": 0.6526964747386533, "grad_norm": 21.904089743567727, "learning_rate": 4.556647305495304e-06, "loss": 0.2722440719604492, "step": 75485 }, { "epoch": 0.6527397082601967, "grad_norm": 0.40477455909499704, "learning_rate": 4.556473141499278e-06, "loss": 0.16421241760253907, "step": 75490 }, { "epoch": 0.6527829417817399, "grad_norm": 2.398283979274457, "learning_rate": 4.556298970324935e-06, "loss": 0.15057373046875, "step": 75495 }, { "epoch": 0.6528261753032831, "grad_norm": 6.463584375026799, "learning_rate": 4.556124791973078e-06, "loss": 0.189788818359375, "step": 75500 }, { "epoch": 0.6528694088248265, "grad_norm": 0.11894743130165053, "learning_rate": 4.5559506064445126e-06, "loss": 0.03857498168945313, "step": 75505 }, { "epoch": 0.6529126423463697, "grad_norm": 0.4879238043650314, "learning_rate": 4.5557764137400395e-06, "loss": 0.13661041259765624, "step": 75510 }, { "epoch": 0.6529558758679129, "grad_norm": 14.721271876209443, "learning_rate": 4.555602213860464e-06, "loss": 0.0807037353515625, "step": 75515 }, { "epoch": 0.6529991093894562, "grad_norm": 0.9774836494631295, "learning_rate": 4.555428006806588e-06, "loss": 0.9114776611328125, "step": 75520 }, { "epoch": 0.6530423429109995, "grad_norm": 20.371803388547125, "learning_rate": 4.555253792579214e-06, "loss": 0.41552886962890623, "step": 75525 }, { "epoch": 0.6530855764325427, "grad_norm": 20.886110462245664, "learning_rate": 4.555079571179149e-06, "loss": 0.16577835083007814, "step": 75530 }, { "epoch": 0.653128809954086, "grad_norm": 39.986860774136325, "learning_rate": 4.554905342607194e-06, "loss": 0.17197742462158203, "step": 75535 }, { "epoch": 0.6531720434756293, "grad_norm": 0.6779812235389552, "learning_rate": 4.554731106864153e-06, "loss": 0.019037628173828126, "step": 75540 }, { "epoch": 0.6532152769971725, "grad_norm": 26.557704839758447, "learning_rate": 4.5545568639508295e-06, "loss": 0.2661834716796875, "step": 75545 }, { "epoch": 0.6532585105187158, "grad_norm": 0.9393608544432395, "learning_rate": 4.554382613868026e-06, "loss": 0.04736824035644531, "step": 75550 }, { "epoch": 0.6533017440402591, "grad_norm": 4.764039682522629, "learning_rate": 4.5542083566165474e-06, "loss": 0.08035125732421874, "step": 75555 }, { "epoch": 0.6533449775618023, "grad_norm": 20.575515084194056, "learning_rate": 4.554034092197198e-06, "loss": 0.179046630859375, "step": 75560 }, { "epoch": 0.6533882110833455, "grad_norm": 2.6928941176553227, "learning_rate": 4.553859820610782e-06, "loss": 0.12999725341796875, "step": 75565 }, { "epoch": 0.6534314446048889, "grad_norm": 10.059713230949159, "learning_rate": 4.5536855418580995e-06, "loss": 0.103173828125, "step": 75570 }, { "epoch": 0.6534746781264321, "grad_norm": 2.1983862214563294, "learning_rate": 4.553511255939957e-06, "loss": 0.09832382202148438, "step": 75575 }, { "epoch": 0.6535179116479753, "grad_norm": 1.9003872394246961, "learning_rate": 4.5533369628571585e-06, "loss": 0.062242889404296876, "step": 75580 }, { "epoch": 0.6535611451695187, "grad_norm": 2.9822117502434766, "learning_rate": 4.553162662610507e-06, "loss": 0.24293899536132812, "step": 75585 }, { "epoch": 0.6536043786910619, "grad_norm": 2.3696980003682664, "learning_rate": 4.552988355200807e-06, "loss": 0.09941558837890625, "step": 75590 }, { "epoch": 0.6536476122126051, "grad_norm": 45.13601641502207, "learning_rate": 4.552814040628861e-06, "loss": 0.1501974105834961, "step": 75595 }, { "epoch": 0.6536908457341485, "grad_norm": 32.25895516009417, "learning_rate": 4.552639718895474e-06, "loss": 0.20605316162109374, "step": 75600 }, { "epoch": 0.6537340792556917, "grad_norm": 1.1916297192590042, "learning_rate": 4.552465390001449e-06, "loss": 0.16968536376953125, "step": 75605 }, { "epoch": 0.6537773127772349, "grad_norm": 10.705415276852294, "learning_rate": 4.552291053947593e-06, "loss": 0.27530059814453123, "step": 75610 }, { "epoch": 0.6538205462987782, "grad_norm": 24.384896872717718, "learning_rate": 4.552116710734707e-06, "loss": 0.09554672241210938, "step": 75615 }, { "epoch": 0.6538637798203215, "grad_norm": 30.86175922159469, "learning_rate": 4.551942360363595e-06, "loss": 0.30312576293945315, "step": 75620 }, { "epoch": 0.6539070133418647, "grad_norm": 0.40993874116387163, "learning_rate": 4.551768002835063e-06, "loss": 0.07748031616210938, "step": 75625 }, { "epoch": 0.653950246863408, "grad_norm": 34.20887058667518, "learning_rate": 4.551593638149913e-06, "loss": 0.287945556640625, "step": 75630 }, { "epoch": 0.6539934803849513, "grad_norm": 0.5135404827617508, "learning_rate": 4.5514192663089506e-06, "loss": 0.08913116455078125, "step": 75635 }, { "epoch": 0.6540367139064945, "grad_norm": 0.7653674122669792, "learning_rate": 4.55124488731298e-06, "loss": 0.12115020751953125, "step": 75640 }, { "epoch": 0.6540799474280378, "grad_norm": 7.072185893382836, "learning_rate": 4.551070501162806e-06, "loss": 0.09646453857421874, "step": 75645 }, { "epoch": 0.6541231809495811, "grad_norm": 6.128807446002314, "learning_rate": 4.55089610785923e-06, "loss": 0.1078369140625, "step": 75650 }, { "epoch": 0.6541664144711243, "grad_norm": 14.0503163052047, "learning_rate": 4.550721707403059e-06, "loss": 0.110076904296875, "step": 75655 }, { "epoch": 0.6542096479926676, "grad_norm": 0.3476067276933997, "learning_rate": 4.550547299795097e-06, "loss": 0.12443695068359376, "step": 75660 }, { "epoch": 0.6542528815142109, "grad_norm": 0.23947925044177776, "learning_rate": 4.5503728850361474e-06, "loss": 0.30490570068359374, "step": 75665 }, { "epoch": 0.6542961150357541, "grad_norm": 4.202286695664399, "learning_rate": 4.550198463127014e-06, "loss": 0.15144815444946289, "step": 75670 }, { "epoch": 0.6543393485572974, "grad_norm": 14.097840076703395, "learning_rate": 4.550024034068504e-06, "loss": 0.31886749267578124, "step": 75675 }, { "epoch": 0.6543825820788407, "grad_norm": 1.2596821939827076, "learning_rate": 4.549849597861419e-06, "loss": 0.1257488250732422, "step": 75680 }, { "epoch": 0.6544258156003839, "grad_norm": 1.1729473659568248, "learning_rate": 4.549675154506566e-06, "loss": 0.20535888671875, "step": 75685 }, { "epoch": 0.6544690491219272, "grad_norm": 19.928141701241824, "learning_rate": 4.549500704004746e-06, "loss": 0.15814361572265626, "step": 75690 }, { "epoch": 0.6545122826434704, "grad_norm": 7.396253146486615, "learning_rate": 4.549326246356767e-06, "loss": 0.06957130432128907, "step": 75695 }, { "epoch": 0.6545555161650137, "grad_norm": 0.5289139874107315, "learning_rate": 4.549151781563433e-06, "loss": 0.1639801025390625, "step": 75700 }, { "epoch": 0.654598749686557, "grad_norm": 27.812946557642363, "learning_rate": 4.548977309625546e-06, "loss": 0.08728179931640626, "step": 75705 }, { "epoch": 0.6546419832081002, "grad_norm": 9.39386208495906, "learning_rate": 4.548802830543914e-06, "loss": 0.1148956298828125, "step": 75710 }, { "epoch": 0.6546852167296435, "grad_norm": 1.9050311074036423, "learning_rate": 4.548628344319339e-06, "loss": 0.0870330810546875, "step": 75715 }, { "epoch": 0.6547284502511868, "grad_norm": 18.081941584454807, "learning_rate": 4.548453850952628e-06, "loss": 0.17063064575195314, "step": 75720 }, { "epoch": 0.65477168377273, "grad_norm": 2.2359531563544968, "learning_rate": 4.548279350444585e-06, "loss": 0.17593002319335938, "step": 75725 }, { "epoch": 0.6548149172942733, "grad_norm": 5.234784151854341, "learning_rate": 4.548104842796013e-06, "loss": 0.14066848754882813, "step": 75730 }, { "epoch": 0.6548581508158166, "grad_norm": 3.6000278718461534, "learning_rate": 4.547930328007719e-06, "loss": 0.0303924560546875, "step": 75735 }, { "epoch": 0.6549013843373598, "grad_norm": 49.110300711005046, "learning_rate": 4.547755806080507e-06, "loss": 0.5972076416015625, "step": 75740 }, { "epoch": 0.6549446178589031, "grad_norm": 0.17175190264493032, "learning_rate": 4.547581277015181e-06, "loss": 0.11053390502929687, "step": 75745 }, { "epoch": 0.6549878513804464, "grad_norm": 3.3825785668496464, "learning_rate": 4.547406740812549e-06, "loss": 0.26373291015625, "step": 75750 }, { "epoch": 0.6550310849019896, "grad_norm": 2.1205221754655206, "learning_rate": 4.547232197473413e-06, "loss": 0.03260993957519531, "step": 75755 }, { "epoch": 0.6550743184235329, "grad_norm": 8.547960174792896, "learning_rate": 4.547057646998577e-06, "loss": 0.1347076416015625, "step": 75760 }, { "epoch": 0.6551175519450761, "grad_norm": 7.529594666652603, "learning_rate": 4.546883089388851e-06, "loss": 0.0726715087890625, "step": 75765 }, { "epoch": 0.6551607854666194, "grad_norm": 2.0693428654711976, "learning_rate": 4.546708524645035e-06, "loss": 0.157720947265625, "step": 75770 }, { "epoch": 0.6552040189881626, "grad_norm": 6.498716662747156, "learning_rate": 4.546533952767935e-06, "loss": 0.03326873779296875, "step": 75775 }, { "epoch": 0.655247252509706, "grad_norm": 12.907636738891616, "learning_rate": 4.546359373758359e-06, "loss": 0.059014892578125, "step": 75780 }, { "epoch": 0.6552904860312492, "grad_norm": 4.914055259748412, "learning_rate": 4.54618478761711e-06, "loss": 0.1002685546875, "step": 75785 }, { "epoch": 0.6553337195527924, "grad_norm": 1.1746162872002146, "learning_rate": 4.546010194344993e-06, "loss": 0.0882843017578125, "step": 75790 }, { "epoch": 0.6553769530743357, "grad_norm": 22.28978803207979, "learning_rate": 4.545835593942813e-06, "loss": 0.16729888916015626, "step": 75795 }, { "epoch": 0.655420186595879, "grad_norm": 11.575581878449057, "learning_rate": 4.545660986411376e-06, "loss": 0.344805908203125, "step": 75800 }, { "epoch": 0.6554634201174222, "grad_norm": 3.98309541918297, "learning_rate": 4.545486371751487e-06, "loss": 0.37526779174804686, "step": 75805 }, { "epoch": 0.6555066536389655, "grad_norm": 6.613967082894372, "learning_rate": 4.545311749963952e-06, "loss": 0.05475540161132812, "step": 75810 }, { "epoch": 0.6555498871605088, "grad_norm": 23.16319159686809, "learning_rate": 4.545137121049576e-06, "loss": 0.09606475830078125, "step": 75815 }, { "epoch": 0.655593120682052, "grad_norm": 5.960923303340144, "learning_rate": 4.544962485009164e-06, "loss": 0.620098876953125, "step": 75820 }, { "epoch": 0.6556363542035953, "grad_norm": 36.115565781307254, "learning_rate": 4.5447878418435215e-06, "loss": 0.18161277770996093, "step": 75825 }, { "epoch": 0.6556795877251386, "grad_norm": 0.08445100189890416, "learning_rate": 4.544613191553453e-06, "loss": 0.10424728393554687, "step": 75830 }, { "epoch": 0.6557228212466818, "grad_norm": 2.125438208314465, "learning_rate": 4.544438534139767e-06, "loss": 0.25353469848632815, "step": 75835 }, { "epoch": 0.6557660547682251, "grad_norm": 14.59163946557489, "learning_rate": 4.5442638696032644e-06, "loss": 0.114215087890625, "step": 75840 }, { "epoch": 0.6558092882897684, "grad_norm": 4.8709909614722555, "learning_rate": 4.544089197944755e-06, "loss": 0.13773727416992188, "step": 75845 }, { "epoch": 0.6558525218113116, "grad_norm": 11.906888733651241, "learning_rate": 4.543914519165042e-06, "loss": 0.28834381103515627, "step": 75850 }, { "epoch": 0.6558957553328549, "grad_norm": 18.015801143879305, "learning_rate": 4.543739833264932e-06, "loss": 0.36954193115234374, "step": 75855 }, { "epoch": 0.6559389888543982, "grad_norm": 1.3255236181175287, "learning_rate": 4.54356514024523e-06, "loss": 0.22991485595703126, "step": 75860 }, { "epoch": 0.6559822223759414, "grad_norm": 0.9673805856930955, "learning_rate": 4.543390440106743e-06, "loss": 0.36873626708984375, "step": 75865 }, { "epoch": 0.6560254558974846, "grad_norm": 0.1707768197089351, "learning_rate": 4.5432157328502736e-06, "loss": 0.11653900146484375, "step": 75870 }, { "epoch": 0.656068689419028, "grad_norm": 3.545016536907956, "learning_rate": 4.54304101847663e-06, "loss": 0.08140182495117188, "step": 75875 }, { "epoch": 0.6561119229405712, "grad_norm": 1.525965403168843, "learning_rate": 4.54286629698662e-06, "loss": 0.08939590454101562, "step": 75880 }, { "epoch": 0.6561551564621144, "grad_norm": 11.350072654591274, "learning_rate": 4.542691568381044e-06, "loss": 0.04369354248046875, "step": 75885 }, { "epoch": 0.6561983899836578, "grad_norm": 9.803645269580766, "learning_rate": 4.542516832660712e-06, "loss": 0.20133209228515625, "step": 75890 }, { "epoch": 0.656241623505201, "grad_norm": 19.200341286281247, "learning_rate": 4.54234208982643e-06, "loss": 0.28311767578125, "step": 75895 }, { "epoch": 0.6562848570267442, "grad_norm": 4.717191473316156, "learning_rate": 4.542167339879001e-06, "loss": 0.12740478515625, "step": 75900 }, { "epoch": 0.6563280905482876, "grad_norm": 0.7074292136193612, "learning_rate": 4.541992582819233e-06, "loss": 0.2798919677734375, "step": 75905 }, { "epoch": 0.6563713240698308, "grad_norm": 13.74613714775428, "learning_rate": 4.541817818647931e-06, "loss": 0.11328125, "step": 75910 }, { "epoch": 0.656414557591374, "grad_norm": 0.4603670430643765, "learning_rate": 4.541643047365901e-06, "loss": 0.19914093017578124, "step": 75915 }, { "epoch": 0.6564577911129174, "grad_norm": 5.75740727655194, "learning_rate": 4.54146826897395e-06, "loss": 0.05657501220703125, "step": 75920 }, { "epoch": 0.6565010246344606, "grad_norm": 3.5891448320027752, "learning_rate": 4.541293483472884e-06, "loss": 0.0596435546875, "step": 75925 }, { "epoch": 0.6565442581560038, "grad_norm": 11.235558627027663, "learning_rate": 4.541118690863509e-06, "loss": 0.2376739501953125, "step": 75930 }, { "epoch": 0.6565874916775472, "grad_norm": 25.545602184855593, "learning_rate": 4.540943891146629e-06, "loss": 0.52843017578125, "step": 75935 }, { "epoch": 0.6566307251990904, "grad_norm": 16.314162192547286, "learning_rate": 4.540769084323055e-06, "loss": 0.49070205688476565, "step": 75940 }, { "epoch": 0.6566739587206336, "grad_norm": 1.983567103978766, "learning_rate": 4.540594270393587e-06, "loss": 0.05732879638671875, "step": 75945 }, { "epoch": 0.6567171922421768, "grad_norm": 11.18993803198225, "learning_rate": 4.540419449359036e-06, "loss": 0.40599594116210935, "step": 75950 }, { "epoch": 0.6567604257637202, "grad_norm": 0.5180984070410767, "learning_rate": 4.540244621220207e-06, "loss": 0.0210357666015625, "step": 75955 }, { "epoch": 0.6568036592852634, "grad_norm": 5.834942949146578, "learning_rate": 4.540069785977904e-06, "loss": 0.09295387268066406, "step": 75960 }, { "epoch": 0.6568468928068066, "grad_norm": 9.745374682455639, "learning_rate": 4.539894943632936e-06, "loss": 0.084808349609375, "step": 75965 }, { "epoch": 0.65689012632835, "grad_norm": 10.2252277894298, "learning_rate": 4.5397200941861086e-06, "loss": 0.23465423583984374, "step": 75970 }, { "epoch": 0.6569333598498932, "grad_norm": 5.011470361759208, "learning_rate": 4.539545237638229e-06, "loss": 0.16402587890625, "step": 75975 }, { "epoch": 0.6569765933714364, "grad_norm": 2.1169111458512413, "learning_rate": 4.539370373990103e-06, "loss": 0.01979217529296875, "step": 75980 }, { "epoch": 0.6570198268929798, "grad_norm": 14.663440893027035, "learning_rate": 4.539195503242536e-06, "loss": 0.13490066528320313, "step": 75985 }, { "epoch": 0.657063060414523, "grad_norm": 6.520563208314991, "learning_rate": 4.539020625396336e-06, "loss": 0.04998092651367188, "step": 75990 }, { "epoch": 0.6571062939360662, "grad_norm": 4.393678141800739, "learning_rate": 4.538845740452308e-06, "loss": 0.14142990112304688, "step": 75995 }, { "epoch": 0.6571495274576096, "grad_norm": 3.8258360095045787, "learning_rate": 4.538670848411259e-06, "loss": 0.13386764526367187, "step": 76000 }, { "epoch": 0.6571927609791528, "grad_norm": 13.45712932475012, "learning_rate": 4.538495949273998e-06, "loss": 0.166888427734375, "step": 76005 }, { "epoch": 0.657235994500696, "grad_norm": 5.068250584323297, "learning_rate": 4.538321043041328e-06, "loss": 0.026751708984375, "step": 76010 }, { "epoch": 0.6572792280222394, "grad_norm": 0.5501706593876656, "learning_rate": 4.538146129714057e-06, "loss": 0.139678955078125, "step": 76015 }, { "epoch": 0.6573224615437826, "grad_norm": 0.4203035666459783, "learning_rate": 4.537971209292993e-06, "loss": 0.2732656478881836, "step": 76020 }, { "epoch": 0.6573656950653258, "grad_norm": 6.291321494602763, "learning_rate": 4.537796281778941e-06, "loss": 0.22567710876464844, "step": 76025 }, { "epoch": 0.6574089285868692, "grad_norm": 0.42769145783915496, "learning_rate": 4.537621347172709e-06, "loss": 0.12746734619140626, "step": 76030 }, { "epoch": 0.6574521621084124, "grad_norm": 16.28979911307027, "learning_rate": 4.537446405475103e-06, "loss": 0.1494293212890625, "step": 76035 }, { "epoch": 0.6574953956299556, "grad_norm": 4.365833548611852, "learning_rate": 4.5372714566869305e-06, "loss": 0.10404319763183593, "step": 76040 }, { "epoch": 0.6575386291514989, "grad_norm": 9.449524964116888, "learning_rate": 4.5370965008089975e-06, "loss": 0.32475624084472654, "step": 76045 }, { "epoch": 0.6575818626730422, "grad_norm": 7.54693469097031, "learning_rate": 4.536921537842111e-06, "loss": 0.06175537109375, "step": 76050 }, { "epoch": 0.6576250961945854, "grad_norm": 2.3605221175978994, "learning_rate": 4.536746567787078e-06, "loss": 0.03158721923828125, "step": 76055 }, { "epoch": 0.6576683297161287, "grad_norm": 0.6787141348291984, "learning_rate": 4.536571590644707e-06, "loss": 0.28432464599609375, "step": 76060 }, { "epoch": 0.657711563237672, "grad_norm": 1.5622492647878694, "learning_rate": 4.536396606415801e-06, "loss": 0.0993743896484375, "step": 76065 }, { "epoch": 0.6577547967592152, "grad_norm": 3.5684206708220927, "learning_rate": 4.536221615101171e-06, "loss": 0.0708587646484375, "step": 76070 }, { "epoch": 0.6577980302807585, "grad_norm": 10.611020066489328, "learning_rate": 4.5360466167016245e-06, "loss": 0.09609909057617187, "step": 76075 }, { "epoch": 0.6578412638023018, "grad_norm": 2.5466405337051357, "learning_rate": 4.535871611217964e-06, "loss": 0.30932464599609377, "step": 76080 }, { "epoch": 0.657884497323845, "grad_norm": 63.210276539704765, "learning_rate": 4.535696598651e-06, "loss": 0.367041015625, "step": 76085 }, { "epoch": 0.6579277308453882, "grad_norm": 0.5315869124774398, "learning_rate": 4.5355215790015395e-06, "loss": 0.11577568054199219, "step": 76090 }, { "epoch": 0.6579709643669316, "grad_norm": 1.4933734242021222, "learning_rate": 4.53534655227039e-06, "loss": 0.046649169921875, "step": 76095 }, { "epoch": 0.6580141978884748, "grad_norm": 9.717620122723167, "learning_rate": 4.535171518458356e-06, "loss": 0.05386199951171875, "step": 76100 }, { "epoch": 0.658057431410018, "grad_norm": 1.23515394585501, "learning_rate": 4.534996477566248e-06, "loss": 0.1103759765625, "step": 76105 }, { "epoch": 0.6581006649315614, "grad_norm": 3.6639899127587507, "learning_rate": 4.534821429594871e-06, "loss": 0.15411605834960937, "step": 76110 }, { "epoch": 0.6581438984531046, "grad_norm": 1.3458276199694088, "learning_rate": 4.534646374545034e-06, "loss": 0.0639556884765625, "step": 76115 }, { "epoch": 0.6581871319746478, "grad_norm": 18.399597382224105, "learning_rate": 4.534471312417544e-06, "loss": 0.1584625244140625, "step": 76120 }, { "epoch": 0.6582303654961911, "grad_norm": 29.34478793883277, "learning_rate": 4.534296243213206e-06, "loss": 0.10559463500976562, "step": 76125 }, { "epoch": 0.6582735990177344, "grad_norm": 27.99421880729404, "learning_rate": 4.53412116693283e-06, "loss": 0.23617172241210938, "step": 76130 }, { "epoch": 0.6583168325392776, "grad_norm": 3.83525430214928, "learning_rate": 4.533946083577224e-06, "loss": 0.1303955078125, "step": 76135 }, { "epoch": 0.6583600660608209, "grad_norm": 23.306293240670794, "learning_rate": 4.533770993147193e-06, "loss": 0.21598663330078124, "step": 76140 }, { "epoch": 0.6584032995823642, "grad_norm": 0.32102420022803857, "learning_rate": 4.533595895643545e-06, "loss": 0.063763427734375, "step": 76145 }, { "epoch": 0.6584465331039074, "grad_norm": 1.4893729205321566, "learning_rate": 4.5334207910670895e-06, "loss": 0.2761322021484375, "step": 76150 }, { "epoch": 0.6584897666254507, "grad_norm": 40.470337139094035, "learning_rate": 4.533245679418633e-06, "loss": 0.2986095428466797, "step": 76155 }, { "epoch": 0.658533000146994, "grad_norm": 10.559822975022204, "learning_rate": 4.5330705606989815e-06, "loss": 0.05604095458984375, "step": 76160 }, { "epoch": 0.6585762336685372, "grad_norm": 61.063169646614014, "learning_rate": 4.5328954349089455e-06, "loss": 0.4264556884765625, "step": 76165 }, { "epoch": 0.6586194671900805, "grad_norm": 2.264630374650702, "learning_rate": 4.532720302049331e-06, "loss": 0.03583831787109375, "step": 76170 }, { "epoch": 0.6586627007116238, "grad_norm": 2.430528614137823, "learning_rate": 4.532545162120945e-06, "loss": 0.14871292114257811, "step": 76175 }, { "epoch": 0.658705934233167, "grad_norm": 19.78757040809363, "learning_rate": 4.532370015124596e-06, "loss": 0.23031558990478515, "step": 76180 }, { "epoch": 0.6587491677547103, "grad_norm": 7.558210574423873, "learning_rate": 4.532194861061093e-06, "loss": 0.1252777099609375, "step": 76185 }, { "epoch": 0.6587924012762536, "grad_norm": 12.185537253874221, "learning_rate": 4.532019699931242e-06, "loss": 0.13678970336914062, "step": 76190 }, { "epoch": 0.6588356347977968, "grad_norm": 10.225352458891498, "learning_rate": 4.531844531735852e-06, "loss": 0.07686538696289062, "step": 76195 }, { "epoch": 0.6588788683193401, "grad_norm": 29.890176101806684, "learning_rate": 4.53166935647573e-06, "loss": 0.24203643798828126, "step": 76200 }, { "epoch": 0.6589221018408834, "grad_norm": 8.868185995045604, "learning_rate": 4.531494174151684e-06, "loss": 0.08882293701171876, "step": 76205 }, { "epoch": 0.6589653353624266, "grad_norm": 26.15594606009714, "learning_rate": 4.531318984764523e-06, "loss": 0.3848602294921875, "step": 76210 }, { "epoch": 0.6590085688839699, "grad_norm": 13.657621178681515, "learning_rate": 4.531143788315054e-06, "loss": 0.116461181640625, "step": 76215 }, { "epoch": 0.6590518024055131, "grad_norm": 8.325120068706276, "learning_rate": 4.530968584804085e-06, "loss": 0.10323028564453125, "step": 76220 }, { "epoch": 0.6590950359270564, "grad_norm": 0.4830930180614213, "learning_rate": 4.530793374232423e-06, "loss": 0.06581859588623047, "step": 76225 }, { "epoch": 0.6591382694485997, "grad_norm": 5.685759817703638, "learning_rate": 4.53061815660088e-06, "loss": 0.10037841796875, "step": 76230 }, { "epoch": 0.6591815029701429, "grad_norm": 30.699613830950145, "learning_rate": 4.530442931910259e-06, "loss": 0.22041015625, "step": 76235 }, { "epoch": 0.6592247364916862, "grad_norm": 1.3056467138474066, "learning_rate": 4.530267700161371e-06, "loss": 0.1151397705078125, "step": 76240 }, { "epoch": 0.6592679700132295, "grad_norm": 2.896748168794002, "learning_rate": 4.530092461355024e-06, "loss": 0.08333358764648438, "step": 76245 }, { "epoch": 0.6593112035347727, "grad_norm": 4.641641456830486, "learning_rate": 4.529917215492024e-06, "loss": 0.17071380615234374, "step": 76250 }, { "epoch": 0.659354437056316, "grad_norm": 2.792303361318766, "learning_rate": 4.529741962573183e-06, "loss": 0.07151565551757813, "step": 76255 }, { "epoch": 0.6593976705778593, "grad_norm": 0.24478149039502728, "learning_rate": 4.529566702599307e-06, "loss": 0.28666534423828127, "step": 76260 }, { "epoch": 0.6594409040994025, "grad_norm": 18.9318866454667, "learning_rate": 4.529391435571204e-06, "loss": 0.3408927917480469, "step": 76265 }, { "epoch": 0.6594841376209458, "grad_norm": 22.23202797005512, "learning_rate": 4.529216161489682e-06, "loss": 0.12100830078125, "step": 76270 }, { "epoch": 0.659527371142489, "grad_norm": 10.772493799614695, "learning_rate": 4.5290408803555525e-06, "loss": 0.141851806640625, "step": 76275 }, { "epoch": 0.6595706046640323, "grad_norm": 0.6408685660950586, "learning_rate": 4.52886559216962e-06, "loss": 0.017229413986206053, "step": 76280 }, { "epoch": 0.6596138381855756, "grad_norm": 8.211203486692021, "learning_rate": 4.528690296932693e-06, "loss": 0.13335113525390624, "step": 76285 }, { "epoch": 0.6596570717071188, "grad_norm": 15.165990421056563, "learning_rate": 4.528514994645584e-06, "loss": 0.10712013244628907, "step": 76290 }, { "epoch": 0.6597003052286621, "grad_norm": 39.01402630165187, "learning_rate": 4.528339685309099e-06, "loss": 0.18365478515625, "step": 76295 }, { "epoch": 0.6597435387502053, "grad_norm": 10.205004174992741, "learning_rate": 4.528164368924046e-06, "loss": 0.06409873962402343, "step": 76300 }, { "epoch": 0.6597867722717486, "grad_norm": 24.78899510674462, "learning_rate": 4.527989045491232e-06, "loss": 0.18012237548828125, "step": 76305 }, { "epoch": 0.6598300057932919, "grad_norm": 38.250565892706284, "learning_rate": 4.527813715011469e-06, "loss": 0.1884521484375, "step": 76310 }, { "epoch": 0.6598732393148351, "grad_norm": 5.924307321590875, "learning_rate": 4.527638377485564e-06, "loss": 0.09127197265625, "step": 76315 }, { "epoch": 0.6599164728363784, "grad_norm": 0.9442751274984539, "learning_rate": 4.5274630329143255e-06, "loss": 0.06366958618164062, "step": 76320 }, { "epoch": 0.6599597063579217, "grad_norm": 9.927588767781334, "learning_rate": 4.527287681298563e-06, "loss": 0.1762237548828125, "step": 76325 }, { "epoch": 0.6600029398794649, "grad_norm": 0.8783028263088025, "learning_rate": 4.527112322639085e-06, "loss": 0.15050735473632812, "step": 76330 }, { "epoch": 0.6600461734010082, "grad_norm": 4.112705854623762, "learning_rate": 4.5269369569366995e-06, "loss": 0.06397171020507812, "step": 76335 }, { "epoch": 0.6600894069225515, "grad_norm": 0.058726188285134436, "learning_rate": 4.526761584192215e-06, "loss": 0.0350250244140625, "step": 76340 }, { "epoch": 0.6601326404440947, "grad_norm": 16.27182668235603, "learning_rate": 4.5265862044064415e-06, "loss": 0.49736175537109373, "step": 76345 }, { "epoch": 0.660175873965638, "grad_norm": 39.626278325568684, "learning_rate": 4.526410817580187e-06, "loss": 0.8968437194824219, "step": 76350 }, { "epoch": 0.6602191074871813, "grad_norm": 0.32773908879631436, "learning_rate": 4.526235423714262e-06, "loss": 0.091400146484375, "step": 76355 }, { "epoch": 0.6602623410087245, "grad_norm": 8.075091908590068, "learning_rate": 4.526060022809474e-06, "loss": 0.282720947265625, "step": 76360 }, { "epoch": 0.6603055745302678, "grad_norm": 1.3240603661163595, "learning_rate": 4.52588461486663e-06, "loss": 0.040362548828125, "step": 76365 }, { "epoch": 0.6603488080518111, "grad_norm": 1.8377402470977597, "learning_rate": 4.525709199886542e-06, "loss": 0.14944076538085938, "step": 76370 }, { "epoch": 0.6603920415733543, "grad_norm": 1.0496860945303197, "learning_rate": 4.525533777870018e-06, "loss": 0.19607391357421874, "step": 76375 }, { "epoch": 0.6604352750948976, "grad_norm": 2.6280403991182983, "learning_rate": 4.525358348817867e-06, "loss": 0.1216552734375, "step": 76380 }, { "epoch": 0.6604785086164409, "grad_norm": 0.21192926790698155, "learning_rate": 4.525182912730899e-06, "loss": 0.09141349792480469, "step": 76385 }, { "epoch": 0.6605217421379841, "grad_norm": 3.7212663768273666, "learning_rate": 4.525007469609921e-06, "loss": 0.17332305908203124, "step": 76390 }, { "epoch": 0.6605649756595273, "grad_norm": 14.630120631379045, "learning_rate": 4.524832019455744e-06, "loss": 0.4137054443359375, "step": 76395 }, { "epoch": 0.6606082091810707, "grad_norm": 5.631237573989136, "learning_rate": 4.524656562269176e-06, "loss": 0.03771209716796875, "step": 76400 }, { "epoch": 0.6606514427026139, "grad_norm": 11.312177499472288, "learning_rate": 4.524481098051026e-06, "loss": 0.06839599609375, "step": 76405 }, { "epoch": 0.6606946762241571, "grad_norm": 4.8497744154773645, "learning_rate": 4.524305626802105e-06, "loss": 0.147637939453125, "step": 76410 }, { "epoch": 0.6607379097457005, "grad_norm": 4.610717081966094, "learning_rate": 4.524130148523222e-06, "loss": 0.040102386474609376, "step": 76415 }, { "epoch": 0.6607811432672437, "grad_norm": 4.580812519981046, "learning_rate": 4.523954663215183e-06, "loss": 0.128662109375, "step": 76420 }, { "epoch": 0.6608243767887869, "grad_norm": 5.5428450585844535, "learning_rate": 4.523779170878802e-06, "loss": 0.18350830078125, "step": 76425 }, { "epoch": 0.6608676103103303, "grad_norm": 0.8324212926183464, "learning_rate": 4.523603671514885e-06, "loss": 0.06468276977539063, "step": 76430 }, { "epoch": 0.6609108438318735, "grad_norm": 22.855835942625998, "learning_rate": 4.523428165124242e-06, "loss": 0.14440040588378905, "step": 76435 }, { "epoch": 0.6609540773534167, "grad_norm": 24.034599500261695, "learning_rate": 4.523252651707685e-06, "loss": 0.261761474609375, "step": 76440 }, { "epoch": 0.66099731087496, "grad_norm": 0.3046251287033761, "learning_rate": 4.523077131266018e-06, "loss": 0.29711456298828126, "step": 76445 }, { "epoch": 0.6610405443965033, "grad_norm": 0.31905561205730265, "learning_rate": 4.522901603800056e-06, "loss": 0.05454864501953125, "step": 76450 }, { "epoch": 0.6610837779180465, "grad_norm": 4.168461009908697, "learning_rate": 4.522726069310607e-06, "loss": 0.185430908203125, "step": 76455 }, { "epoch": 0.6611270114395899, "grad_norm": 1.0540041028225602, "learning_rate": 4.522550527798478e-06, "loss": 0.119866943359375, "step": 76460 }, { "epoch": 0.6611702449611331, "grad_norm": 4.290385562174598, "learning_rate": 4.522374979264482e-06, "loss": 0.1181640625, "step": 76465 }, { "epoch": 0.6612134784826763, "grad_norm": 4.484187935409744, "learning_rate": 4.522199423709427e-06, "loss": 0.0414520263671875, "step": 76470 }, { "epoch": 0.6612567120042195, "grad_norm": 1.675393149346225, "learning_rate": 4.522023861134122e-06, "loss": 0.08172416687011719, "step": 76475 }, { "epoch": 0.6612999455257629, "grad_norm": 5.90597158155271, "learning_rate": 4.521848291539378e-06, "loss": 0.0654022216796875, "step": 76480 }, { "epoch": 0.6613431790473061, "grad_norm": 0.27587183607602395, "learning_rate": 4.5216727149260045e-06, "loss": 0.30675277709960935, "step": 76485 }, { "epoch": 0.6613864125688493, "grad_norm": 1.7297798809632718, "learning_rate": 4.52149713129481e-06, "loss": 0.09037742614746094, "step": 76490 }, { "epoch": 0.6614296460903927, "grad_norm": 1.9061821021231584, "learning_rate": 4.5213215406466055e-06, "loss": 0.04896125793457031, "step": 76495 }, { "epoch": 0.6614728796119359, "grad_norm": 12.874182108459115, "learning_rate": 4.521145942982201e-06, "loss": 0.051699161529541016, "step": 76500 }, { "epoch": 0.6615161131334791, "grad_norm": 38.297451870761606, "learning_rate": 4.520970338302405e-06, "loss": 0.409210205078125, "step": 76505 }, { "epoch": 0.6615593466550225, "grad_norm": 12.293498870853872, "learning_rate": 4.520794726608028e-06, "loss": 0.08375473022460937, "step": 76510 }, { "epoch": 0.6616025801765657, "grad_norm": 18.486095546581048, "learning_rate": 4.520619107899881e-06, "loss": 0.14914398193359374, "step": 76515 }, { "epoch": 0.6616458136981089, "grad_norm": 0.14353525876368997, "learning_rate": 4.520443482178774e-06, "loss": 0.20721702575683593, "step": 76520 }, { "epoch": 0.6616890472196523, "grad_norm": 2.402110716598855, "learning_rate": 4.520267849445514e-06, "loss": 0.2309906005859375, "step": 76525 }, { "epoch": 0.6617322807411955, "grad_norm": 6.597174961159192, "learning_rate": 4.520092209700914e-06, "loss": 0.12200775146484374, "step": 76530 }, { "epoch": 0.6617755142627387, "grad_norm": 53.13006251666629, "learning_rate": 4.519916562945782e-06, "loss": 0.3418552398681641, "step": 76535 }, { "epoch": 0.6618187477842821, "grad_norm": 2.008789357792722, "learning_rate": 4.51974090918093e-06, "loss": 0.12931175231933595, "step": 76540 }, { "epoch": 0.6618619813058253, "grad_norm": 3.9077678683780728, "learning_rate": 4.519565248407167e-06, "loss": 0.044084930419921876, "step": 76545 }, { "epoch": 0.6619052148273685, "grad_norm": 1.2752794967852685, "learning_rate": 4.5193895806253045e-06, "loss": 0.09061126708984375, "step": 76550 }, { "epoch": 0.6619484483489119, "grad_norm": 7.851569493200638, "learning_rate": 4.51921390583615e-06, "loss": 0.11149520874023437, "step": 76555 }, { "epoch": 0.6619916818704551, "grad_norm": 0.19168756102486165, "learning_rate": 4.519038224040516e-06, "loss": 0.051981353759765626, "step": 76560 }, { "epoch": 0.6620349153919983, "grad_norm": 16.319716717094643, "learning_rate": 4.518862535239211e-06, "loss": 0.4750732421875, "step": 76565 }, { "epoch": 0.6620781489135416, "grad_norm": 9.574309186067934, "learning_rate": 4.518686839433047e-06, "loss": 0.08802108764648438, "step": 76570 }, { "epoch": 0.6621213824350849, "grad_norm": 3.605399901964548, "learning_rate": 4.518511136622833e-06, "loss": 0.0907928466796875, "step": 76575 }, { "epoch": 0.6621646159566281, "grad_norm": 16.295544035018192, "learning_rate": 4.518335426809381e-06, "loss": 0.21413803100585938, "step": 76580 }, { "epoch": 0.6622078494781714, "grad_norm": 10.181615624277352, "learning_rate": 4.518159709993499e-06, "loss": 0.08733577728271484, "step": 76585 }, { "epoch": 0.6622510829997147, "grad_norm": 1.4723161543836711, "learning_rate": 4.517983986175999e-06, "loss": 0.41033477783203126, "step": 76590 }, { "epoch": 0.6622943165212579, "grad_norm": 1.8519798157212464, "learning_rate": 4.51780825535769e-06, "loss": 0.19197502136230468, "step": 76595 }, { "epoch": 0.6623375500428011, "grad_norm": 37.29036314721975, "learning_rate": 4.5176325175393845e-06, "loss": 0.20469589233398439, "step": 76600 }, { "epoch": 0.6623807835643445, "grad_norm": 1.4417782560005632, "learning_rate": 4.517456772721891e-06, "loss": 0.150048828125, "step": 76605 }, { "epoch": 0.6624240170858877, "grad_norm": 7.261769796779425, "learning_rate": 4.517281020906022e-06, "loss": 0.1369647979736328, "step": 76610 }, { "epoch": 0.662467250607431, "grad_norm": 1.9265535366837212, "learning_rate": 4.517105262092588e-06, "loss": 0.0909820556640625, "step": 76615 }, { "epoch": 0.6625104841289743, "grad_norm": 10.543824821116905, "learning_rate": 4.516929496282396e-06, "loss": 0.117938232421875, "step": 76620 }, { "epoch": 0.6625537176505175, "grad_norm": 0.9131042917178768, "learning_rate": 4.516753723476261e-06, "loss": 0.2097026824951172, "step": 76625 }, { "epoch": 0.6625969511720607, "grad_norm": 35.816066553668, "learning_rate": 4.5165779436749915e-06, "loss": 0.16213302612304686, "step": 76630 }, { "epoch": 0.6626401846936041, "grad_norm": 0.8340151551066118, "learning_rate": 4.5164021568793975e-06, "loss": 0.027256011962890625, "step": 76635 }, { "epoch": 0.6626834182151473, "grad_norm": 8.014492371333802, "learning_rate": 4.516226363090292e-06, "loss": 0.2222137451171875, "step": 76640 }, { "epoch": 0.6627266517366905, "grad_norm": 3.5111869925137627, "learning_rate": 4.516050562308484e-06, "loss": 0.16026611328125, "step": 76645 }, { "epoch": 0.6627698852582338, "grad_norm": 6.196228926225239, "learning_rate": 4.515874754534784e-06, "loss": 0.19028491973876954, "step": 76650 }, { "epoch": 0.6628131187797771, "grad_norm": 1.6340481115085352, "learning_rate": 4.515698939770005e-06, "loss": 0.23248291015625, "step": 76655 }, { "epoch": 0.6628563523013203, "grad_norm": 0.9044815116975514, "learning_rate": 4.5155231180149565e-06, "loss": 0.047107696533203125, "step": 76660 }, { "epoch": 0.6628995858228636, "grad_norm": 0.31973436495638746, "learning_rate": 4.515347289270448e-06, "loss": 0.1796112060546875, "step": 76665 }, { "epoch": 0.6629428193444069, "grad_norm": 8.237800719678683, "learning_rate": 4.5151714535372926e-06, "loss": 0.616485595703125, "step": 76670 }, { "epoch": 0.6629860528659501, "grad_norm": 5.823106317116713, "learning_rate": 4.514995610816299e-06, "loss": 0.0495513916015625, "step": 76675 }, { "epoch": 0.6630292863874934, "grad_norm": 2.317900133843622, "learning_rate": 4.514819761108282e-06, "loss": 0.31602783203125, "step": 76680 }, { "epoch": 0.6630725199090367, "grad_norm": 0.099245762532497, "learning_rate": 4.514643904414048e-06, "loss": 0.2378154754638672, "step": 76685 }, { "epoch": 0.6631157534305799, "grad_norm": 14.487735630332969, "learning_rate": 4.5144680407344104e-06, "loss": 0.1799713134765625, "step": 76690 }, { "epoch": 0.6631589869521232, "grad_norm": 2.485826440263468, "learning_rate": 4.514292170070181e-06, "loss": 0.2680198669433594, "step": 76695 }, { "epoch": 0.6632022204736665, "grad_norm": 1.238120307303795, "learning_rate": 4.51411629242217e-06, "loss": 0.0620361328125, "step": 76700 }, { "epoch": 0.6632454539952097, "grad_norm": 0.18196635090338056, "learning_rate": 4.513940407791187e-06, "loss": 0.09514389038085938, "step": 76705 }, { "epoch": 0.663288687516753, "grad_norm": 2.7977992107295697, "learning_rate": 4.513764516178046e-06, "loss": 0.17421112060546876, "step": 76710 }, { "epoch": 0.6633319210382963, "grad_norm": 17.939702216390923, "learning_rate": 4.513588617583556e-06, "loss": 0.09887580871582032, "step": 76715 }, { "epoch": 0.6633751545598395, "grad_norm": 0.8201636507596217, "learning_rate": 4.513412712008529e-06, "loss": 0.12387351989746094, "step": 76720 }, { "epoch": 0.6634183880813828, "grad_norm": 11.317492421730233, "learning_rate": 4.513236799453777e-06, "loss": 0.5955078125, "step": 76725 }, { "epoch": 0.6634616216029261, "grad_norm": 4.125159045095781, "learning_rate": 4.513060879920109e-06, "loss": 0.21605224609375, "step": 76730 }, { "epoch": 0.6635048551244693, "grad_norm": 0.15186844422035964, "learning_rate": 4.51288495340834e-06, "loss": 0.11755867004394531, "step": 76735 }, { "epoch": 0.6635480886460126, "grad_norm": 0.7755504995649432, "learning_rate": 4.512709019919278e-06, "loss": 0.14241943359375, "step": 76740 }, { "epoch": 0.6635913221675558, "grad_norm": 12.247852971551806, "learning_rate": 4.512533079453736e-06, "loss": 0.091900634765625, "step": 76745 }, { "epoch": 0.6636345556890991, "grad_norm": 16.71825490773526, "learning_rate": 4.512357132012526e-06, "loss": 0.09826869964599609, "step": 76750 }, { "epoch": 0.6636777892106424, "grad_norm": 1.4224899596384473, "learning_rate": 4.512181177596457e-06, "loss": 0.09847412109375, "step": 76755 }, { "epoch": 0.6637210227321856, "grad_norm": 11.655862360988078, "learning_rate": 4.512005216206342e-06, "loss": 0.2764427185058594, "step": 76760 }, { "epoch": 0.6637642562537289, "grad_norm": 7.165175434913332, "learning_rate": 4.511829247842993e-06, "loss": 0.06830902099609375, "step": 76765 }, { "epoch": 0.6638074897752722, "grad_norm": 2.461924758750683, "learning_rate": 4.511653272507221e-06, "loss": 0.3119384765625, "step": 76770 }, { "epoch": 0.6638507232968154, "grad_norm": 0.6846567289115699, "learning_rate": 4.511477290199839e-06, "loss": 0.344195556640625, "step": 76775 }, { "epoch": 0.6638939568183587, "grad_norm": 38.48469911121888, "learning_rate": 4.511301300921657e-06, "loss": 0.10904045104980468, "step": 76780 }, { "epoch": 0.663937190339902, "grad_norm": 5.037987859807689, "learning_rate": 4.511125304673485e-06, "loss": 0.26533203125, "step": 76785 }, { "epoch": 0.6639804238614452, "grad_norm": 3.535655058364793, "learning_rate": 4.5109493014561386e-06, "loss": 0.21944961547851563, "step": 76790 }, { "epoch": 0.6640236573829885, "grad_norm": 25.44612477067453, "learning_rate": 4.510773291270426e-06, "loss": 0.16377716064453124, "step": 76795 }, { "epoch": 0.6640668909045317, "grad_norm": 0.17304954339273965, "learning_rate": 4.510597274117162e-06, "loss": 0.17208786010742189, "step": 76800 }, { "epoch": 0.664110124426075, "grad_norm": 22.509193784956175, "learning_rate": 4.510421249997156e-06, "loss": 0.16317138671875, "step": 76805 }, { "epoch": 0.6641533579476183, "grad_norm": 1.377155569383674, "learning_rate": 4.51024521891122e-06, "loss": 0.3743927001953125, "step": 76810 }, { "epoch": 0.6641965914691615, "grad_norm": 2.0109378227326107, "learning_rate": 4.510069180860168e-06, "loss": 0.047137451171875, "step": 76815 }, { "epoch": 0.6642398249907048, "grad_norm": 5.489664337499126, "learning_rate": 4.5098931358448095e-06, "loss": 0.09999465942382812, "step": 76820 }, { "epoch": 0.664283058512248, "grad_norm": 4.633093779609062, "learning_rate": 4.509717083865958e-06, "loss": 0.15672149658203124, "step": 76825 }, { "epoch": 0.6643262920337913, "grad_norm": 8.871303533571222, "learning_rate": 4.509541024924423e-06, "loss": 0.23943252563476564, "step": 76830 }, { "epoch": 0.6643695255553346, "grad_norm": 21.51068424583155, "learning_rate": 4.50936495902102e-06, "loss": 0.19177093505859374, "step": 76835 }, { "epoch": 0.6644127590768778, "grad_norm": 8.11458247164116, "learning_rate": 4.5091888861565585e-06, "loss": 0.2577239990234375, "step": 76840 }, { "epoch": 0.6644559925984211, "grad_norm": 0.8812761360076973, "learning_rate": 4.50901280633185e-06, "loss": 0.223553466796875, "step": 76845 }, { "epoch": 0.6644992261199644, "grad_norm": 4.432167397708576, "learning_rate": 4.50883671954771e-06, "loss": 0.3305030822753906, "step": 76850 }, { "epoch": 0.6645424596415076, "grad_norm": 40.15584005271467, "learning_rate": 4.508660625804948e-06, "loss": 0.45413970947265625, "step": 76855 }, { "epoch": 0.6645856931630509, "grad_norm": 14.416695165500066, "learning_rate": 4.5084845251043746e-06, "loss": 0.14036865234375, "step": 76860 }, { "epoch": 0.6646289266845942, "grad_norm": 0.5952306141623545, "learning_rate": 4.5083084174468065e-06, "loss": 0.30375518798828127, "step": 76865 }, { "epoch": 0.6646721602061374, "grad_norm": 1.0138353795997064, "learning_rate": 4.508132302833051e-06, "loss": 0.25291290283203127, "step": 76870 }, { "epoch": 0.6647153937276807, "grad_norm": 3.624793250642585, "learning_rate": 4.507956181263923e-06, "loss": 0.174444580078125, "step": 76875 }, { "epoch": 0.664758627249224, "grad_norm": 0.8049561061502732, "learning_rate": 4.507780052740235e-06, "loss": 0.13471221923828125, "step": 76880 }, { "epoch": 0.6648018607707672, "grad_norm": 0.29307465155310847, "learning_rate": 4.507603917262799e-06, "loss": 0.09513320922851562, "step": 76885 }, { "epoch": 0.6648450942923105, "grad_norm": 6.319636613853477, "learning_rate": 4.5074277748324254e-06, "loss": 0.2123565673828125, "step": 76890 }, { "epoch": 0.6648883278138538, "grad_norm": 2.830882117805078, "learning_rate": 4.50725162544993e-06, "loss": 0.05711822509765625, "step": 76895 }, { "epoch": 0.664931561335397, "grad_norm": 5.002789333616627, "learning_rate": 4.507075469116121e-06, "loss": 0.3036918640136719, "step": 76900 }, { "epoch": 0.6649747948569403, "grad_norm": 7.941627484053565, "learning_rate": 4.506899305831815e-06, "loss": 0.05268173217773438, "step": 76905 }, { "epoch": 0.6650180283784836, "grad_norm": 2.034853010796832, "learning_rate": 4.506723135597822e-06, "loss": 0.029021072387695312, "step": 76910 }, { "epoch": 0.6650612619000268, "grad_norm": 18.856293649271038, "learning_rate": 4.506546958414955e-06, "loss": 0.23370895385742188, "step": 76915 }, { "epoch": 0.66510449542157, "grad_norm": 20.66831749215768, "learning_rate": 4.506370774284027e-06, "loss": 0.22921371459960938, "step": 76920 }, { "epoch": 0.6651477289431134, "grad_norm": 5.266695989780764, "learning_rate": 4.50619458320585e-06, "loss": 0.09750175476074219, "step": 76925 }, { "epoch": 0.6651909624646566, "grad_norm": 5.223126999034638, "learning_rate": 4.506018385181235e-06, "loss": 0.10504302978515626, "step": 76930 }, { "epoch": 0.6652341959861998, "grad_norm": 7.610472695376158, "learning_rate": 4.505842180210998e-06, "loss": 0.2698402404785156, "step": 76935 }, { "epoch": 0.6652774295077432, "grad_norm": 23.983955433074097, "learning_rate": 4.505665968295949e-06, "loss": 0.09026107788085938, "step": 76940 }, { "epoch": 0.6653206630292864, "grad_norm": 5.055239064904683, "learning_rate": 4.505489749436903e-06, "loss": 0.09484519958496093, "step": 76945 }, { "epoch": 0.6653638965508296, "grad_norm": 9.564134829747609, "learning_rate": 4.505313523634671e-06, "loss": 0.2192962646484375, "step": 76950 }, { "epoch": 0.665407130072373, "grad_norm": 33.289831251211446, "learning_rate": 4.5051372908900654e-06, "loss": 0.25631866455078123, "step": 76955 }, { "epoch": 0.6654503635939162, "grad_norm": 1.30888458090648, "learning_rate": 4.504961051203899e-06, "loss": 0.06894989013671875, "step": 76960 }, { "epoch": 0.6654935971154594, "grad_norm": 19.559237349009134, "learning_rate": 4.504784804576986e-06, "loss": 0.07462196350097657, "step": 76965 }, { "epoch": 0.6655368306370028, "grad_norm": 0.1912457961003238, "learning_rate": 4.504608551010138e-06, "loss": 0.03213424682617187, "step": 76970 }, { "epoch": 0.665580064158546, "grad_norm": 4.093324840964101, "learning_rate": 4.504432290504169e-06, "loss": 0.14709701538085937, "step": 76975 }, { "epoch": 0.6656232976800892, "grad_norm": 8.65280467521397, "learning_rate": 4.504256023059891e-06, "loss": 0.1692779541015625, "step": 76980 }, { "epoch": 0.6656665312016325, "grad_norm": 12.870375231938324, "learning_rate": 4.504079748678117e-06, "loss": 0.22512741088867189, "step": 76985 }, { "epoch": 0.6657097647231758, "grad_norm": 5.3356568955417, "learning_rate": 4.50390346735966e-06, "loss": 0.3206672668457031, "step": 76990 }, { "epoch": 0.665752998244719, "grad_norm": 3.3548987351576516, "learning_rate": 4.503727179105333e-06, "loss": 0.15429000854492186, "step": 76995 }, { "epoch": 0.6657962317662622, "grad_norm": 0.5414072690482822, "learning_rate": 4.5035508839159494e-06, "loss": 0.05811553001403809, "step": 77000 }, { "epoch": 0.6658394652878056, "grad_norm": 3.504472083471062, "learning_rate": 4.503374581792322e-06, "loss": 0.22240085601806642, "step": 77005 }, { "epoch": 0.6658826988093488, "grad_norm": 1.4079068627443394, "learning_rate": 4.5031982727352644e-06, "loss": 0.20937347412109375, "step": 77010 }, { "epoch": 0.665925932330892, "grad_norm": 30.300644421342025, "learning_rate": 4.503021956745588e-06, "loss": 0.213751220703125, "step": 77015 }, { "epoch": 0.6659691658524354, "grad_norm": 1.1699902230378167, "learning_rate": 4.502845633824107e-06, "loss": 0.4417724609375, "step": 77020 }, { "epoch": 0.6660123993739786, "grad_norm": 4.102236103967093, "learning_rate": 4.502669303971636e-06, "loss": 0.02492694854736328, "step": 77025 }, { "epoch": 0.6660556328955218, "grad_norm": 16.824668909630486, "learning_rate": 4.502492967188987e-06, "loss": 0.283740234375, "step": 77030 }, { "epoch": 0.6660988664170652, "grad_norm": 24.708926345593913, "learning_rate": 4.502316623476972e-06, "loss": 0.07356338500976563, "step": 77035 }, { "epoch": 0.6661420999386084, "grad_norm": 1.6895962601175707, "learning_rate": 4.502140272836406e-06, "loss": 0.11608047485351562, "step": 77040 }, { "epoch": 0.6661853334601516, "grad_norm": 1.7212428745877935, "learning_rate": 4.501963915268102e-06, "loss": 0.2569232940673828, "step": 77045 }, { "epoch": 0.666228566981695, "grad_norm": 5.885818639610094, "learning_rate": 4.5017875507728724e-06, "loss": 0.23987579345703125, "step": 77050 }, { "epoch": 0.6662718005032382, "grad_norm": 10.331596090984014, "learning_rate": 4.5016111793515325e-06, "loss": 0.12103805541992188, "step": 77055 }, { "epoch": 0.6663150340247814, "grad_norm": 1.1071825592923454, "learning_rate": 4.5014348010048935e-06, "loss": 0.0632537841796875, "step": 77060 }, { "epoch": 0.6663582675463248, "grad_norm": 1.0684395455983038, "learning_rate": 4.501258415733771e-06, "loss": 0.10921478271484375, "step": 77065 }, { "epoch": 0.666401501067868, "grad_norm": 10.652071122986635, "learning_rate": 4.501082023538975e-06, "loss": 0.163482666015625, "step": 77070 }, { "epoch": 0.6664447345894112, "grad_norm": 4.533526202474426, "learning_rate": 4.500905624421324e-06, "loss": 0.03512229919433594, "step": 77075 }, { "epoch": 0.6664879681109546, "grad_norm": 3.1181291229633112, "learning_rate": 4.500729218381628e-06, "loss": 0.10481300354003906, "step": 77080 }, { "epoch": 0.6665312016324978, "grad_norm": 8.045703810035334, "learning_rate": 4.500552805420701e-06, "loss": 0.14932098388671874, "step": 77085 }, { "epoch": 0.666574435154041, "grad_norm": 0.833488907976956, "learning_rate": 4.500376385539359e-06, "loss": 0.269830322265625, "step": 77090 }, { "epoch": 0.6666176686755843, "grad_norm": 27.59612479397434, "learning_rate": 4.500199958738411e-06, "loss": 0.1466583251953125, "step": 77095 }, { "epoch": 0.6666609021971276, "grad_norm": 30.075928280005062, "learning_rate": 4.5000235250186735e-06, "loss": 0.17821044921875, "step": 77100 }, { "epoch": 0.6667041357186708, "grad_norm": 1.401913581497293, "learning_rate": 4.499847084380961e-06, "loss": 0.12170372009277344, "step": 77105 }, { "epoch": 0.666747369240214, "grad_norm": 27.756771744914065, "learning_rate": 4.499670636826087e-06, "loss": 0.08237972259521484, "step": 77110 }, { "epoch": 0.6667906027617574, "grad_norm": 28.81345361441092, "learning_rate": 4.499494182354864e-06, "loss": 0.33491973876953124, "step": 77115 }, { "epoch": 0.6668338362833006, "grad_norm": 7.549003575392963, "learning_rate": 4.4993177209681055e-06, "loss": 0.04538669586181641, "step": 77120 }, { "epoch": 0.6668770698048438, "grad_norm": 1.33841612927409, "learning_rate": 4.499141252666627e-06, "loss": 0.22858810424804688, "step": 77125 }, { "epoch": 0.6669203033263872, "grad_norm": 26.9879807574719, "learning_rate": 4.498964777451242e-06, "loss": 0.3179058074951172, "step": 77130 }, { "epoch": 0.6669635368479304, "grad_norm": 23.83493556980475, "learning_rate": 4.498788295322762e-06, "loss": 0.16911392211914061, "step": 77135 }, { "epoch": 0.6670067703694736, "grad_norm": 19.47800775531193, "learning_rate": 4.498611806282005e-06, "loss": 0.310479736328125, "step": 77140 }, { "epoch": 0.667050003891017, "grad_norm": 2.146713626653315, "learning_rate": 4.498435310329781e-06, "loss": 0.11460628509521484, "step": 77145 }, { "epoch": 0.6670932374125602, "grad_norm": 21.184223366396115, "learning_rate": 4.498258807466907e-06, "loss": 0.13990745544433594, "step": 77150 }, { "epoch": 0.6671364709341034, "grad_norm": 0.37435052044577316, "learning_rate": 4.498082297694195e-06, "loss": 0.1338714599609375, "step": 77155 }, { "epoch": 0.6671797044556468, "grad_norm": 21.10040058174205, "learning_rate": 4.49790578101246e-06, "loss": 0.13390121459960938, "step": 77160 }, { "epoch": 0.66722293797719, "grad_norm": 32.91431757939785, "learning_rate": 4.497729257422516e-06, "loss": 0.6465118408203125, "step": 77165 }, { "epoch": 0.6672661714987332, "grad_norm": 13.897942664437902, "learning_rate": 4.4975527269251774e-06, "loss": 0.3006317138671875, "step": 77170 }, { "epoch": 0.6673094050202765, "grad_norm": 21.65829894051794, "learning_rate": 4.4973761895212576e-06, "loss": 0.3554981231689453, "step": 77175 }, { "epoch": 0.6673526385418198, "grad_norm": 4.811571933966041, "learning_rate": 4.497199645211571e-06, "loss": 0.0911285400390625, "step": 77180 }, { "epoch": 0.667395872063363, "grad_norm": 16.010921726439793, "learning_rate": 4.497023093996932e-06, "loss": 0.0888916015625, "step": 77185 }, { "epoch": 0.6674391055849063, "grad_norm": 30.89705843602666, "learning_rate": 4.4968465358781544e-06, "loss": 0.21442108154296874, "step": 77190 }, { "epoch": 0.6674823391064496, "grad_norm": 4.377226838521585, "learning_rate": 4.496669970856053e-06, "loss": 0.07942047119140624, "step": 77195 }, { "epoch": 0.6675255726279928, "grad_norm": 35.05772211757537, "learning_rate": 4.496493398931442e-06, "loss": 0.2450439453125, "step": 77200 }, { "epoch": 0.6675688061495361, "grad_norm": 25.399568405999993, "learning_rate": 4.496316820105136e-06, "loss": 0.3827861785888672, "step": 77205 }, { "epoch": 0.6676120396710794, "grad_norm": 6.439948316805434, "learning_rate": 4.4961402343779496e-06, "loss": 0.19005126953125, "step": 77210 }, { "epoch": 0.6676552731926226, "grad_norm": 1.981073155358982, "learning_rate": 4.495963641750694e-06, "loss": 0.058242416381835936, "step": 77215 }, { "epoch": 0.6676985067141659, "grad_norm": 0.7346894544353587, "learning_rate": 4.49578704222419e-06, "loss": 0.16640129089355468, "step": 77220 }, { "epoch": 0.6677417402357092, "grad_norm": 6.317903150855223, "learning_rate": 4.4956104357992455e-06, "loss": 0.42835845947265627, "step": 77225 }, { "epoch": 0.6677849737572524, "grad_norm": 12.877581656016694, "learning_rate": 4.495433822476679e-06, "loss": 0.36433563232421873, "step": 77230 }, { "epoch": 0.6678282072787957, "grad_norm": 2.378848077508897, "learning_rate": 4.495257202257304e-06, "loss": 0.06695098876953125, "step": 77235 }, { "epoch": 0.667871440800339, "grad_norm": 18.333943399700438, "learning_rate": 4.495080575141934e-06, "loss": 0.1877277374267578, "step": 77240 }, { "epoch": 0.6679146743218822, "grad_norm": 10.689168835716995, "learning_rate": 4.4949039411313856e-06, "loss": 0.45319442749023436, "step": 77245 }, { "epoch": 0.6679579078434255, "grad_norm": 23.891310450710584, "learning_rate": 4.494727300226472e-06, "loss": 0.2705837249755859, "step": 77250 }, { "epoch": 0.6680011413649687, "grad_norm": 5.521252947409342, "learning_rate": 4.494550652428008e-06, "loss": 0.15378189086914062, "step": 77255 }, { "epoch": 0.668044374886512, "grad_norm": 9.366761056165855, "learning_rate": 4.494373997736808e-06, "loss": 0.378076171875, "step": 77260 }, { "epoch": 0.6680876084080553, "grad_norm": 16.492026446829353, "learning_rate": 4.494197336153687e-06, "loss": 0.14039306640625, "step": 77265 }, { "epoch": 0.6681308419295985, "grad_norm": 19.967704515377648, "learning_rate": 4.494020667679461e-06, "loss": 0.068048095703125, "step": 77270 }, { "epoch": 0.6681740754511418, "grad_norm": 2.598681336669276, "learning_rate": 4.493843992314943e-06, "loss": 0.285400390625, "step": 77275 }, { "epoch": 0.668217308972685, "grad_norm": 0.7043904464151368, "learning_rate": 4.493667310060949e-06, "loss": 0.137799072265625, "step": 77280 }, { "epoch": 0.6682605424942283, "grad_norm": 0.8790574847646329, "learning_rate": 4.493490620918293e-06, "loss": 0.21229820251464843, "step": 77285 }, { "epoch": 0.6683037760157716, "grad_norm": 2.5324234847945477, "learning_rate": 4.49331392488779e-06, "loss": 0.156817626953125, "step": 77290 }, { "epoch": 0.6683470095373149, "grad_norm": 14.156030793239811, "learning_rate": 4.4931372219702555e-06, "loss": 0.13975372314453124, "step": 77295 }, { "epoch": 0.6683902430588581, "grad_norm": 0.43876487186514396, "learning_rate": 4.492960512166504e-06, "loss": 0.12219581604003907, "step": 77300 }, { "epoch": 0.6684334765804014, "grad_norm": 3.001718999823138, "learning_rate": 4.492783795477351e-06, "loss": 0.0370758056640625, "step": 77305 }, { "epoch": 0.6684767101019446, "grad_norm": 1.0750353157158976, "learning_rate": 4.49260707190361e-06, "loss": 0.08882522583007812, "step": 77310 }, { "epoch": 0.6685199436234879, "grad_norm": 4.0546738805897435, "learning_rate": 4.492430341446098e-06, "loss": 0.199346923828125, "step": 77315 }, { "epoch": 0.6685631771450312, "grad_norm": 8.206654343667822, "learning_rate": 4.492253604105628e-06, "loss": 0.2442291259765625, "step": 77320 }, { "epoch": 0.6686064106665744, "grad_norm": 0.23412649379216127, "learning_rate": 4.492076859883017e-06, "loss": 0.15955657958984376, "step": 77325 }, { "epoch": 0.6686496441881177, "grad_norm": 11.276057689233141, "learning_rate": 4.49190010877908e-06, "loss": 0.07388992309570312, "step": 77330 }, { "epoch": 0.668692877709661, "grad_norm": 6.098249098416783, "learning_rate": 4.49172335079463e-06, "loss": 0.08606147766113281, "step": 77335 }, { "epoch": 0.6687361112312042, "grad_norm": 58.2358762437482, "learning_rate": 4.491546585930485e-06, "loss": 0.40774993896484374, "step": 77340 }, { "epoch": 0.6687793447527475, "grad_norm": 1.3096655149570098, "learning_rate": 4.4913698141874584e-06, "loss": 0.2573150634765625, "step": 77345 }, { "epoch": 0.6688225782742907, "grad_norm": 2.7304753409312434, "learning_rate": 4.491193035566366e-06, "loss": 0.116046142578125, "step": 77350 }, { "epoch": 0.668865811795834, "grad_norm": 5.587545087552799, "learning_rate": 4.491016250068023e-06, "loss": 0.07375106811523438, "step": 77355 }, { "epoch": 0.6689090453173773, "grad_norm": 2.120015388148049, "learning_rate": 4.490839457693246e-06, "loss": 0.295916748046875, "step": 77360 }, { "epoch": 0.6689522788389205, "grad_norm": 4.597707851713197, "learning_rate": 4.490662658442848e-06, "loss": 0.08565902709960938, "step": 77365 }, { "epoch": 0.6689955123604638, "grad_norm": 30.696346245116484, "learning_rate": 4.490485852317645e-06, "loss": 0.251947021484375, "step": 77370 }, { "epoch": 0.6690387458820071, "grad_norm": 28.291303242849576, "learning_rate": 4.490309039318454e-06, "loss": 0.3995628356933594, "step": 77375 }, { "epoch": 0.6690819794035503, "grad_norm": 14.004212733017871, "learning_rate": 4.490132219446089e-06, "loss": 0.09134292602539062, "step": 77380 }, { "epoch": 0.6691252129250936, "grad_norm": 2.462954037230116, "learning_rate": 4.4899553927013655e-06, "loss": 0.1059539794921875, "step": 77385 }, { "epoch": 0.6691684464466369, "grad_norm": 6.725476595242596, "learning_rate": 4.4897785590851e-06, "loss": 0.1247589111328125, "step": 77390 }, { "epoch": 0.6692116799681801, "grad_norm": 14.325203225985172, "learning_rate": 4.489601718598108e-06, "loss": 0.457867431640625, "step": 77395 }, { "epoch": 0.6692549134897234, "grad_norm": 16.274916567228377, "learning_rate": 4.4894248712412035e-06, "loss": 0.41234130859375, "step": 77400 }, { "epoch": 0.6692981470112667, "grad_norm": 4.999978455154525, "learning_rate": 4.4892480170152036e-06, "loss": 0.1481781005859375, "step": 77405 }, { "epoch": 0.6693413805328099, "grad_norm": 12.034825820500451, "learning_rate": 4.489071155920923e-06, "loss": 0.1605224609375, "step": 77410 }, { "epoch": 0.6693846140543532, "grad_norm": 18.38050118053968, "learning_rate": 4.488894287959177e-06, "loss": 0.1929107666015625, "step": 77415 }, { "epoch": 0.6694278475758965, "grad_norm": 5.927244323208206, "learning_rate": 4.488717413130783e-06, "loss": 0.069122314453125, "step": 77420 }, { "epoch": 0.6694710810974397, "grad_norm": 6.704952885928954, "learning_rate": 4.4885405314365574e-06, "loss": 0.14330368041992186, "step": 77425 }, { "epoch": 0.6695143146189829, "grad_norm": 5.3846339723928205, "learning_rate": 4.488363642877313e-06, "loss": 0.047821044921875, "step": 77430 }, { "epoch": 0.6695575481405263, "grad_norm": 5.421942515452395, "learning_rate": 4.4881867474538665e-06, "loss": 0.1874755859375, "step": 77435 }, { "epoch": 0.6696007816620695, "grad_norm": 1.1963300057733484, "learning_rate": 4.488009845167034e-06, "loss": 0.1370075225830078, "step": 77440 }, { "epoch": 0.6696440151836127, "grad_norm": 2.3461117277249617, "learning_rate": 4.487832936017632e-06, "loss": 0.25789794921875, "step": 77445 }, { "epoch": 0.6696872487051561, "grad_norm": 12.839855478442688, "learning_rate": 4.487656020006477e-06, "loss": 0.51365966796875, "step": 77450 }, { "epoch": 0.6697304822266993, "grad_norm": 22.981175827341488, "learning_rate": 4.487479097134382e-06, "loss": 0.0985809326171875, "step": 77455 }, { "epoch": 0.6697737157482425, "grad_norm": 2.831095811968119, "learning_rate": 4.487302167402166e-06, "loss": 0.09263916015625, "step": 77460 }, { "epoch": 0.6698169492697859, "grad_norm": 37.99328722379374, "learning_rate": 4.487125230810644e-06, "loss": 0.634640121459961, "step": 77465 }, { "epoch": 0.6698601827913291, "grad_norm": 32.48996425466985, "learning_rate": 4.486948287360632e-06, "loss": 0.40725021362304686, "step": 77470 }, { "epoch": 0.6699034163128723, "grad_norm": 7.116981966980816, "learning_rate": 4.486771337052945e-06, "loss": 0.08123092651367188, "step": 77475 }, { "epoch": 0.6699466498344157, "grad_norm": 6.358187516351054, "learning_rate": 4.4865943798884e-06, "loss": 0.1672119140625, "step": 77480 }, { "epoch": 0.6699898833559589, "grad_norm": 14.520185993662947, "learning_rate": 4.486417415867814e-06, "loss": 0.11297760009765626, "step": 77485 }, { "epoch": 0.6700331168775021, "grad_norm": 8.001447993966824, "learning_rate": 4.486240444992001e-06, "loss": 0.11006011962890624, "step": 77490 }, { "epoch": 0.6700763503990455, "grad_norm": 0.24531745250233303, "learning_rate": 4.486063467261779e-06, "loss": 0.07339820861816407, "step": 77495 }, { "epoch": 0.6701195839205887, "grad_norm": 0.8277436545887555, "learning_rate": 4.485886482677965e-06, "loss": 0.0260162353515625, "step": 77500 }, { "epoch": 0.6701628174421319, "grad_norm": 0.7215505855190691, "learning_rate": 4.485709491241372e-06, "loss": 0.07859344482421875, "step": 77505 }, { "epoch": 0.6702060509636752, "grad_norm": 11.9320209966947, "learning_rate": 4.485532492952819e-06, "loss": 0.0682769775390625, "step": 77510 }, { "epoch": 0.6702492844852185, "grad_norm": 0.22305053629251065, "learning_rate": 4.4853554878131206e-06, "loss": 0.11408157348632812, "step": 77515 }, { "epoch": 0.6702925180067617, "grad_norm": 4.983106586839446, "learning_rate": 4.485178475823094e-06, "loss": 0.46454925537109376, "step": 77520 }, { "epoch": 0.6703357515283049, "grad_norm": 11.198829641061122, "learning_rate": 4.4850014569835565e-06, "loss": 0.10024871826171874, "step": 77525 }, { "epoch": 0.6703789850498483, "grad_norm": 0.11943048558343751, "learning_rate": 4.484824431295322e-06, "loss": 0.09291152954101563, "step": 77530 }, { "epoch": 0.6704222185713915, "grad_norm": 1.5146772706957865, "learning_rate": 4.4846473987592105e-06, "loss": 0.05146865844726563, "step": 77535 }, { "epoch": 0.6704654520929347, "grad_norm": 31.321192620872342, "learning_rate": 4.484470359376035e-06, "loss": 0.540740966796875, "step": 77540 }, { "epoch": 0.6705086856144781, "grad_norm": 5.373094367009103, "learning_rate": 4.484293313146613e-06, "loss": 0.1506561279296875, "step": 77545 }, { "epoch": 0.6705519191360213, "grad_norm": 1.4456176285592612, "learning_rate": 4.484116260071762e-06, "loss": 0.05494203567504883, "step": 77550 }, { "epoch": 0.6705951526575645, "grad_norm": 3.8293119248500025, "learning_rate": 4.4839392001522985e-06, "loss": 0.3261451721191406, "step": 77555 }, { "epoch": 0.6706383861791079, "grad_norm": 14.151333251343173, "learning_rate": 4.483762133389038e-06, "loss": 0.07609481811523437, "step": 77560 }, { "epoch": 0.6706816197006511, "grad_norm": 2.7870151021424565, "learning_rate": 4.483585059782797e-06, "loss": 0.02509613037109375, "step": 77565 }, { "epoch": 0.6707248532221943, "grad_norm": 2.728060662352371, "learning_rate": 4.483407979334394e-06, "loss": 0.057990646362304686, "step": 77570 }, { "epoch": 0.6707680867437377, "grad_norm": 7.809116533619171, "learning_rate": 4.483230892044643e-06, "loss": 0.13907394409179688, "step": 77575 }, { "epoch": 0.6708113202652809, "grad_norm": 5.79217304788701, "learning_rate": 4.483053797914363e-06, "loss": 0.05609817504882812, "step": 77580 }, { "epoch": 0.6708545537868241, "grad_norm": 0.20760380331353007, "learning_rate": 4.48287669694437e-06, "loss": 0.1297637939453125, "step": 77585 }, { "epoch": 0.6708977873083675, "grad_norm": 12.80102917535969, "learning_rate": 4.482699589135481e-06, "loss": 0.14437255859375, "step": 77590 }, { "epoch": 0.6709410208299107, "grad_norm": 0.6673471000199792, "learning_rate": 4.4825224744885126e-06, "loss": 0.08557662963867188, "step": 77595 }, { "epoch": 0.6709842543514539, "grad_norm": 3.6149772286068673, "learning_rate": 4.482345353004281e-06, "loss": 0.063336181640625, "step": 77600 }, { "epoch": 0.6710274878729972, "grad_norm": 3.457951115104985, "learning_rate": 4.482168224683603e-06, "loss": 0.08084983825683593, "step": 77605 }, { "epoch": 0.6710707213945405, "grad_norm": 55.809031836732345, "learning_rate": 4.481991089527297e-06, "loss": 0.19163589477539061, "step": 77610 }, { "epoch": 0.6711139549160837, "grad_norm": 1.0738811508619606, "learning_rate": 4.481813947536179e-06, "loss": 0.1491485595703125, "step": 77615 }, { "epoch": 0.671157188437627, "grad_norm": 2.720975247084631, "learning_rate": 4.481636798711067e-06, "loss": 0.06437568664550782, "step": 77620 }, { "epoch": 0.6712004219591703, "grad_norm": 6.987635859678522, "learning_rate": 4.481459643052775e-06, "loss": 0.1558990478515625, "step": 77625 }, { "epoch": 0.6712436554807135, "grad_norm": 4.014476781159799, "learning_rate": 4.481282480562123e-06, "loss": 0.4080482482910156, "step": 77630 }, { "epoch": 0.6712868890022567, "grad_norm": 11.215334414342118, "learning_rate": 4.481105311239927e-06, "loss": 0.036244964599609374, "step": 77635 }, { "epoch": 0.6713301225238001, "grad_norm": 30.65624596801941, "learning_rate": 4.480928135087004e-06, "loss": 0.436187744140625, "step": 77640 }, { "epoch": 0.6713733560453433, "grad_norm": 5.528856715891408, "learning_rate": 4.480750952104172e-06, "loss": 0.12528076171875, "step": 77645 }, { "epoch": 0.6714165895668865, "grad_norm": 0.5582335639099544, "learning_rate": 4.480573762292247e-06, "loss": 0.0840057373046875, "step": 77650 }, { "epoch": 0.6714598230884299, "grad_norm": 2.1737637741055438, "learning_rate": 4.4803965656520464e-06, "loss": 0.08704910278320313, "step": 77655 }, { "epoch": 0.6715030566099731, "grad_norm": 17.07082151056475, "learning_rate": 4.480219362184388e-06, "loss": 0.2681671142578125, "step": 77660 }, { "epoch": 0.6715462901315163, "grad_norm": 10.62801118956696, "learning_rate": 4.480042151890088e-06, "loss": 0.231573486328125, "step": 77665 }, { "epoch": 0.6715895236530597, "grad_norm": 0.6837407511706993, "learning_rate": 4.479864934769965e-06, "loss": 0.0670440673828125, "step": 77670 }, { "epoch": 0.6716327571746029, "grad_norm": 0.4122871507647522, "learning_rate": 4.479687710824836e-06, "loss": 0.08516387939453125, "step": 77675 }, { "epoch": 0.6716759906961461, "grad_norm": 3.084725730482454, "learning_rate": 4.479510480055518e-06, "loss": 0.03136425018310547, "step": 77680 }, { "epoch": 0.6717192242176895, "grad_norm": 10.000758572032282, "learning_rate": 4.479333242462828e-06, "loss": 0.1346649169921875, "step": 77685 }, { "epoch": 0.6717624577392327, "grad_norm": 3.9504365174312017, "learning_rate": 4.479155998047583e-06, "loss": 0.096917724609375, "step": 77690 }, { "epoch": 0.6718056912607759, "grad_norm": 0.5368113242656509, "learning_rate": 4.478978746810602e-06, "loss": 0.05660247802734375, "step": 77695 }, { "epoch": 0.6718489247823192, "grad_norm": 0.860108146720087, "learning_rate": 4.478801488752702e-06, "loss": 0.59873046875, "step": 77700 }, { "epoch": 0.6718921583038625, "grad_norm": 7.04434250464685, "learning_rate": 4.4786242238747e-06, "loss": 0.054257774353027345, "step": 77705 }, { "epoch": 0.6719353918254057, "grad_norm": 0.29007540405251, "learning_rate": 4.478446952177414e-06, "loss": 0.6593391418457031, "step": 77710 }, { "epoch": 0.671978625346949, "grad_norm": 2.6221581457751775, "learning_rate": 4.478269673661661e-06, "loss": 0.07044620513916015, "step": 77715 }, { "epoch": 0.6720218588684923, "grad_norm": 4.484477425834912, "learning_rate": 4.478092388328259e-06, "loss": 0.06994171142578125, "step": 77720 }, { "epoch": 0.6720650923900355, "grad_norm": 15.841930250098963, "learning_rate": 4.477915096178025e-06, "loss": 0.158331298828125, "step": 77725 }, { "epoch": 0.6721083259115788, "grad_norm": 10.530334456376352, "learning_rate": 4.477737797211778e-06, "loss": 0.24693450927734376, "step": 77730 }, { "epoch": 0.6721515594331221, "grad_norm": 2.407795056977839, "learning_rate": 4.4775604914303335e-06, "loss": 0.09427032470703126, "step": 77735 }, { "epoch": 0.6721947929546653, "grad_norm": 22.8896642522067, "learning_rate": 4.477383178834512e-06, "loss": 0.194146728515625, "step": 77740 }, { "epoch": 0.6722380264762086, "grad_norm": 7.483901693249754, "learning_rate": 4.477205859425129e-06, "loss": 0.108563232421875, "step": 77745 }, { "epoch": 0.6722812599977519, "grad_norm": 38.41908576597944, "learning_rate": 4.477028533203003e-06, "loss": 0.3806243896484375, "step": 77750 }, { "epoch": 0.6723244935192951, "grad_norm": 17.543630380316614, "learning_rate": 4.476851200168953e-06, "loss": 0.087567138671875, "step": 77755 }, { "epoch": 0.6723677270408384, "grad_norm": 17.470469108351622, "learning_rate": 4.476673860323795e-06, "loss": 0.10986328125, "step": 77760 }, { "epoch": 0.6724109605623817, "grad_norm": 8.317482946161446, "learning_rate": 4.476496513668348e-06, "loss": 0.1718597412109375, "step": 77765 }, { "epoch": 0.6724541940839249, "grad_norm": 33.76644938727535, "learning_rate": 4.476319160203429e-06, "loss": 0.6440338134765625, "step": 77770 }, { "epoch": 0.6724974276054682, "grad_norm": 15.756064511675829, "learning_rate": 4.476141799929857e-06, "loss": 0.2144378662109375, "step": 77775 }, { "epoch": 0.6725406611270114, "grad_norm": 115.03202980482446, "learning_rate": 4.475964432848449e-06, "loss": 0.09249114990234375, "step": 77780 }, { "epoch": 0.6725838946485547, "grad_norm": 10.224808429040998, "learning_rate": 4.475787058960023e-06, "loss": 0.1647216796875, "step": 77785 }, { "epoch": 0.672627128170098, "grad_norm": 2.4775521408248324, "learning_rate": 4.475609678265399e-06, "loss": 0.03083648681640625, "step": 77790 }, { "epoch": 0.6726703616916412, "grad_norm": 34.85988766737483, "learning_rate": 4.475432290765392e-06, "loss": 0.22249908447265626, "step": 77795 }, { "epoch": 0.6727135952131845, "grad_norm": 22.242276634763936, "learning_rate": 4.475254896460822e-06, "loss": 0.1601715087890625, "step": 77800 }, { "epoch": 0.6727568287347278, "grad_norm": 3.8297608711312834, "learning_rate": 4.475077495352507e-06, "loss": 0.17686080932617188, "step": 77805 }, { "epoch": 0.672800062256271, "grad_norm": 37.127724592186816, "learning_rate": 4.474900087441266e-06, "loss": 0.0386199951171875, "step": 77810 }, { "epoch": 0.6728432957778143, "grad_norm": 14.35521225933131, "learning_rate": 4.4747226727279145e-06, "loss": 0.11035003662109374, "step": 77815 }, { "epoch": 0.6728865292993575, "grad_norm": 34.410971797436105, "learning_rate": 4.474545251213274e-06, "loss": 0.2287109375, "step": 77820 }, { "epoch": 0.6729297628209008, "grad_norm": 13.264246944612582, "learning_rate": 4.47436782289816e-06, "loss": 0.03760814666748047, "step": 77825 }, { "epoch": 0.6729729963424441, "grad_norm": 0.8862118337511752, "learning_rate": 4.474190387783391e-06, "loss": 0.42549915313720704, "step": 77830 }, { "epoch": 0.6730162298639873, "grad_norm": 1.3923701294962614, "learning_rate": 4.474012945869787e-06, "loss": 0.3749542236328125, "step": 77835 }, { "epoch": 0.6730594633855306, "grad_norm": 21.85175332235309, "learning_rate": 4.473835497158166e-06, "loss": 0.3942527770996094, "step": 77840 }, { "epoch": 0.6731026969070739, "grad_norm": 1.4525825494600821, "learning_rate": 4.473658041649345e-06, "loss": 0.03462982177734375, "step": 77845 }, { "epoch": 0.6731459304286171, "grad_norm": 4.275760028457671, "learning_rate": 4.473480579344145e-06, "loss": 0.15362091064453126, "step": 77850 }, { "epoch": 0.6731891639501604, "grad_norm": 0.7323365021330526, "learning_rate": 4.473303110243382e-06, "loss": 0.03622932434082031, "step": 77855 }, { "epoch": 0.6732323974717037, "grad_norm": 9.466485319487813, "learning_rate": 4.473125634347875e-06, "loss": 0.3869384765625, "step": 77860 }, { "epoch": 0.6732756309932469, "grad_norm": 1.509559728393512, "learning_rate": 4.472948151658443e-06, "loss": 0.0596893310546875, "step": 77865 }, { "epoch": 0.6733188645147902, "grad_norm": 6.2452426944209485, "learning_rate": 4.472770662175904e-06, "loss": 0.11727752685546874, "step": 77870 }, { "epoch": 0.6733620980363334, "grad_norm": 0.6455235276275674, "learning_rate": 4.472593165901076e-06, "loss": 0.08521270751953125, "step": 77875 }, { "epoch": 0.6734053315578767, "grad_norm": 11.221851909351502, "learning_rate": 4.47241566283478e-06, "loss": 0.07368011474609375, "step": 77880 }, { "epoch": 0.67344856507942, "grad_norm": 30.10300884894198, "learning_rate": 4.472238152977831e-06, "loss": 0.2178680419921875, "step": 77885 }, { "epoch": 0.6734917986009632, "grad_norm": 4.6908698286020325, "learning_rate": 4.4720606363310525e-06, "loss": 0.0721099853515625, "step": 77890 }, { "epoch": 0.6735350321225065, "grad_norm": 13.058969285040607, "learning_rate": 4.471883112895258e-06, "loss": 0.5581817626953125, "step": 77895 }, { "epoch": 0.6735782656440498, "grad_norm": 52.70398444019203, "learning_rate": 4.47170558267127e-06, "loss": 0.2188124656677246, "step": 77900 }, { "epoch": 0.673621499165593, "grad_norm": 1.9940960595109143, "learning_rate": 4.471528045659906e-06, "loss": 0.13357086181640626, "step": 77905 }, { "epoch": 0.6736647326871363, "grad_norm": 2.820115017309687, "learning_rate": 4.471350501861984e-06, "loss": 0.0584716796875, "step": 77910 }, { "epoch": 0.6737079662086796, "grad_norm": 0.7190827826169718, "learning_rate": 4.471172951278323e-06, "loss": 0.1585845947265625, "step": 77915 }, { "epoch": 0.6737511997302228, "grad_norm": 0.9073078094604288, "learning_rate": 4.470995393909744e-06, "loss": 0.121356201171875, "step": 77920 }, { "epoch": 0.6737944332517661, "grad_norm": 4.611247265628842, "learning_rate": 4.470817829757062e-06, "loss": 0.058469390869140624, "step": 77925 }, { "epoch": 0.6738376667733094, "grad_norm": 3.8586825090240806, "learning_rate": 4.4706402588211e-06, "loss": 0.07601318359375, "step": 77930 }, { "epoch": 0.6738809002948526, "grad_norm": 12.733767389279068, "learning_rate": 4.470462681102674e-06, "loss": 0.085845947265625, "step": 77935 }, { "epoch": 0.6739241338163959, "grad_norm": 3.318154604717987, "learning_rate": 4.470285096602605e-06, "loss": 0.02951812744140625, "step": 77940 }, { "epoch": 0.6739673673379392, "grad_norm": 6.623546110779521, "learning_rate": 4.47010750532171e-06, "loss": 0.09341278076171874, "step": 77945 }, { "epoch": 0.6740106008594824, "grad_norm": 3.207536467944273, "learning_rate": 4.46992990726081e-06, "loss": 0.06495208740234375, "step": 77950 }, { "epoch": 0.6740538343810256, "grad_norm": 2.1868672851076063, "learning_rate": 4.469752302420723e-06, "loss": 0.059075927734375, "step": 77955 }, { "epoch": 0.674097067902569, "grad_norm": 8.28505904987618, "learning_rate": 4.469574690802268e-06, "loss": 0.121112060546875, "step": 77960 }, { "epoch": 0.6741403014241122, "grad_norm": 9.588144609537355, "learning_rate": 4.4693970724062644e-06, "loss": 0.2593963623046875, "step": 77965 }, { "epoch": 0.6741835349456554, "grad_norm": 3.331144980123652, "learning_rate": 4.469219447233531e-06, "loss": 0.17438507080078125, "step": 77970 }, { "epoch": 0.6742267684671988, "grad_norm": 29.552618624456315, "learning_rate": 4.469041815284887e-06, "loss": 0.338818359375, "step": 77975 }, { "epoch": 0.674270001988742, "grad_norm": 13.451804544225022, "learning_rate": 4.4688641765611536e-06, "loss": 0.25917205810546873, "step": 77980 }, { "epoch": 0.6743132355102852, "grad_norm": 48.03478215731525, "learning_rate": 4.468686531063148e-06, "loss": 0.5712615966796875, "step": 77985 }, { "epoch": 0.6743564690318286, "grad_norm": 2.575452370871625, "learning_rate": 4.4685088787916885e-06, "loss": 0.03543167114257813, "step": 77990 }, { "epoch": 0.6743997025533718, "grad_norm": 0.9041396718031547, "learning_rate": 4.468331219747597e-06, "loss": 0.08023834228515625, "step": 77995 }, { "epoch": 0.674442936074915, "grad_norm": 2.694716877044519, "learning_rate": 4.468153553931691e-06, "loss": 0.0368896484375, "step": 78000 }, { "epoch": 0.6744861695964584, "grad_norm": 2.9797653730838856, "learning_rate": 4.467975881344791e-06, "loss": 0.0725250244140625, "step": 78005 }, { "epoch": 0.6745294031180016, "grad_norm": 0.2690786562145419, "learning_rate": 4.467798201987716e-06, "loss": 0.5172409057617188, "step": 78010 }, { "epoch": 0.6745726366395448, "grad_norm": 3.2964030139199245, "learning_rate": 4.467620515861285e-06, "loss": 0.059064483642578124, "step": 78015 }, { "epoch": 0.6746158701610881, "grad_norm": 9.215431042144639, "learning_rate": 4.467442822966318e-06, "loss": 0.12458648681640624, "step": 78020 }, { "epoch": 0.6746591036826314, "grad_norm": 21.87475300791389, "learning_rate": 4.467265123303634e-06, "loss": 0.2563041687011719, "step": 78025 }, { "epoch": 0.6747023372041746, "grad_norm": 9.775003010073045, "learning_rate": 4.467087416874054e-06, "loss": 0.4883880615234375, "step": 78030 }, { "epoch": 0.674745570725718, "grad_norm": 21.424120222319438, "learning_rate": 4.466909703678395e-06, "loss": 0.23464202880859375, "step": 78035 }, { "epoch": 0.6747888042472612, "grad_norm": 10.107725965941606, "learning_rate": 4.46673198371748e-06, "loss": 0.175054931640625, "step": 78040 }, { "epoch": 0.6748320377688044, "grad_norm": 0.18703367270151477, "learning_rate": 4.466554256992125e-06, "loss": 0.11189651489257812, "step": 78045 }, { "epoch": 0.6748752712903476, "grad_norm": 4.755786082843167, "learning_rate": 4.466376523503153e-06, "loss": 0.242144775390625, "step": 78050 }, { "epoch": 0.674918504811891, "grad_norm": 4.809460744469562, "learning_rate": 4.466198783251381e-06, "loss": 0.27805023193359374, "step": 78055 }, { "epoch": 0.6749617383334342, "grad_norm": 5.513696968451718, "learning_rate": 4.466021036237629e-06, "loss": 0.0606292724609375, "step": 78060 }, { "epoch": 0.6750049718549774, "grad_norm": 13.497297353964163, "learning_rate": 4.465843282462718e-06, "loss": 0.07564811706542969, "step": 78065 }, { "epoch": 0.6750482053765208, "grad_norm": 2.248292236717895, "learning_rate": 4.465665521927467e-06, "loss": 0.3344482421875, "step": 78070 }, { "epoch": 0.675091438898064, "grad_norm": 14.38877839932039, "learning_rate": 4.465487754632698e-06, "loss": 0.045009803771972653, "step": 78075 }, { "epoch": 0.6751346724196072, "grad_norm": 1.2408267283531376, "learning_rate": 4.465309980579228e-06, "loss": 0.05119171142578125, "step": 78080 }, { "epoch": 0.6751779059411506, "grad_norm": 2.6764058614472233, "learning_rate": 4.465132199767876e-06, "loss": 0.1199798583984375, "step": 78085 }, { "epoch": 0.6752211394626938, "grad_norm": 15.689988342210386, "learning_rate": 4.464954412199465e-06, "loss": 0.0920135498046875, "step": 78090 }, { "epoch": 0.675264372984237, "grad_norm": 34.879008264953576, "learning_rate": 4.464776617874815e-06, "loss": 0.288287353515625, "step": 78095 }, { "epoch": 0.6753076065057804, "grad_norm": 4.3387906502547215, "learning_rate": 4.4645988167947426e-06, "loss": 0.09820098876953125, "step": 78100 }, { "epoch": 0.6753508400273236, "grad_norm": 21.065044834899215, "learning_rate": 4.464421008960072e-06, "loss": 0.1241851806640625, "step": 78105 }, { "epoch": 0.6753940735488668, "grad_norm": 7.089053640344369, "learning_rate": 4.46424319437162e-06, "loss": 0.229449462890625, "step": 78110 }, { "epoch": 0.6754373070704102, "grad_norm": 9.336079759061432, "learning_rate": 4.464065373030207e-06, "loss": 0.04299850463867187, "step": 78115 }, { "epoch": 0.6754805405919534, "grad_norm": 3.682992979039183, "learning_rate": 4.463887544936654e-06, "loss": 0.0840972900390625, "step": 78120 }, { "epoch": 0.6755237741134966, "grad_norm": 6.806283119716982, "learning_rate": 4.463709710091782e-06, "loss": 0.04892730712890625, "step": 78125 }, { "epoch": 0.6755670076350399, "grad_norm": 2.435195641098452, "learning_rate": 4.463531868496411e-06, "loss": 0.23171539306640626, "step": 78130 }, { "epoch": 0.6756102411565832, "grad_norm": 17.346091706981483, "learning_rate": 4.463354020151358e-06, "loss": 0.465362548828125, "step": 78135 }, { "epoch": 0.6756534746781264, "grad_norm": 0.2072724706660666, "learning_rate": 4.463176165057446e-06, "loss": 0.012729644775390625, "step": 78140 }, { "epoch": 0.6756967081996696, "grad_norm": 8.290978052623084, "learning_rate": 4.462998303215497e-06, "loss": 0.16686553955078126, "step": 78145 }, { "epoch": 0.675739941721213, "grad_norm": 2.119659493888509, "learning_rate": 4.462820434626327e-06, "loss": 0.07040557861328126, "step": 78150 }, { "epoch": 0.6757831752427562, "grad_norm": 0.5233062325404587, "learning_rate": 4.462642559290759e-06, "loss": 0.3948486328125, "step": 78155 }, { "epoch": 0.6758264087642994, "grad_norm": 0.6245889701533283, "learning_rate": 4.462464677209613e-06, "loss": 0.07313232421875, "step": 78160 }, { "epoch": 0.6758696422858428, "grad_norm": 7.176203294714931, "learning_rate": 4.462286788383709e-06, "loss": 0.11832733154296875, "step": 78165 }, { "epoch": 0.675912875807386, "grad_norm": 19.458416254439435, "learning_rate": 4.462108892813866e-06, "loss": 0.0785400390625, "step": 78170 }, { "epoch": 0.6759561093289292, "grad_norm": 1.6551107256860058, "learning_rate": 4.461930990500909e-06, "loss": 0.14361572265625, "step": 78175 }, { "epoch": 0.6759993428504726, "grad_norm": 15.915273534360304, "learning_rate": 4.4617530814456526e-06, "loss": 0.25569915771484375, "step": 78180 }, { "epoch": 0.6760425763720158, "grad_norm": 15.361025501299848, "learning_rate": 4.461575165648922e-06, "loss": 0.07881336212158203, "step": 78185 }, { "epoch": 0.676085809893559, "grad_norm": 13.636615833806907, "learning_rate": 4.461397243111535e-06, "loss": 0.090130615234375, "step": 78190 }, { "epoch": 0.6761290434151024, "grad_norm": 8.327450769871648, "learning_rate": 4.461219313834314e-06, "loss": 0.08662109375, "step": 78195 }, { "epoch": 0.6761722769366456, "grad_norm": 1.9782447356445536, "learning_rate": 4.461041377818078e-06, "loss": 0.0687225341796875, "step": 78200 }, { "epoch": 0.6762155104581888, "grad_norm": 2.1359149163495332, "learning_rate": 4.460863435063647e-06, "loss": 0.13403701782226562, "step": 78205 }, { "epoch": 0.6762587439797322, "grad_norm": 0.5683326866740604, "learning_rate": 4.460685485571845e-06, "loss": 0.01384735107421875, "step": 78210 }, { "epoch": 0.6763019775012754, "grad_norm": 9.372932438617358, "learning_rate": 4.46050752934349e-06, "loss": 0.03936767578125, "step": 78215 }, { "epoch": 0.6763452110228186, "grad_norm": 1.4418705835754928, "learning_rate": 4.460329566379403e-06, "loss": 0.09546432495117188, "step": 78220 }, { "epoch": 0.6763884445443619, "grad_norm": 3.6655858919991293, "learning_rate": 4.460151596680405e-06, "loss": 0.17840499877929689, "step": 78225 }, { "epoch": 0.6764316780659052, "grad_norm": 13.67447212138401, "learning_rate": 4.459973620247317e-06, "loss": 0.05204563140869141, "step": 78230 }, { "epoch": 0.6764749115874484, "grad_norm": 0.944090000255752, "learning_rate": 4.45979563708096e-06, "loss": 0.6088714599609375, "step": 78235 }, { "epoch": 0.6765181451089917, "grad_norm": 1.1244782756858882, "learning_rate": 4.459617647182154e-06, "loss": 0.1100982666015625, "step": 78240 }, { "epoch": 0.676561378630535, "grad_norm": 8.29392527806701, "learning_rate": 4.4594396505517205e-06, "loss": 0.15171356201171876, "step": 78245 }, { "epoch": 0.6766046121520782, "grad_norm": 0.8771015164872167, "learning_rate": 4.45926164719048e-06, "loss": 0.09925079345703125, "step": 78250 }, { "epoch": 0.6766478456736215, "grad_norm": 4.106293446391332, "learning_rate": 4.4590836370992545e-06, "loss": 0.09483795166015625, "step": 78255 }, { "epoch": 0.6766910791951648, "grad_norm": 26.377454402020586, "learning_rate": 4.458905620278864e-06, "loss": 0.3924163818359375, "step": 78260 }, { "epoch": 0.676734312716708, "grad_norm": 8.639235469739827, "learning_rate": 4.458727596730129e-06, "loss": 0.10136566162109376, "step": 78265 }, { "epoch": 0.6767775462382513, "grad_norm": 9.35748745912855, "learning_rate": 4.458549566453872e-06, "loss": 0.33876953125, "step": 78270 }, { "epoch": 0.6768207797597946, "grad_norm": 2.8499437129025167, "learning_rate": 4.458371529450913e-06, "loss": 0.08017959594726562, "step": 78275 }, { "epoch": 0.6768640132813378, "grad_norm": 6.926414256372419, "learning_rate": 4.4581934857220735e-06, "loss": 0.17286529541015624, "step": 78280 }, { "epoch": 0.6769072468028811, "grad_norm": 12.414389034518726, "learning_rate": 4.458015435268174e-06, "loss": 0.19811477661132812, "step": 78285 }, { "epoch": 0.6769504803244244, "grad_norm": 2.4717976320646744, "learning_rate": 4.457837378090036e-06, "loss": 0.35706787109375, "step": 78290 }, { "epoch": 0.6769937138459676, "grad_norm": 0.5710766412537572, "learning_rate": 4.457659314188482e-06, "loss": 0.14129486083984374, "step": 78295 }, { "epoch": 0.6770369473675109, "grad_norm": 1.2973954320558925, "learning_rate": 4.457481243564331e-06, "loss": 0.1584686279296875, "step": 78300 }, { "epoch": 0.6770801808890541, "grad_norm": 0.704348989449239, "learning_rate": 4.457303166218406e-06, "loss": 0.21964797973632813, "step": 78305 }, { "epoch": 0.6771234144105974, "grad_norm": 23.422760408609246, "learning_rate": 4.457125082151527e-06, "loss": 0.30290374755859373, "step": 78310 }, { "epoch": 0.6771666479321407, "grad_norm": 3.230174749873681, "learning_rate": 4.456946991364516e-06, "loss": 0.0680938720703125, "step": 78315 }, { "epoch": 0.6772098814536839, "grad_norm": 9.647480372879764, "learning_rate": 4.456768893858194e-06, "loss": 0.09000244140625, "step": 78320 }, { "epoch": 0.6772531149752272, "grad_norm": 28.448934414742368, "learning_rate": 4.4565907896333835e-06, "loss": 0.159661865234375, "step": 78325 }, { "epoch": 0.6772963484967705, "grad_norm": 12.380170830955908, "learning_rate": 4.456412678690905e-06, "loss": 0.135162353515625, "step": 78330 }, { "epoch": 0.6773395820183137, "grad_norm": 53.637050891683074, "learning_rate": 4.456234561031579e-06, "loss": 0.43961944580078127, "step": 78335 }, { "epoch": 0.677382815539857, "grad_norm": 18.577367473690135, "learning_rate": 4.456056436656227e-06, "loss": 0.12413711547851562, "step": 78340 }, { "epoch": 0.6774260490614002, "grad_norm": 29.463965200019327, "learning_rate": 4.4558783055656735e-06, "loss": 0.14653472900390624, "step": 78345 }, { "epoch": 0.6774692825829435, "grad_norm": 0.12432074974695607, "learning_rate": 4.455700167760737e-06, "loss": 0.15274887084960936, "step": 78350 }, { "epoch": 0.6775125161044868, "grad_norm": 2.8508994828697873, "learning_rate": 4.4555220232422395e-06, "loss": 0.06238250732421875, "step": 78355 }, { "epoch": 0.67755574962603, "grad_norm": 3.5290525334244136, "learning_rate": 4.455343872011004e-06, "loss": 0.12114791870117188, "step": 78360 }, { "epoch": 0.6775989831475733, "grad_norm": 24.44016192973375, "learning_rate": 4.45516571406785e-06, "loss": 0.14523611068725586, "step": 78365 }, { "epoch": 0.6776422166691166, "grad_norm": 0.641996789908666, "learning_rate": 4.4549875494136004e-06, "loss": 0.12684669494628906, "step": 78370 }, { "epoch": 0.6776854501906598, "grad_norm": 11.80768595327274, "learning_rate": 4.454809378049078e-06, "loss": 0.09430084228515626, "step": 78375 }, { "epoch": 0.6777286837122031, "grad_norm": 10.443881976668868, "learning_rate": 4.4546311999751025e-06, "loss": 0.12591705322265626, "step": 78380 }, { "epoch": 0.6777719172337464, "grad_norm": 6.07777445377843, "learning_rate": 4.4544530151924965e-06, "loss": 0.0953643798828125, "step": 78385 }, { "epoch": 0.6778151507552896, "grad_norm": 4.113221773812824, "learning_rate": 4.4542748237020824e-06, "loss": 0.11781196594238282, "step": 78390 }, { "epoch": 0.6778583842768329, "grad_norm": 6.841756998506343, "learning_rate": 4.45409662550468e-06, "loss": 0.0819915771484375, "step": 78395 }, { "epoch": 0.6779016177983761, "grad_norm": 25.422379450427744, "learning_rate": 4.453918420601113e-06, "loss": 0.40739288330078127, "step": 78400 }, { "epoch": 0.6779448513199194, "grad_norm": 2.415694664588431, "learning_rate": 4.453740208992203e-06, "loss": 0.4667877197265625, "step": 78405 }, { "epoch": 0.6779880848414627, "grad_norm": 9.415825727280714, "learning_rate": 4.453561990678771e-06, "loss": 0.2527435302734375, "step": 78410 }, { "epoch": 0.6780313183630059, "grad_norm": 8.575457458545177, "learning_rate": 4.453383765661641e-06, "loss": 0.1279815673828125, "step": 78415 }, { "epoch": 0.6780745518845492, "grad_norm": 3.2387496024945883, "learning_rate": 4.453205533941633e-06, "loss": 0.3271644592285156, "step": 78420 }, { "epoch": 0.6781177854060925, "grad_norm": 27.707611062494102, "learning_rate": 4.453027295519568e-06, "loss": 0.100244140625, "step": 78425 }, { "epoch": 0.6781610189276357, "grad_norm": 15.862848136642134, "learning_rate": 4.45284905039627e-06, "loss": 0.17846183776855468, "step": 78430 }, { "epoch": 0.678204252449179, "grad_norm": 0.29663715476016406, "learning_rate": 4.452670798572561e-06, "loss": 0.13393478393554686, "step": 78435 }, { "epoch": 0.6782474859707223, "grad_norm": 0.4399525543310315, "learning_rate": 4.452492540049263e-06, "loss": 0.041815185546875, "step": 78440 }, { "epoch": 0.6782907194922655, "grad_norm": 3.8783250825836744, "learning_rate": 4.452314274827198e-06, "loss": 0.1748016357421875, "step": 78445 }, { "epoch": 0.6783339530138088, "grad_norm": 2.678537891472739, "learning_rate": 4.452136002907187e-06, "loss": 0.3592182159423828, "step": 78450 }, { "epoch": 0.6783771865353521, "grad_norm": 12.166099310057602, "learning_rate": 4.451957724290052e-06, "loss": 0.13737564086914061, "step": 78455 }, { "epoch": 0.6784204200568953, "grad_norm": 85.7566562664436, "learning_rate": 4.451779438976619e-06, "loss": 0.466265869140625, "step": 78460 }, { "epoch": 0.6784636535784386, "grad_norm": 5.829466004035663, "learning_rate": 4.451601146967706e-06, "loss": 0.325927734375, "step": 78465 }, { "epoch": 0.6785068870999819, "grad_norm": 44.73082006531527, "learning_rate": 4.451422848264137e-06, "loss": 0.3505523681640625, "step": 78470 }, { "epoch": 0.6785501206215251, "grad_norm": 25.34068360475861, "learning_rate": 4.451244542866734e-06, "loss": 0.21663360595703124, "step": 78475 }, { "epoch": 0.6785933541430683, "grad_norm": 25.357366023884392, "learning_rate": 4.451066230776319e-06, "loss": 0.36607666015625, "step": 78480 }, { "epoch": 0.6786365876646117, "grad_norm": 1.1979641592755494, "learning_rate": 4.450887911993715e-06, "loss": 0.27502899169921874, "step": 78485 }, { "epoch": 0.6786798211861549, "grad_norm": 5.969063015050704, "learning_rate": 4.450709586519744e-06, "loss": 0.03638916015625, "step": 78490 }, { "epoch": 0.6787230547076981, "grad_norm": 4.43857731238891, "learning_rate": 4.45053125435523e-06, "loss": 0.20280418395996094, "step": 78495 }, { "epoch": 0.6787662882292415, "grad_norm": 27.8547919604175, "learning_rate": 4.450352915500992e-06, "loss": 0.21817169189453126, "step": 78500 }, { "epoch": 0.6788095217507847, "grad_norm": 1.2377763775546273, "learning_rate": 4.450174569957856e-06, "loss": 0.1527984619140625, "step": 78505 }, { "epoch": 0.6788527552723279, "grad_norm": 1.6764545659858279, "learning_rate": 4.4499962177266415e-06, "loss": 0.13065872192382813, "step": 78510 }, { "epoch": 0.6788959887938713, "grad_norm": 5.177884215233893, "learning_rate": 4.449817858808174e-06, "loss": 0.077325439453125, "step": 78515 }, { "epoch": 0.6789392223154145, "grad_norm": 29.218736457190726, "learning_rate": 4.449639493203274e-06, "loss": 0.25194549560546875, "step": 78520 }, { "epoch": 0.6789824558369577, "grad_norm": 5.092289537744149, "learning_rate": 4.449461120912765e-06, "loss": 0.0535064697265625, "step": 78525 }, { "epoch": 0.679025689358501, "grad_norm": 5.867240319215254, "learning_rate": 4.44928274193747e-06, "loss": 0.1818115234375, "step": 78530 }, { "epoch": 0.6790689228800443, "grad_norm": 22.33188581048321, "learning_rate": 4.449104356278209e-06, "loss": 0.19774017333984376, "step": 78535 }, { "epoch": 0.6791121564015875, "grad_norm": 1.0141975859530277, "learning_rate": 4.448925963935809e-06, "loss": 0.023789215087890624, "step": 78540 }, { "epoch": 0.6791553899231308, "grad_norm": 13.347026765670051, "learning_rate": 4.4487475649110886e-06, "loss": 0.2691337585449219, "step": 78545 }, { "epoch": 0.6791986234446741, "grad_norm": 14.457526423722436, "learning_rate": 4.448569159204874e-06, "loss": 0.19566192626953124, "step": 78550 }, { "epoch": 0.6792418569662173, "grad_norm": 0.28810556909104984, "learning_rate": 4.448390746817986e-06, "loss": 0.04397659301757813, "step": 78555 }, { "epoch": 0.6792850904877606, "grad_norm": 1.7102969632811786, "learning_rate": 4.4482123277512475e-06, "loss": 0.06896705627441406, "step": 78560 }, { "epoch": 0.6793283240093039, "grad_norm": 110.03061856062372, "learning_rate": 4.448033902005482e-06, "loss": 0.115936279296875, "step": 78565 }, { "epoch": 0.6793715575308471, "grad_norm": 6.4273576228045455, "learning_rate": 4.447855469581512e-06, "loss": 0.06407623291015625, "step": 78570 }, { "epoch": 0.6794147910523903, "grad_norm": 1.7987979161884817, "learning_rate": 4.447677030480161e-06, "loss": 0.18688507080078126, "step": 78575 }, { "epoch": 0.6794580245739337, "grad_norm": 0.5054887617732691, "learning_rate": 4.447498584702251e-06, "loss": 0.13720970153808593, "step": 78580 }, { "epoch": 0.6795012580954769, "grad_norm": 2.183543664165433, "learning_rate": 4.447320132248605e-06, "loss": 0.2565673828125, "step": 78585 }, { "epoch": 0.6795444916170201, "grad_norm": 15.890997499421651, "learning_rate": 4.447141673120047e-06, "loss": 0.21960983276367188, "step": 78590 }, { "epoch": 0.6795877251385635, "grad_norm": 1.0830378602567825, "learning_rate": 4.4469632073173985e-06, "loss": 0.2587747573852539, "step": 78595 }, { "epoch": 0.6796309586601067, "grad_norm": 15.040813028822896, "learning_rate": 4.446784734841485e-06, "loss": 0.13619728088378907, "step": 78600 }, { "epoch": 0.6796741921816499, "grad_norm": 22.881811982399736, "learning_rate": 4.446606255693128e-06, "loss": 0.38353691101074217, "step": 78605 }, { "epoch": 0.6797174257031933, "grad_norm": 5.7603239917441025, "learning_rate": 4.44642776987315e-06, "loss": 0.126068115234375, "step": 78610 }, { "epoch": 0.6797606592247365, "grad_norm": 4.796285245904553, "learning_rate": 4.446249277382375e-06, "loss": 0.3597869873046875, "step": 78615 }, { "epoch": 0.6798038927462797, "grad_norm": 4.578184993494697, "learning_rate": 4.446070778221626e-06, "loss": 0.3143653869628906, "step": 78620 }, { "epoch": 0.6798471262678231, "grad_norm": 2.0932185505860526, "learning_rate": 4.4458922723917265e-06, "loss": 0.10408935546875, "step": 78625 }, { "epoch": 0.6798903597893663, "grad_norm": 1.4670811805594632, "learning_rate": 4.4457137598935004e-06, "loss": 0.6646274566650391, "step": 78630 }, { "epoch": 0.6799335933109095, "grad_norm": 8.983281446498633, "learning_rate": 4.445535240727769e-06, "loss": 0.19523162841796876, "step": 78635 }, { "epoch": 0.6799768268324529, "grad_norm": 0.09706955953379909, "learning_rate": 4.445356714895357e-06, "loss": 0.1046783447265625, "step": 78640 }, { "epoch": 0.6800200603539961, "grad_norm": 43.05770049269207, "learning_rate": 4.445178182397089e-06, "loss": 0.1761199951171875, "step": 78645 }, { "epoch": 0.6800632938755393, "grad_norm": 32.30856812927354, "learning_rate": 4.444999643233786e-06, "loss": 0.2051666259765625, "step": 78650 }, { "epoch": 0.6801065273970825, "grad_norm": 12.284112781425724, "learning_rate": 4.4448210974062725e-06, "loss": 0.12267532348632812, "step": 78655 }, { "epoch": 0.6801497609186259, "grad_norm": 8.491664257885107, "learning_rate": 4.444642544915371e-06, "loss": 0.16271209716796875, "step": 78660 }, { "epoch": 0.6801929944401691, "grad_norm": 20.649291215229553, "learning_rate": 4.444463985761906e-06, "loss": 0.5964958190917968, "step": 78665 }, { "epoch": 0.6802362279617123, "grad_norm": 4.791432899069559, "learning_rate": 4.444285419946701e-06, "loss": 0.22628097534179686, "step": 78670 }, { "epoch": 0.6802794614832557, "grad_norm": 2.201442367449401, "learning_rate": 4.444106847470579e-06, "loss": 0.3388458251953125, "step": 78675 }, { "epoch": 0.6803226950047989, "grad_norm": 7.7795056457489125, "learning_rate": 4.443928268334363e-06, "loss": 0.111602783203125, "step": 78680 }, { "epoch": 0.6803659285263421, "grad_norm": 1.827047652074952, "learning_rate": 4.443749682538879e-06, "loss": 0.09647712707519532, "step": 78685 }, { "epoch": 0.6804091620478855, "grad_norm": 14.722600857185066, "learning_rate": 4.443571090084948e-06, "loss": 0.32739105224609377, "step": 78690 }, { "epoch": 0.6804523955694287, "grad_norm": 13.276419204522275, "learning_rate": 4.443392490973396e-06, "loss": 0.06354827880859375, "step": 78695 }, { "epoch": 0.6804956290909719, "grad_norm": 4.958259539196937, "learning_rate": 4.443213885205045e-06, "loss": 0.27458038330078127, "step": 78700 }, { "epoch": 0.6805388626125153, "grad_norm": 3.77467318908604, "learning_rate": 4.443035272780718e-06, "loss": 0.19210662841796874, "step": 78705 }, { "epoch": 0.6805820961340585, "grad_norm": 13.611115242904098, "learning_rate": 4.44285665370124e-06, "loss": 0.241455078125, "step": 78710 }, { "epoch": 0.6806253296556017, "grad_norm": 51.29104275906365, "learning_rate": 4.442678027967436e-06, "loss": 0.21119070053100586, "step": 78715 }, { "epoch": 0.6806685631771451, "grad_norm": 0.12684065724197477, "learning_rate": 4.442499395580126e-06, "loss": 0.07528915405273437, "step": 78720 }, { "epoch": 0.6807117966986883, "grad_norm": 18.15271057971316, "learning_rate": 4.442320756540139e-06, "loss": 0.22972869873046875, "step": 78725 }, { "epoch": 0.6807550302202315, "grad_norm": 0.48860510040918803, "learning_rate": 4.442142110848295e-06, "loss": 0.163116455078125, "step": 78730 }, { "epoch": 0.6807982637417748, "grad_norm": 9.320212490826462, "learning_rate": 4.441963458505418e-06, "loss": 0.14264144897460937, "step": 78735 }, { "epoch": 0.6808414972633181, "grad_norm": 0.32811788091910055, "learning_rate": 4.441784799512334e-06, "loss": 0.08936386108398438, "step": 78740 }, { "epoch": 0.6808847307848613, "grad_norm": 0.33886438209518904, "learning_rate": 4.441606133869866e-06, "loss": 0.052433013916015625, "step": 78745 }, { "epoch": 0.6809279643064046, "grad_norm": 0.24927019644830328, "learning_rate": 4.441427461578838e-06, "loss": 0.0274749755859375, "step": 78750 }, { "epoch": 0.6809711978279479, "grad_norm": 18.754738624863734, "learning_rate": 4.441248782640072e-06, "loss": 0.23900985717773438, "step": 78755 }, { "epoch": 0.6810144313494911, "grad_norm": 8.77086283332812, "learning_rate": 4.441070097054396e-06, "loss": 0.04049072265625, "step": 78760 }, { "epoch": 0.6810576648710344, "grad_norm": 19.95600191125792, "learning_rate": 4.4408914048226315e-06, "loss": 0.2192138671875, "step": 78765 }, { "epoch": 0.6811008983925777, "grad_norm": 0.7516771015934167, "learning_rate": 4.440712705945603e-06, "loss": 0.15726318359375, "step": 78770 }, { "epoch": 0.6811441319141209, "grad_norm": 5.632091812231879, "learning_rate": 4.4405340004241346e-06, "loss": 0.26759185791015627, "step": 78775 }, { "epoch": 0.6811873654356642, "grad_norm": 11.644449149213221, "learning_rate": 4.440355288259052e-06, "loss": 0.12726058959960937, "step": 78780 }, { "epoch": 0.6812305989572075, "grad_norm": 7.0432453006458555, "learning_rate": 4.440176569451177e-06, "loss": 0.3050323486328125, "step": 78785 }, { "epoch": 0.6812738324787507, "grad_norm": 5.48086583483673, "learning_rate": 4.439997844001334e-06, "loss": 0.3036712646484375, "step": 78790 }, { "epoch": 0.681317066000294, "grad_norm": 15.88218540468881, "learning_rate": 4.439819111910349e-06, "loss": 0.19593353271484376, "step": 78795 }, { "epoch": 0.6813602995218373, "grad_norm": 4.4362158108452885, "learning_rate": 4.439640373179046e-06, "loss": 0.07021026611328125, "step": 78800 }, { "epoch": 0.6814035330433805, "grad_norm": 6.444317327095255, "learning_rate": 4.439461627808248e-06, "loss": 0.051275634765625, "step": 78805 }, { "epoch": 0.6814467665649238, "grad_norm": 27.606974681551968, "learning_rate": 4.4392828757987815e-06, "loss": 0.14270248413085937, "step": 78810 }, { "epoch": 0.6814900000864671, "grad_norm": 28.543909178724462, "learning_rate": 4.439104117151469e-06, "loss": 0.15251731872558594, "step": 78815 }, { "epoch": 0.6815332336080103, "grad_norm": 32.924511737929244, "learning_rate": 4.438925351867134e-06, "loss": 0.2517242431640625, "step": 78820 }, { "epoch": 0.6815764671295536, "grad_norm": 0.3443682906214535, "learning_rate": 4.438746579946605e-06, "loss": 0.2039875030517578, "step": 78825 }, { "epoch": 0.6816197006510968, "grad_norm": 1.9118676501630556, "learning_rate": 4.438567801390702e-06, "loss": 0.1908447265625, "step": 78830 }, { "epoch": 0.6816629341726401, "grad_norm": 21.453306778631656, "learning_rate": 4.438389016200252e-06, "loss": 0.1964508056640625, "step": 78835 }, { "epoch": 0.6817061676941834, "grad_norm": 4.930825027743918, "learning_rate": 4.438210224376078e-06, "loss": 0.096771240234375, "step": 78840 }, { "epoch": 0.6817494012157266, "grad_norm": 1.59687241034881, "learning_rate": 4.4380314259190075e-06, "loss": 0.2601280212402344, "step": 78845 }, { "epoch": 0.6817926347372699, "grad_norm": 4.678079370887026, "learning_rate": 4.437852620829861e-06, "loss": 0.07843475341796875, "step": 78850 }, { "epoch": 0.6818358682588131, "grad_norm": 1.4975422686387807, "learning_rate": 4.437673809109466e-06, "loss": 0.10477218627929688, "step": 78855 }, { "epoch": 0.6818791017803564, "grad_norm": 13.673870986949135, "learning_rate": 4.437494990758648e-06, "loss": 0.09892196655273437, "step": 78860 }, { "epoch": 0.6819223353018997, "grad_norm": 1.5229750309054269, "learning_rate": 4.437316165778228e-06, "loss": 0.0204315185546875, "step": 78865 }, { "epoch": 0.681965568823443, "grad_norm": 0.045854624512387074, "learning_rate": 4.437137334169034e-06, "loss": 0.10958080291748047, "step": 78870 }, { "epoch": 0.6820088023449862, "grad_norm": 2.548276693228458, "learning_rate": 4.4369584959318895e-06, "loss": 0.1794292449951172, "step": 78875 }, { "epoch": 0.6820520358665295, "grad_norm": 24.736384841100403, "learning_rate": 4.436779651067618e-06, "loss": 0.15267181396484375, "step": 78880 }, { "epoch": 0.6820952693880727, "grad_norm": 10.84419507780117, "learning_rate": 4.436600799577048e-06, "loss": 0.40291671752929686, "step": 78885 }, { "epoch": 0.682138502909616, "grad_norm": 13.27270071476014, "learning_rate": 4.436421941461001e-06, "loss": 0.23011283874511718, "step": 78890 }, { "epoch": 0.6821817364311593, "grad_norm": 11.513093569598206, "learning_rate": 4.436243076720302e-06, "loss": 0.08929595947265626, "step": 78895 }, { "epoch": 0.6822249699527025, "grad_norm": 5.910461575055883, "learning_rate": 4.436064205355779e-06, "loss": 0.13745040893554689, "step": 78900 }, { "epoch": 0.6822682034742458, "grad_norm": 2.2405645925128557, "learning_rate": 4.435885327368253e-06, "loss": 0.37235107421875, "step": 78905 }, { "epoch": 0.682311436995789, "grad_norm": 8.160203510057048, "learning_rate": 4.435706442758551e-06, "loss": 0.3814178466796875, "step": 78910 }, { "epoch": 0.6823546705173323, "grad_norm": 6.964864525535168, "learning_rate": 4.435527551527498e-06, "loss": 0.108026123046875, "step": 78915 }, { "epoch": 0.6823979040388756, "grad_norm": 30.407316507404712, "learning_rate": 4.435348653675919e-06, "loss": 0.477630615234375, "step": 78920 }, { "epoch": 0.6824411375604188, "grad_norm": 2.1426386035217275, "learning_rate": 4.435169749204639e-06, "loss": 0.03076629638671875, "step": 78925 }, { "epoch": 0.6824843710819621, "grad_norm": 18.377047779212994, "learning_rate": 4.434990838114482e-06, "loss": 0.099639892578125, "step": 78930 }, { "epoch": 0.6825276046035054, "grad_norm": 34.49382248787865, "learning_rate": 4.434811920406275e-06, "loss": 0.13539810180664064, "step": 78935 }, { "epoch": 0.6825708381250486, "grad_norm": 4.332527341557594, "learning_rate": 4.434632996080841e-06, "loss": 0.0597412109375, "step": 78940 }, { "epoch": 0.6826140716465919, "grad_norm": 13.829821647152594, "learning_rate": 4.434454065139008e-06, "loss": 0.27607574462890627, "step": 78945 }, { "epoch": 0.6826573051681352, "grad_norm": 8.114251744809193, "learning_rate": 4.434275127581598e-06, "loss": 0.098699951171875, "step": 78950 }, { "epoch": 0.6827005386896784, "grad_norm": 0.8282924285762633, "learning_rate": 4.4340961834094395e-06, "loss": 0.2760955810546875, "step": 78955 }, { "epoch": 0.6827437722112217, "grad_norm": 31.952579688977043, "learning_rate": 4.433917232623354e-06, "loss": 0.20951385498046876, "step": 78960 }, { "epoch": 0.682787005732765, "grad_norm": 4.195639860381694, "learning_rate": 4.433738275224171e-06, "loss": 0.10490570068359376, "step": 78965 }, { "epoch": 0.6828302392543082, "grad_norm": 10.513870917565631, "learning_rate": 4.4335593112127125e-06, "loss": 0.176971435546875, "step": 78970 }, { "epoch": 0.6828734727758515, "grad_norm": 9.980603852076538, "learning_rate": 4.433380340589806e-06, "loss": 0.2963233947753906, "step": 78975 }, { "epoch": 0.6829167062973948, "grad_norm": 20.498835047889926, "learning_rate": 4.433201363356276e-06, "loss": 0.275921630859375, "step": 78980 }, { "epoch": 0.682959939818938, "grad_norm": 20.685582345479116, "learning_rate": 4.433022379512946e-06, "loss": 0.23342742919921874, "step": 78985 }, { "epoch": 0.6830031733404813, "grad_norm": 24.926900368942846, "learning_rate": 4.432843389060645e-06, "loss": 0.1854888916015625, "step": 78990 }, { "epoch": 0.6830464068620246, "grad_norm": 10.902378849769802, "learning_rate": 4.432664392000198e-06, "loss": 0.18425369262695312, "step": 78995 }, { "epoch": 0.6830896403835678, "grad_norm": 11.462894706731305, "learning_rate": 4.4324853883324285e-06, "loss": 0.15496559143066407, "step": 79000 }, { "epoch": 0.683132873905111, "grad_norm": 36.39508868034355, "learning_rate": 4.432306378058162e-06, "loss": 0.0822845458984375, "step": 79005 }, { "epoch": 0.6831761074266544, "grad_norm": 2.1050862411598232, "learning_rate": 4.432127361178226e-06, "loss": 0.32235107421875, "step": 79010 }, { "epoch": 0.6832193409481976, "grad_norm": 9.796462515214634, "learning_rate": 4.431948337693444e-06, "loss": 0.20914955139160157, "step": 79015 }, { "epoch": 0.6832625744697408, "grad_norm": 17.934546656161775, "learning_rate": 4.4317693076046445e-06, "loss": 0.14595746994018555, "step": 79020 }, { "epoch": 0.6833058079912842, "grad_norm": 13.280992852936574, "learning_rate": 4.431590270912651e-06, "loss": 0.15506973266601562, "step": 79025 }, { "epoch": 0.6833490415128274, "grad_norm": 4.236591057012159, "learning_rate": 4.43141122761829e-06, "loss": 0.06438751220703125, "step": 79030 }, { "epoch": 0.6833922750343706, "grad_norm": 1.6915307569712261, "learning_rate": 4.4312321777223854e-06, "loss": 0.342059326171875, "step": 79035 }, { "epoch": 0.683435508555914, "grad_norm": 15.489855190855506, "learning_rate": 4.431053121225765e-06, "loss": 0.11027755737304687, "step": 79040 }, { "epoch": 0.6834787420774572, "grad_norm": 21.834223240717574, "learning_rate": 4.430874058129254e-06, "loss": 0.19644775390625, "step": 79045 }, { "epoch": 0.6835219755990004, "grad_norm": 18.2613284871636, "learning_rate": 4.430694988433679e-06, "loss": 0.17708740234375, "step": 79050 }, { "epoch": 0.6835652091205437, "grad_norm": 33.78169094930276, "learning_rate": 4.430515912139865e-06, "loss": 0.10205612182617188, "step": 79055 }, { "epoch": 0.683608442642087, "grad_norm": 3.3491390077993883, "learning_rate": 4.430336829248638e-06, "loss": 0.1400379180908203, "step": 79060 }, { "epoch": 0.6836516761636302, "grad_norm": 0.9509092697039457, "learning_rate": 4.430157739760824e-06, "loss": 0.09246139526367188, "step": 79065 }, { "epoch": 0.6836949096851735, "grad_norm": 0.45349561068043415, "learning_rate": 4.429978643677248e-06, "loss": 0.111456298828125, "step": 79070 }, { "epoch": 0.6837381432067168, "grad_norm": 11.803397695751212, "learning_rate": 4.429799540998737e-06, "loss": 0.07029037475585938, "step": 79075 }, { "epoch": 0.68378137672826, "grad_norm": 4.270958782097284, "learning_rate": 4.429620431726117e-06, "loss": 0.06103515625, "step": 79080 }, { "epoch": 0.6838246102498032, "grad_norm": 2.327347910069881, "learning_rate": 4.429441315860216e-06, "loss": 0.087933349609375, "step": 79085 }, { "epoch": 0.6838678437713466, "grad_norm": 6.715175493099556, "learning_rate": 4.429262193401855e-06, "loss": 0.3208984375, "step": 79090 }, { "epoch": 0.6839110772928898, "grad_norm": 22.270036283027018, "learning_rate": 4.429083064351864e-06, "loss": 0.098760986328125, "step": 79095 }, { "epoch": 0.683954310814433, "grad_norm": 2.1174911735632453, "learning_rate": 4.428903928711069e-06, "loss": 0.04910888671875, "step": 79100 }, { "epoch": 0.6839975443359764, "grad_norm": 0.2784757164432963, "learning_rate": 4.428724786480294e-06, "loss": 0.062259674072265625, "step": 79105 }, { "epoch": 0.6840407778575196, "grad_norm": 37.15876409885193, "learning_rate": 4.4285456376603675e-06, "loss": 0.3153778076171875, "step": 79110 }, { "epoch": 0.6840840113790628, "grad_norm": 1.5729382098380922, "learning_rate": 4.428366482252115e-06, "loss": 0.05020751953125, "step": 79115 }, { "epoch": 0.6841272449006062, "grad_norm": 1.000591484950851, "learning_rate": 4.428187320256361e-06, "loss": 0.03612823486328125, "step": 79120 }, { "epoch": 0.6841704784221494, "grad_norm": 34.544874621254365, "learning_rate": 4.428008151673935e-06, "loss": 0.25997314453125, "step": 79125 }, { "epoch": 0.6842137119436926, "grad_norm": 9.663548585386962, "learning_rate": 4.42782897650566e-06, "loss": 0.32767333984375, "step": 79130 }, { "epoch": 0.684256945465236, "grad_norm": 1.315694822426019, "learning_rate": 4.427649794752366e-06, "loss": 0.20683135986328124, "step": 79135 }, { "epoch": 0.6843001789867792, "grad_norm": 13.439353514998645, "learning_rate": 4.427470606414876e-06, "loss": 0.11748046875, "step": 79140 }, { "epoch": 0.6843434125083224, "grad_norm": 6.980136742354738, "learning_rate": 4.427291411494018e-06, "loss": 0.2186920166015625, "step": 79145 }, { "epoch": 0.6843866460298658, "grad_norm": 1.4201290023688158, "learning_rate": 4.427112209990618e-06, "loss": 0.047582149505615234, "step": 79150 }, { "epoch": 0.684429879551409, "grad_norm": 28.274931356258588, "learning_rate": 4.426933001905502e-06, "loss": 0.3024150848388672, "step": 79155 }, { "epoch": 0.6844731130729522, "grad_norm": 6.17007431737914, "learning_rate": 4.426753787239498e-06, "loss": 0.05065841674804687, "step": 79160 }, { "epoch": 0.6845163465944956, "grad_norm": 0.10825957239679894, "learning_rate": 4.42657456599343e-06, "loss": 0.19214324951171874, "step": 79165 }, { "epoch": 0.6845595801160388, "grad_norm": 0.9503145981147251, "learning_rate": 4.426395338168128e-06, "loss": 0.0855560302734375, "step": 79170 }, { "epoch": 0.684602813637582, "grad_norm": 2.6979614349458894, "learning_rate": 4.4262161037644165e-06, "loss": 0.052386474609375, "step": 79175 }, { "epoch": 0.6846460471591252, "grad_norm": 4.427939968839042, "learning_rate": 4.426036862783121e-06, "loss": 0.2167144775390625, "step": 79180 }, { "epoch": 0.6846892806806686, "grad_norm": 44.386410525369534, "learning_rate": 4.4258576152250706e-06, "loss": 0.313043212890625, "step": 79185 }, { "epoch": 0.6847325142022118, "grad_norm": 8.552365691238, "learning_rate": 4.425678361091091e-06, "loss": 0.0508331298828125, "step": 79190 }, { "epoch": 0.684775747723755, "grad_norm": 4.334297972010298, "learning_rate": 4.425499100382009e-06, "loss": 0.10777702331542968, "step": 79195 }, { "epoch": 0.6848189812452984, "grad_norm": 0.43286121974009556, "learning_rate": 4.42531983309865e-06, "loss": 0.24683303833007814, "step": 79200 }, { "epoch": 0.6848622147668416, "grad_norm": 23.04997956956976, "learning_rate": 4.425140559241843e-06, "loss": 0.178350830078125, "step": 79205 }, { "epoch": 0.6849054482883848, "grad_norm": 0.5611020341573977, "learning_rate": 4.424961278812412e-06, "loss": 0.0174346923828125, "step": 79210 }, { "epoch": 0.6849486818099282, "grad_norm": 2.10774302035751, "learning_rate": 4.424781991811187e-06, "loss": 0.0512115478515625, "step": 79215 }, { "epoch": 0.6849919153314714, "grad_norm": 3.0216324634210676, "learning_rate": 4.424602698238992e-06, "loss": 0.04781036376953125, "step": 79220 }, { "epoch": 0.6850351488530146, "grad_norm": 13.81994709382783, "learning_rate": 4.424423398096657e-06, "loss": 0.21814918518066406, "step": 79225 }, { "epoch": 0.685078382374558, "grad_norm": 7.390465155200327, "learning_rate": 4.424244091385006e-06, "loss": 0.5071884155273437, "step": 79230 }, { "epoch": 0.6851216158961012, "grad_norm": 21.683455812324052, "learning_rate": 4.424064778104868e-06, "loss": 0.21757354736328124, "step": 79235 }, { "epoch": 0.6851648494176444, "grad_norm": 11.927409347329949, "learning_rate": 4.423885458257069e-06, "loss": 0.07994918823242188, "step": 79240 }, { "epoch": 0.6852080829391878, "grad_norm": 0.6007170365431139, "learning_rate": 4.423706131842435e-06, "loss": 0.19586029052734374, "step": 79245 }, { "epoch": 0.685251316460731, "grad_norm": 20.372387672942743, "learning_rate": 4.423526798861795e-06, "loss": 0.14241485595703124, "step": 79250 }, { "epoch": 0.6852945499822742, "grad_norm": 5.796007570310613, "learning_rate": 4.4233474593159754e-06, "loss": 0.09360733032226562, "step": 79255 }, { "epoch": 0.6853377835038175, "grad_norm": 0.5410467045548384, "learning_rate": 4.423168113205802e-06, "loss": 0.18240966796875, "step": 79260 }, { "epoch": 0.6853810170253608, "grad_norm": 1.7534599657496186, "learning_rate": 4.422988760532103e-06, "loss": 0.053095245361328126, "step": 79265 }, { "epoch": 0.685424250546904, "grad_norm": 42.94411992649671, "learning_rate": 4.422809401295707e-06, "loss": 0.613592529296875, "step": 79270 }, { "epoch": 0.6854674840684473, "grad_norm": 42.14514665570287, "learning_rate": 4.422630035497439e-06, "loss": 0.112896728515625, "step": 79275 }, { "epoch": 0.6855107175899906, "grad_norm": 10.231440431543719, "learning_rate": 4.422450663138126e-06, "loss": 0.09791946411132812, "step": 79280 }, { "epoch": 0.6855539511115338, "grad_norm": 20.070720938642662, "learning_rate": 4.422271284218598e-06, "loss": 0.11038970947265625, "step": 79285 }, { "epoch": 0.6855971846330771, "grad_norm": 4.683741799014605, "learning_rate": 4.4220918987396804e-06, "loss": 0.07178878784179688, "step": 79290 }, { "epoch": 0.6856404181546204, "grad_norm": 7.4381829301824425, "learning_rate": 4.421912506702199e-06, "loss": 0.12197437286376953, "step": 79295 }, { "epoch": 0.6856836516761636, "grad_norm": 1.4535214013668207, "learning_rate": 4.421733108106983e-06, "loss": 0.27331085205078126, "step": 79300 }, { "epoch": 0.6857268851977069, "grad_norm": 37.02914368935304, "learning_rate": 4.42155370295486e-06, "loss": 0.34212684631347656, "step": 79305 }, { "epoch": 0.6857701187192502, "grad_norm": 3.1191305520550574, "learning_rate": 4.421374291246658e-06, "loss": 0.18989486694335939, "step": 79310 }, { "epoch": 0.6858133522407934, "grad_norm": 4.848642936115992, "learning_rate": 4.421194872983202e-06, "loss": 0.0917510986328125, "step": 79315 }, { "epoch": 0.6858565857623367, "grad_norm": 25.531701701841087, "learning_rate": 4.421015448165322e-06, "loss": 0.15911102294921875, "step": 79320 }, { "epoch": 0.68589981928388, "grad_norm": 4.256785012093481, "learning_rate": 4.420836016793844e-06, "loss": 0.3640247344970703, "step": 79325 }, { "epoch": 0.6859430528054232, "grad_norm": 39.789259413660936, "learning_rate": 4.420656578869594e-06, "loss": 0.27682952880859374, "step": 79330 }, { "epoch": 0.6859862863269665, "grad_norm": 8.385330504563894, "learning_rate": 4.420477134393404e-06, "loss": 0.02697410583496094, "step": 79335 }, { "epoch": 0.6860295198485098, "grad_norm": 8.943127715467632, "learning_rate": 4.420297683366097e-06, "loss": 0.037711715698242186, "step": 79340 }, { "epoch": 0.686072753370053, "grad_norm": 0.3515722083936109, "learning_rate": 4.420118225788504e-06, "loss": 0.10389556884765624, "step": 79345 }, { "epoch": 0.6861159868915963, "grad_norm": 4.240071853405783, "learning_rate": 4.419938761661451e-06, "loss": 0.2718971252441406, "step": 79350 }, { "epoch": 0.6861592204131395, "grad_norm": 3.837827982591462, "learning_rate": 4.419759290985765e-06, "loss": 0.319879150390625, "step": 79355 }, { "epoch": 0.6862024539346828, "grad_norm": 16.06773403487195, "learning_rate": 4.419579813762275e-06, "loss": 0.3690065383911133, "step": 79360 }, { "epoch": 0.686245687456226, "grad_norm": 16.50179955263367, "learning_rate": 4.419400329991809e-06, "loss": 0.08072509765625, "step": 79365 }, { "epoch": 0.6862889209777693, "grad_norm": 19.985248384559135, "learning_rate": 4.4192208396751935e-06, "loss": 0.18672714233398438, "step": 79370 }, { "epoch": 0.6863321544993126, "grad_norm": 9.497072731705405, "learning_rate": 4.4190413428132565e-06, "loss": 0.18584976196289063, "step": 79375 }, { "epoch": 0.6863753880208558, "grad_norm": 7.72178944872862, "learning_rate": 4.418861839406826e-06, "loss": 0.1009613037109375, "step": 79380 }, { "epoch": 0.6864186215423991, "grad_norm": 15.259409287471392, "learning_rate": 4.4186823294567315e-06, "loss": 0.09854812622070312, "step": 79385 }, { "epoch": 0.6864618550639424, "grad_norm": 6.82507494734533, "learning_rate": 4.418502812963799e-06, "loss": 0.22947235107421876, "step": 79390 }, { "epoch": 0.6865050885854856, "grad_norm": 1.319131871332491, "learning_rate": 4.418323289928857e-06, "loss": 0.098980712890625, "step": 79395 }, { "epoch": 0.6865483221070289, "grad_norm": 13.079478257853113, "learning_rate": 4.418143760352733e-06, "loss": 0.159625244140625, "step": 79400 }, { "epoch": 0.6865915556285722, "grad_norm": 9.042281325124517, "learning_rate": 4.417964224236254e-06, "loss": 0.1824951171875, "step": 79405 }, { "epoch": 0.6866347891501154, "grad_norm": 4.853537995499292, "learning_rate": 4.417784681580251e-06, "loss": 0.10067901611328126, "step": 79410 }, { "epoch": 0.6866780226716587, "grad_norm": 19.68375895589187, "learning_rate": 4.417605132385549e-06, "loss": 0.21686248779296874, "step": 79415 }, { "epoch": 0.686721256193202, "grad_norm": 7.413102398602109, "learning_rate": 4.417425576652978e-06, "loss": 0.1746490478515625, "step": 79420 }, { "epoch": 0.6867644897147452, "grad_norm": 1.6574070335952837, "learning_rate": 4.417246014383366e-06, "loss": 0.0448883056640625, "step": 79425 }, { "epoch": 0.6868077232362885, "grad_norm": 1.753317689053433, "learning_rate": 4.417066445577541e-06, "loss": 0.0171875, "step": 79430 }, { "epoch": 0.6868509567578317, "grad_norm": 2.512371701767158, "learning_rate": 4.41688687023633e-06, "loss": 0.05228729248046875, "step": 79435 }, { "epoch": 0.686894190279375, "grad_norm": 16.41072326267099, "learning_rate": 4.416707288360561e-06, "loss": 0.131243896484375, "step": 79440 }, { "epoch": 0.6869374238009183, "grad_norm": 23.202002270984146, "learning_rate": 4.4165276999510645e-06, "loss": 0.24652099609375, "step": 79445 }, { "epoch": 0.6869806573224615, "grad_norm": 1.624808733115685, "learning_rate": 4.4163481050086676e-06, "loss": 0.1241119384765625, "step": 79450 }, { "epoch": 0.6870238908440048, "grad_norm": 2.1118451529685154, "learning_rate": 4.416168503534198e-06, "loss": 0.11709747314453126, "step": 79455 }, { "epoch": 0.6870671243655481, "grad_norm": 8.2670854526768, "learning_rate": 4.415988895528484e-06, "loss": 0.08079452514648437, "step": 79460 }, { "epoch": 0.6871103578870913, "grad_norm": 4.97789962386485, "learning_rate": 4.415809280992355e-06, "loss": 0.05003814697265625, "step": 79465 }, { "epoch": 0.6871535914086346, "grad_norm": 14.629041840959207, "learning_rate": 4.415629659926638e-06, "loss": 0.09443130493164062, "step": 79470 }, { "epoch": 0.6871968249301779, "grad_norm": 18.803775477607278, "learning_rate": 4.415450032332162e-06, "loss": 0.33843460083007815, "step": 79475 }, { "epoch": 0.6872400584517211, "grad_norm": 18.45462029241031, "learning_rate": 4.415270398209756e-06, "loss": 0.1960052490234375, "step": 79480 }, { "epoch": 0.6872832919732644, "grad_norm": 27.949766037728956, "learning_rate": 4.415090757560248e-06, "loss": 0.18395042419433594, "step": 79485 }, { "epoch": 0.6873265254948077, "grad_norm": 8.009735565298973, "learning_rate": 4.414911110384466e-06, "loss": 0.25223445892333984, "step": 79490 }, { "epoch": 0.6873697590163509, "grad_norm": 3.446567557605477, "learning_rate": 4.41473145668324e-06, "loss": 0.38727340698242185, "step": 79495 }, { "epoch": 0.6874129925378942, "grad_norm": 7.1406280082641205, "learning_rate": 4.414551796457396e-06, "loss": 0.2123554229736328, "step": 79500 }, { "epoch": 0.6874562260594375, "grad_norm": 28.40000408911721, "learning_rate": 4.414372129707765e-06, "loss": 0.19184532165527343, "step": 79505 }, { "epoch": 0.6874994595809807, "grad_norm": 2.5582172089194053, "learning_rate": 4.414192456435176e-06, "loss": 0.51763916015625, "step": 79510 }, { "epoch": 0.687542693102524, "grad_norm": 3.8626249461868594, "learning_rate": 4.414012776640454e-06, "loss": 0.06270675659179688, "step": 79515 }, { "epoch": 0.6875859266240673, "grad_norm": 1.8805864757762922, "learning_rate": 4.413833090324432e-06, "loss": 0.34761695861816405, "step": 79520 }, { "epoch": 0.6876291601456105, "grad_norm": 1.60874501157796, "learning_rate": 4.413653397487935e-06, "loss": 0.03283767700195313, "step": 79525 }, { "epoch": 0.6876723936671537, "grad_norm": 28.597013790841718, "learning_rate": 4.413473698131794e-06, "loss": 0.164398193359375, "step": 79530 }, { "epoch": 0.687715627188697, "grad_norm": 7.870168551873204, "learning_rate": 4.413293992256837e-06, "loss": 0.1658843994140625, "step": 79535 }, { "epoch": 0.6877588607102403, "grad_norm": 5.305374809314256, "learning_rate": 4.413114279863895e-06, "loss": 0.1702953338623047, "step": 79540 }, { "epoch": 0.6878020942317835, "grad_norm": 41.03947336841876, "learning_rate": 4.412934560953793e-06, "loss": 0.21444625854492189, "step": 79545 }, { "epoch": 0.6878453277533269, "grad_norm": 4.219110066367743, "learning_rate": 4.412754835527361e-06, "loss": 0.1852691650390625, "step": 79550 }, { "epoch": 0.6878885612748701, "grad_norm": 2.5470206703824316, "learning_rate": 4.412575103585429e-06, "loss": 0.11379165649414062, "step": 79555 }, { "epoch": 0.6879317947964133, "grad_norm": 0.1679056589491123, "learning_rate": 4.412395365128826e-06, "loss": 0.09196929931640625, "step": 79560 }, { "epoch": 0.6879750283179566, "grad_norm": 2.564437567768462, "learning_rate": 4.41221562015838e-06, "loss": 0.05916748046875, "step": 79565 }, { "epoch": 0.6880182618394999, "grad_norm": 18.243559460759595, "learning_rate": 4.412035868674921e-06, "loss": 0.267529296875, "step": 79570 }, { "epoch": 0.6880614953610431, "grad_norm": 0.4333793155631662, "learning_rate": 4.4118561106792775e-06, "loss": 0.061734771728515624, "step": 79575 }, { "epoch": 0.6881047288825864, "grad_norm": 26.90261271143253, "learning_rate": 4.411676346172276e-06, "loss": 0.4134002685546875, "step": 79580 }, { "epoch": 0.6881479624041297, "grad_norm": 14.837344236956119, "learning_rate": 4.41149657515475e-06, "loss": 0.42244720458984375, "step": 79585 }, { "epoch": 0.6881911959256729, "grad_norm": 2.9603114321801187, "learning_rate": 4.411316797627527e-06, "loss": 0.0963623046875, "step": 79590 }, { "epoch": 0.6882344294472162, "grad_norm": 4.618714563854575, "learning_rate": 4.411137013591434e-06, "loss": 0.1909881591796875, "step": 79595 }, { "epoch": 0.6882776629687595, "grad_norm": 3.7230424034787117, "learning_rate": 4.4109572230473035e-06, "loss": 0.2575721740722656, "step": 79600 }, { "epoch": 0.6883208964903027, "grad_norm": 3.245932856946656, "learning_rate": 4.410777425995962e-06, "loss": 0.14938201904296874, "step": 79605 }, { "epoch": 0.6883641300118459, "grad_norm": 1.4284470268174392, "learning_rate": 4.410597622438239e-06, "loss": 0.1537628173828125, "step": 79610 }, { "epoch": 0.6884073635333893, "grad_norm": 47.90267844911512, "learning_rate": 4.410417812374965e-06, "loss": 0.5453826904296875, "step": 79615 }, { "epoch": 0.6884505970549325, "grad_norm": 4.0800911278486796, "learning_rate": 4.41023799580697e-06, "loss": 0.34625396728515623, "step": 79620 }, { "epoch": 0.6884938305764757, "grad_norm": 17.17638313616081, "learning_rate": 4.410058172735081e-06, "loss": 0.11629486083984375, "step": 79625 }, { "epoch": 0.6885370640980191, "grad_norm": 17.766658827612897, "learning_rate": 4.409878343160128e-06, "loss": 0.09478759765625, "step": 79630 }, { "epoch": 0.6885802976195623, "grad_norm": 0.8513711751321341, "learning_rate": 4.409698507082941e-06, "loss": 0.16925430297851562, "step": 79635 }, { "epoch": 0.6886235311411055, "grad_norm": 14.586810608908904, "learning_rate": 4.409518664504349e-06, "loss": 0.07858810424804688, "step": 79640 }, { "epoch": 0.6886667646626489, "grad_norm": 1.6579036839473225, "learning_rate": 4.409338815425181e-06, "loss": 0.2336669921875, "step": 79645 }, { "epoch": 0.6887099981841921, "grad_norm": 4.972372140539161, "learning_rate": 4.409158959846266e-06, "loss": 0.07065544128417969, "step": 79650 }, { "epoch": 0.6887532317057353, "grad_norm": 1.067546790078104, "learning_rate": 4.408979097768436e-06, "loss": 0.28070602416992185, "step": 79655 }, { "epoch": 0.6887964652272787, "grad_norm": 17.54993217849025, "learning_rate": 4.408799229192519e-06, "loss": 0.13788719177246095, "step": 79660 }, { "epoch": 0.6888396987488219, "grad_norm": 0.24291884211404616, "learning_rate": 4.408619354119344e-06, "loss": 0.07696151733398438, "step": 79665 }, { "epoch": 0.6888829322703651, "grad_norm": 11.066533683809698, "learning_rate": 4.408439472549741e-06, "loss": 0.130108642578125, "step": 79670 }, { "epoch": 0.6889261657919085, "grad_norm": 5.944402846633997, "learning_rate": 4.408259584484539e-06, "loss": 0.1083526611328125, "step": 79675 }, { "epoch": 0.6889693993134517, "grad_norm": 0.16177220353680052, "learning_rate": 4.408079689924569e-06, "loss": 0.04918365478515625, "step": 79680 }, { "epoch": 0.6890126328349949, "grad_norm": 3.184702601276275, "learning_rate": 4.40789978887066e-06, "loss": 0.034050369262695314, "step": 79685 }, { "epoch": 0.6890558663565383, "grad_norm": 8.669507747616017, "learning_rate": 4.407719881323641e-06, "loss": 0.054744720458984375, "step": 79690 }, { "epoch": 0.6890990998780815, "grad_norm": 31.335602067807425, "learning_rate": 4.407539967284343e-06, "loss": 0.1148040771484375, "step": 79695 }, { "epoch": 0.6891423333996247, "grad_norm": 37.24071299353892, "learning_rate": 4.407360046753595e-06, "loss": 0.2752593994140625, "step": 79700 }, { "epoch": 0.689185566921168, "grad_norm": 1.4857485453189694, "learning_rate": 4.4071801197322264e-06, "loss": 0.5950408935546875, "step": 79705 }, { "epoch": 0.6892288004427113, "grad_norm": 3.7228381783014237, "learning_rate": 4.407000186221067e-06, "loss": 0.156085205078125, "step": 79710 }, { "epoch": 0.6892720339642545, "grad_norm": 45.24168044090355, "learning_rate": 4.4068202462209475e-06, "loss": 0.3298492431640625, "step": 79715 }, { "epoch": 0.6893152674857977, "grad_norm": 0.7924256308195903, "learning_rate": 4.406640299732697e-06, "loss": 0.038409423828125, "step": 79720 }, { "epoch": 0.6893585010073411, "grad_norm": 0.360883685188594, "learning_rate": 4.406460346757146e-06, "loss": 0.042650604248046876, "step": 79725 }, { "epoch": 0.6894017345288843, "grad_norm": 0.6957566300620491, "learning_rate": 4.406280387295125e-06, "loss": 0.307440185546875, "step": 79730 }, { "epoch": 0.6894449680504275, "grad_norm": 12.203727387179125, "learning_rate": 4.406100421347462e-06, "loss": 0.14582290649414062, "step": 79735 }, { "epoch": 0.6894882015719709, "grad_norm": 1.1835701624733062, "learning_rate": 4.405920448914989e-06, "loss": 0.09320831298828125, "step": 79740 }, { "epoch": 0.6895314350935141, "grad_norm": 19.47081435430692, "learning_rate": 4.405740469998534e-06, "loss": 0.1878498077392578, "step": 79745 }, { "epoch": 0.6895746686150573, "grad_norm": 1.0416282832748816, "learning_rate": 4.4055604845989286e-06, "loss": 0.06822662353515625, "step": 79750 }, { "epoch": 0.6896179021366007, "grad_norm": 8.34631452221807, "learning_rate": 4.4053804927170025e-06, "loss": 0.34036407470703123, "step": 79755 }, { "epoch": 0.6896611356581439, "grad_norm": 0.12369883302850428, "learning_rate": 4.405200494353586e-06, "loss": 0.15207672119140625, "step": 79760 }, { "epoch": 0.6897043691796871, "grad_norm": 0.6250859316998634, "learning_rate": 4.405020489509509e-06, "loss": 0.23799209594726561, "step": 79765 }, { "epoch": 0.6897476027012305, "grad_norm": 6.107617234680743, "learning_rate": 4.4048404781856e-06, "loss": 0.21576461791992188, "step": 79770 }, { "epoch": 0.6897908362227737, "grad_norm": 5.265330338580283, "learning_rate": 4.404660460382693e-06, "loss": 0.420794677734375, "step": 79775 }, { "epoch": 0.6898340697443169, "grad_norm": 11.840880156878255, "learning_rate": 4.404480436101616e-06, "loss": 0.13254432678222655, "step": 79780 }, { "epoch": 0.6898773032658602, "grad_norm": 10.994560957261351, "learning_rate": 4.404300405343198e-06, "loss": 0.28280029296875, "step": 79785 }, { "epoch": 0.6899205367874035, "grad_norm": 12.66246342314059, "learning_rate": 4.404120368108271e-06, "loss": 0.07966804504394531, "step": 79790 }, { "epoch": 0.6899637703089467, "grad_norm": 12.105290512381465, "learning_rate": 4.403940324397666e-06, "loss": 0.10407562255859375, "step": 79795 }, { "epoch": 0.69000700383049, "grad_norm": 4.285380067172142, "learning_rate": 4.403760274212212e-06, "loss": 0.0293670654296875, "step": 79800 }, { "epoch": 0.6900502373520333, "grad_norm": 4.513443501463452, "learning_rate": 4.403580217552738e-06, "loss": 0.2481903076171875, "step": 79805 }, { "epoch": 0.6900934708735765, "grad_norm": 6.753265430184558, "learning_rate": 4.403400154420078e-06, "loss": 0.10242462158203125, "step": 79810 }, { "epoch": 0.6901367043951198, "grad_norm": 4.18418748926444, "learning_rate": 4.40322008481506e-06, "loss": 0.06407470703125, "step": 79815 }, { "epoch": 0.6901799379166631, "grad_norm": 12.453967040975325, "learning_rate": 4.403040008738514e-06, "loss": 0.18285751342773438, "step": 79820 }, { "epoch": 0.6902231714382063, "grad_norm": 6.503423092123739, "learning_rate": 4.402859926191273e-06, "loss": 0.095745849609375, "step": 79825 }, { "epoch": 0.6902664049597496, "grad_norm": 5.645780681809807, "learning_rate": 4.402679837174164e-06, "loss": 0.346435546875, "step": 79830 }, { "epoch": 0.6903096384812929, "grad_norm": 0.19740808239049035, "learning_rate": 4.402499741688021e-06, "loss": 0.4401580810546875, "step": 79835 }, { "epoch": 0.6903528720028361, "grad_norm": 5.181241618621504, "learning_rate": 4.402319639733673e-06, "loss": 0.18500022888183593, "step": 79840 }, { "epoch": 0.6903961055243794, "grad_norm": 12.179173322011835, "learning_rate": 4.4021395313119505e-06, "loss": 0.3654212951660156, "step": 79845 }, { "epoch": 0.6904393390459227, "grad_norm": 10.040509283814655, "learning_rate": 4.401959416423685e-06, "loss": 0.16338653564453126, "step": 79850 }, { "epoch": 0.6904825725674659, "grad_norm": 4.44416842910343, "learning_rate": 4.401779295069706e-06, "loss": 0.027556991577148436, "step": 79855 }, { "epoch": 0.6905258060890092, "grad_norm": 3.755601973672619, "learning_rate": 4.401599167250844e-06, "loss": 0.07494583129882812, "step": 79860 }, { "epoch": 0.6905690396105525, "grad_norm": 1.3763254878318016, "learning_rate": 4.40141903296793e-06, "loss": 0.15307540893554689, "step": 79865 }, { "epoch": 0.6906122731320957, "grad_norm": 3.6575056805739465, "learning_rate": 4.401238892221798e-06, "loss": 0.055831527709960936, "step": 79870 }, { "epoch": 0.690655506653639, "grad_norm": 0.19971325725357236, "learning_rate": 4.4010587450132745e-06, "loss": 0.1817943572998047, "step": 79875 }, { "epoch": 0.6906987401751822, "grad_norm": 2.787050282501064, "learning_rate": 4.400878591343193e-06, "loss": 0.08720703125, "step": 79880 }, { "epoch": 0.6907419736967255, "grad_norm": 17.428996269218647, "learning_rate": 4.400698431212381e-06, "loss": 0.13957061767578124, "step": 79885 }, { "epoch": 0.6907852072182687, "grad_norm": 1.8021013411971623, "learning_rate": 4.400518264621672e-06, "loss": 0.31690444946289065, "step": 79890 }, { "epoch": 0.690828440739812, "grad_norm": 22.70574238725061, "learning_rate": 4.400338091571897e-06, "loss": 0.2010162353515625, "step": 79895 }, { "epoch": 0.6908716742613553, "grad_norm": 24.412766254948597, "learning_rate": 4.400157912063887e-06, "loss": 0.18383560180664063, "step": 79900 }, { "epoch": 0.6909149077828985, "grad_norm": 9.010654077587853, "learning_rate": 4.399977726098472e-06, "loss": 0.12534713745117188, "step": 79905 }, { "epoch": 0.6909581413044418, "grad_norm": 21.022808424739154, "learning_rate": 4.399797533676485e-06, "loss": 0.1215118408203125, "step": 79910 }, { "epoch": 0.6910013748259851, "grad_norm": 3.430604192472896, "learning_rate": 4.399617334798753e-06, "loss": 0.154443359375, "step": 79915 }, { "epoch": 0.6910446083475283, "grad_norm": 12.724488586673676, "learning_rate": 4.3994371294661105e-06, "loss": 0.07326812744140625, "step": 79920 }, { "epoch": 0.6910878418690716, "grad_norm": 2.7868404443937513, "learning_rate": 4.399256917679387e-06, "loss": 0.03507080078125, "step": 79925 }, { "epoch": 0.6911310753906149, "grad_norm": 46.12314584769832, "learning_rate": 4.399076699439415e-06, "loss": 0.47356491088867186, "step": 79930 }, { "epoch": 0.6911743089121581, "grad_norm": 3.7113476018916898, "learning_rate": 4.398896474747025e-06, "loss": 0.05295486450195312, "step": 79935 }, { "epoch": 0.6912175424337014, "grad_norm": 3.011459922833166, "learning_rate": 4.398716243603048e-06, "loss": 0.0562957763671875, "step": 79940 }, { "epoch": 0.6912607759552447, "grad_norm": 0.21992563332425713, "learning_rate": 4.398536006008315e-06, "loss": 0.021150970458984376, "step": 79945 }, { "epoch": 0.6913040094767879, "grad_norm": 4.767046634559379, "learning_rate": 4.3983557619636576e-06, "loss": 0.07113265991210938, "step": 79950 }, { "epoch": 0.6913472429983312, "grad_norm": 1.390923367704698, "learning_rate": 4.398175511469907e-06, "loss": 0.29359588623046873, "step": 79955 }, { "epoch": 0.6913904765198744, "grad_norm": 0.2294023851887735, "learning_rate": 4.397995254527895e-06, "loss": 0.1956512451171875, "step": 79960 }, { "epoch": 0.6914337100414177, "grad_norm": 2.214595964495952, "learning_rate": 4.397814991138451e-06, "loss": 0.04943008422851562, "step": 79965 }, { "epoch": 0.691476943562961, "grad_norm": 8.173192920145421, "learning_rate": 4.397634721302409e-06, "loss": 0.128179931640625, "step": 79970 }, { "epoch": 0.6915201770845042, "grad_norm": 0.9046369624085191, "learning_rate": 4.397454445020599e-06, "loss": 0.06264190673828125, "step": 79975 }, { "epoch": 0.6915634106060475, "grad_norm": 1.0965822241292957, "learning_rate": 4.397274162293851e-06, "loss": 0.099200439453125, "step": 79980 }, { "epoch": 0.6916066441275908, "grad_norm": 15.104649720397227, "learning_rate": 4.397093873123e-06, "loss": 0.2029857635498047, "step": 79985 }, { "epoch": 0.691649877649134, "grad_norm": 25.442401949729504, "learning_rate": 4.396913577508875e-06, "loss": 0.17149658203125, "step": 79990 }, { "epoch": 0.6916931111706773, "grad_norm": 12.463555668490372, "learning_rate": 4.396733275452308e-06, "loss": 0.1973703384399414, "step": 79995 }, { "epoch": 0.6917363446922206, "grad_norm": 14.550485133962635, "learning_rate": 4.396552966954131e-06, "loss": 0.097686767578125, "step": 80000 }, { "epoch": 0.6917363446922206, "eval_loss": 0.09788688272237778, "eval_margin": 0.15175552666187286, "eval_mean_neg": 0.00430450402200222, "eval_mean_pos": 0.7209447622299194, "eval_runtime": 19.5877, "eval_samples_per_second": 11.793, "eval_steps_per_second": 5.922, "step": 80000 }, { "epoch": 0.6917795782137638, "grad_norm": 25.760090669140787, "learning_rate": 4.396372652015174e-06, "loss": 0.095318603515625, "step": 80005 }, { "epoch": 0.6918228117353071, "grad_norm": 3.0764886446099236, "learning_rate": 4.39619233063627e-06, "loss": 0.15178985595703126, "step": 80010 }, { "epoch": 0.6918660452568504, "grad_norm": 0.16715373777772355, "learning_rate": 4.396012002818251e-06, "loss": 0.3785667419433594, "step": 80015 }, { "epoch": 0.6919092787783936, "grad_norm": 1.3927232971037478, "learning_rate": 4.395831668561948e-06, "loss": 0.061322021484375, "step": 80020 }, { "epoch": 0.6919525122999369, "grad_norm": 26.049206207038015, "learning_rate": 4.395651327868193e-06, "loss": 0.21397705078125, "step": 80025 }, { "epoch": 0.6919957458214802, "grad_norm": 17.662324038735093, "learning_rate": 4.3954709807378155e-06, "loss": 0.11591644287109375, "step": 80030 }, { "epoch": 0.6920389793430234, "grad_norm": 4.639139091359825, "learning_rate": 4.3952906271716504e-06, "loss": 0.13803596496582032, "step": 80035 }, { "epoch": 0.6920822128645666, "grad_norm": 2.459954521082901, "learning_rate": 4.395110267170529e-06, "loss": 0.0544830322265625, "step": 80040 }, { "epoch": 0.69212544638611, "grad_norm": 13.390362418066601, "learning_rate": 4.394929900735282e-06, "loss": 0.328363037109375, "step": 80045 }, { "epoch": 0.6921686799076532, "grad_norm": 1.5589521082134012, "learning_rate": 4.39474952786674e-06, "loss": 0.11138477325439453, "step": 80050 }, { "epoch": 0.6922119134291964, "grad_norm": 8.653971396474207, "learning_rate": 4.394569148565738e-06, "loss": 0.04267120361328125, "step": 80055 }, { "epoch": 0.6922551469507398, "grad_norm": 5.39554548617643, "learning_rate": 4.3943887628331055e-06, "loss": 0.10698394775390625, "step": 80060 }, { "epoch": 0.692298380472283, "grad_norm": 11.104895739140508, "learning_rate": 4.394208370669676e-06, "loss": 0.06566390991210938, "step": 80065 }, { "epoch": 0.6923416139938262, "grad_norm": 10.129176595203933, "learning_rate": 4.39402797207628e-06, "loss": 0.11710433959960938, "step": 80070 }, { "epoch": 0.6923848475153696, "grad_norm": 22.43882807208922, "learning_rate": 4.393847567053751e-06, "loss": 0.49037628173828124, "step": 80075 }, { "epoch": 0.6924280810369128, "grad_norm": 2.630873865879285, "learning_rate": 4.393667155602919e-06, "loss": 0.28253936767578125, "step": 80080 }, { "epoch": 0.692471314558456, "grad_norm": 6.804446795175428, "learning_rate": 4.393486737724617e-06, "loss": 0.19211177825927733, "step": 80085 }, { "epoch": 0.6925145480799993, "grad_norm": 4.528375451489793, "learning_rate": 4.393306313419678e-06, "loss": 0.16987342834472657, "step": 80090 }, { "epoch": 0.6925577816015426, "grad_norm": 4.620207663760422, "learning_rate": 4.393125882688934e-06, "loss": 0.06054344177246094, "step": 80095 }, { "epoch": 0.6926010151230858, "grad_norm": 3.600278827210535, "learning_rate": 4.392945445533217e-06, "loss": 0.2383392333984375, "step": 80100 }, { "epoch": 0.6926442486446291, "grad_norm": 1.413650042069444, "learning_rate": 4.392765001953357e-06, "loss": 0.05276336669921875, "step": 80105 }, { "epoch": 0.6926874821661724, "grad_norm": 27.620134035130565, "learning_rate": 4.3925845519501885e-06, "loss": 0.10845375061035156, "step": 80110 }, { "epoch": 0.6927307156877156, "grad_norm": 1.8326405629579094, "learning_rate": 4.392404095524544e-06, "loss": 0.07599945068359375, "step": 80115 }, { "epoch": 0.6927739492092589, "grad_norm": 33.11264033697161, "learning_rate": 4.392223632677253e-06, "loss": 0.3251045227050781, "step": 80120 }, { "epoch": 0.6928171827308022, "grad_norm": 0.9961429193095677, "learning_rate": 4.392043163409151e-06, "loss": 0.1105316162109375, "step": 80125 }, { "epoch": 0.6928604162523454, "grad_norm": 5.545491718467361, "learning_rate": 4.391862687721069e-06, "loss": 0.09194183349609375, "step": 80130 }, { "epoch": 0.6929036497738886, "grad_norm": 2.403003961208082, "learning_rate": 4.391682205613839e-06, "loss": 0.023264694213867187, "step": 80135 }, { "epoch": 0.692946883295432, "grad_norm": 25.129269603728854, "learning_rate": 4.3915017170882934e-06, "loss": 0.238800048828125, "step": 80140 }, { "epoch": 0.6929901168169752, "grad_norm": 33.004240340451666, "learning_rate": 4.391321222145264e-06, "loss": 0.13949432373046874, "step": 80145 }, { "epoch": 0.6930333503385184, "grad_norm": 29.273824139085242, "learning_rate": 4.3911407207855855e-06, "loss": 0.1360565185546875, "step": 80150 }, { "epoch": 0.6930765838600618, "grad_norm": 6.094521821467758, "learning_rate": 4.390960213010088e-06, "loss": 0.0562469482421875, "step": 80155 }, { "epoch": 0.693119817381605, "grad_norm": 44.85052402283103, "learning_rate": 4.390779698819606e-06, "loss": 0.288079833984375, "step": 80160 }, { "epoch": 0.6931630509031482, "grad_norm": 0.16259890208400007, "learning_rate": 4.390599178214971e-06, "loss": 0.04585189819335937, "step": 80165 }, { "epoch": 0.6932062844246916, "grad_norm": 11.961530588858105, "learning_rate": 4.3904186511970134e-06, "loss": 0.24109268188476562, "step": 80170 }, { "epoch": 0.6932495179462348, "grad_norm": 20.87228002644118, "learning_rate": 4.390238117766569e-06, "loss": 0.10719127655029297, "step": 80175 }, { "epoch": 0.693292751467778, "grad_norm": 0.9947513644268949, "learning_rate": 4.390057577924471e-06, "loss": 0.1978144645690918, "step": 80180 }, { "epoch": 0.6933359849893214, "grad_norm": 10.549458778524238, "learning_rate": 4.3898770316715475e-06, "loss": 0.0593994140625, "step": 80185 }, { "epoch": 0.6933792185108646, "grad_norm": 24.63921089600646, "learning_rate": 4.389696479008636e-06, "loss": 0.19571151733398437, "step": 80190 }, { "epoch": 0.6934224520324078, "grad_norm": 1.6946343149811047, "learning_rate": 4.389515919936566e-06, "loss": 0.08400421142578125, "step": 80195 }, { "epoch": 0.6934656855539512, "grad_norm": 4.983989856242595, "learning_rate": 4.389335354456171e-06, "loss": 0.2799407958984375, "step": 80200 }, { "epoch": 0.6935089190754944, "grad_norm": 0.5544813569551624, "learning_rate": 4.3891547825682845e-06, "loss": 0.21232147216796876, "step": 80205 }, { "epoch": 0.6935521525970376, "grad_norm": 0.2873587934257675, "learning_rate": 4.38897420427374e-06, "loss": 0.2840259552001953, "step": 80210 }, { "epoch": 0.6935953861185808, "grad_norm": 18.445331555873118, "learning_rate": 4.388793619573368e-06, "loss": 0.12227630615234375, "step": 80215 }, { "epoch": 0.6936386196401242, "grad_norm": 1.6961104012214012, "learning_rate": 4.388613028468003e-06, "loss": 0.0539581298828125, "step": 80220 }, { "epoch": 0.6936818531616674, "grad_norm": 10.613509135031103, "learning_rate": 4.3884324309584765e-06, "loss": 0.40103759765625, "step": 80225 }, { "epoch": 0.6937250866832106, "grad_norm": 17.1168062550291, "learning_rate": 4.3882518270456236e-06, "loss": 0.10294342041015625, "step": 80230 }, { "epoch": 0.693768320204754, "grad_norm": 2.263948501652716, "learning_rate": 4.3880712167302755e-06, "loss": 0.04701938629150391, "step": 80235 }, { "epoch": 0.6938115537262972, "grad_norm": 5.603180645746583, "learning_rate": 4.387890600013265e-06, "loss": 0.17346572875976562, "step": 80240 }, { "epoch": 0.6938547872478404, "grad_norm": 2.2692266173291706, "learning_rate": 4.387709976895427e-06, "loss": 0.127435302734375, "step": 80245 }, { "epoch": 0.6938980207693838, "grad_norm": 1.8440848095820186, "learning_rate": 4.387529347377592e-06, "loss": 0.023904037475585938, "step": 80250 }, { "epoch": 0.693941254290927, "grad_norm": 8.557688607263662, "learning_rate": 4.387348711460594e-06, "loss": 0.36151123046875, "step": 80255 }, { "epoch": 0.6939844878124702, "grad_norm": 16.384837403964, "learning_rate": 4.387168069145268e-06, "loss": 0.08166351318359374, "step": 80260 }, { "epoch": 0.6940277213340136, "grad_norm": 44.98766947044041, "learning_rate": 4.386987420432444e-06, "loss": 0.45255126953125, "step": 80265 }, { "epoch": 0.6940709548555568, "grad_norm": 0.6695050194386268, "learning_rate": 4.3868067653229575e-06, "loss": 0.20630645751953125, "step": 80270 }, { "epoch": 0.6941141883771, "grad_norm": 34.18019664087713, "learning_rate": 4.38662610381764e-06, "loss": 0.2651641845703125, "step": 80275 }, { "epoch": 0.6941574218986434, "grad_norm": 0.7034686915493867, "learning_rate": 4.3864454359173255e-06, "loss": 0.0471099853515625, "step": 80280 }, { "epoch": 0.6942006554201866, "grad_norm": 11.234498918164908, "learning_rate": 4.386264761622847e-06, "loss": 0.04134445190429688, "step": 80285 }, { "epoch": 0.6942438889417298, "grad_norm": 0.736859249009056, "learning_rate": 4.386084080935039e-06, "loss": 0.22433061599731446, "step": 80290 }, { "epoch": 0.6942871224632732, "grad_norm": 8.691737271489384, "learning_rate": 4.385903393854732e-06, "loss": 0.10727691650390625, "step": 80295 }, { "epoch": 0.6943303559848164, "grad_norm": 23.85305523710238, "learning_rate": 4.385722700382763e-06, "loss": 0.2954254150390625, "step": 80300 }, { "epoch": 0.6943735895063596, "grad_norm": 8.652056771047542, "learning_rate": 4.385542000519962e-06, "loss": 0.02991619110107422, "step": 80305 }, { "epoch": 0.6944168230279029, "grad_norm": 9.837649003323502, "learning_rate": 4.385361294267164e-06, "loss": 0.0426513671875, "step": 80310 }, { "epoch": 0.6944600565494462, "grad_norm": 3.2459609075501903, "learning_rate": 4.385180581625202e-06, "loss": 0.057930374145507814, "step": 80315 }, { "epoch": 0.6945032900709894, "grad_norm": 1.133859283676475, "learning_rate": 4.38499986259491e-06, "loss": 0.13835983276367186, "step": 80320 }, { "epoch": 0.6945465235925327, "grad_norm": 31.173453989739134, "learning_rate": 4.384819137177121e-06, "loss": 0.15821914672851561, "step": 80325 }, { "epoch": 0.694589757114076, "grad_norm": 10.546112764427347, "learning_rate": 4.384638405372668e-06, "loss": 0.08037261962890625, "step": 80330 }, { "epoch": 0.6946329906356192, "grad_norm": 13.180733830055004, "learning_rate": 4.384457667182386e-06, "loss": 0.25207061767578126, "step": 80335 }, { "epoch": 0.6946762241571625, "grad_norm": 14.797596801876274, "learning_rate": 4.384276922607106e-06, "loss": 0.26521453857421873, "step": 80340 }, { "epoch": 0.6947194576787058, "grad_norm": 0.7751120504684359, "learning_rate": 4.384096171647664e-06, "loss": 0.22791290283203125, "step": 80345 }, { "epoch": 0.694762691200249, "grad_norm": 32.22647995166355, "learning_rate": 4.383915414304893e-06, "loss": 0.26539955139160154, "step": 80350 }, { "epoch": 0.6948059247217923, "grad_norm": 0.18084359319126508, "learning_rate": 4.383734650579626e-06, "loss": 0.09528007507324218, "step": 80355 }, { "epoch": 0.6948491582433356, "grad_norm": 36.62871072054293, "learning_rate": 4.383553880472697e-06, "loss": 0.4185394287109375, "step": 80360 }, { "epoch": 0.6948923917648788, "grad_norm": 4.244067887120449, "learning_rate": 4.383373103984941e-06, "loss": 0.13158416748046875, "step": 80365 }, { "epoch": 0.694935625286422, "grad_norm": 4.116341713127551, "learning_rate": 4.383192321117188e-06, "loss": 0.08471412658691406, "step": 80370 }, { "epoch": 0.6949788588079654, "grad_norm": 40.940393492023134, "learning_rate": 4.383011531870275e-06, "loss": 0.5745414733886719, "step": 80375 }, { "epoch": 0.6950220923295086, "grad_norm": 0.4792680564132186, "learning_rate": 4.382830736245037e-06, "loss": 0.18038482666015626, "step": 80380 }, { "epoch": 0.6950653258510519, "grad_norm": 20.137720975520594, "learning_rate": 4.382649934242305e-06, "loss": 0.13983497619628907, "step": 80385 }, { "epoch": 0.6951085593725951, "grad_norm": 34.71491064254816, "learning_rate": 4.382469125862913e-06, "loss": 0.1213592529296875, "step": 80390 }, { "epoch": 0.6951517928941384, "grad_norm": 15.435878443905926, "learning_rate": 4.382288311107696e-06, "loss": 0.06557083129882812, "step": 80395 }, { "epoch": 0.6951950264156816, "grad_norm": 5.722105618917314, "learning_rate": 4.382107489977486e-06, "loss": 0.04581184387207031, "step": 80400 }, { "epoch": 0.6952382599372249, "grad_norm": 3.649368202847393, "learning_rate": 4.38192666247312e-06, "loss": 0.14638671875, "step": 80405 }, { "epoch": 0.6952814934587682, "grad_norm": 10.937375630764594, "learning_rate": 4.38174582859543e-06, "loss": 0.32974395751953123, "step": 80410 }, { "epoch": 0.6953247269803114, "grad_norm": 7.758973461641776, "learning_rate": 4.38156498834525e-06, "loss": 0.3136383056640625, "step": 80415 }, { "epoch": 0.6953679605018547, "grad_norm": 60.35106355776779, "learning_rate": 4.381384141723415e-06, "loss": 0.3012584686279297, "step": 80420 }, { "epoch": 0.695411194023398, "grad_norm": 2.8220821068877187, "learning_rate": 4.381203288730757e-06, "loss": 0.109295654296875, "step": 80425 }, { "epoch": 0.6954544275449412, "grad_norm": 0.734419089230378, "learning_rate": 4.381022429368113e-06, "loss": 0.10237312316894531, "step": 80430 }, { "epoch": 0.6954976610664845, "grad_norm": 4.890514732869459, "learning_rate": 4.3808415636363154e-06, "loss": 0.2519378662109375, "step": 80435 }, { "epoch": 0.6955408945880278, "grad_norm": 17.391955206130902, "learning_rate": 4.380660691536198e-06, "loss": 0.07033023834228516, "step": 80440 }, { "epoch": 0.695584128109571, "grad_norm": 4.743796951654303, "learning_rate": 4.3804798130685954e-06, "loss": 0.12616729736328125, "step": 80445 }, { "epoch": 0.6956273616311143, "grad_norm": 1.1523602055880706, "learning_rate": 4.3802989282343416e-06, "loss": 0.0281005859375, "step": 80450 }, { "epoch": 0.6956705951526576, "grad_norm": 18.580215613672802, "learning_rate": 4.3801180370342725e-06, "loss": 0.176507568359375, "step": 80455 }, { "epoch": 0.6957138286742008, "grad_norm": 4.497685287501815, "learning_rate": 4.379937139469219e-06, "loss": 0.18629417419433594, "step": 80460 }, { "epoch": 0.6957570621957441, "grad_norm": 0.7576052550483554, "learning_rate": 4.37975623554002e-06, "loss": 0.0635589599609375, "step": 80465 }, { "epoch": 0.6958002957172874, "grad_norm": 1.373483123486058, "learning_rate": 4.3795753252475044e-06, "loss": 0.28695831298828123, "step": 80470 }, { "epoch": 0.6958435292388306, "grad_norm": 2.947729339288054, "learning_rate": 4.379394408592511e-06, "loss": 0.16945886611938477, "step": 80475 }, { "epoch": 0.6958867627603739, "grad_norm": 1.2030443417708068, "learning_rate": 4.379213485575872e-06, "loss": 0.04716682434082031, "step": 80480 }, { "epoch": 0.6959299962819171, "grad_norm": 15.690933755895387, "learning_rate": 4.379032556198422e-06, "loss": 0.20386028289794922, "step": 80485 }, { "epoch": 0.6959732298034604, "grad_norm": 3.6226061575220205, "learning_rate": 4.378851620460997e-06, "loss": 0.04309234619140625, "step": 80490 }, { "epoch": 0.6960164633250037, "grad_norm": 1.6180397895047707, "learning_rate": 4.378670678364429e-06, "loss": 0.02412071228027344, "step": 80495 }, { "epoch": 0.6960596968465469, "grad_norm": 5.255315244692986, "learning_rate": 4.378489729909553e-06, "loss": 0.041996002197265625, "step": 80500 }, { "epoch": 0.6961029303680902, "grad_norm": 2.5789983698460843, "learning_rate": 4.378308775097206e-06, "loss": 0.03975410461425781, "step": 80505 }, { "epoch": 0.6961461638896335, "grad_norm": 76.31603155383385, "learning_rate": 4.378127813928219e-06, "loss": 0.5489532470703125, "step": 80510 }, { "epoch": 0.6961893974111767, "grad_norm": 0.9403620393938659, "learning_rate": 4.377946846403429e-06, "loss": 0.100714111328125, "step": 80515 }, { "epoch": 0.69623263093272, "grad_norm": 27.127159571446963, "learning_rate": 4.377765872523669e-06, "loss": 0.088140869140625, "step": 80520 }, { "epoch": 0.6962758644542633, "grad_norm": 0.6878605960718276, "learning_rate": 4.377584892289776e-06, "loss": 0.11584033966064453, "step": 80525 }, { "epoch": 0.6963190979758065, "grad_norm": 4.855682233210517, "learning_rate": 4.377403905702583e-06, "loss": 0.12762832641601562, "step": 80530 }, { "epoch": 0.6963623314973498, "grad_norm": 3.8351545781643446, "learning_rate": 4.377222912762925e-06, "loss": 0.08481597900390625, "step": 80535 }, { "epoch": 0.6964055650188931, "grad_norm": 1.696723631851398, "learning_rate": 4.377041913471635e-06, "loss": 0.3401275634765625, "step": 80540 }, { "epoch": 0.6964487985404363, "grad_norm": 3.450870310662008, "learning_rate": 4.376860907829551e-06, "loss": 0.408367919921875, "step": 80545 }, { "epoch": 0.6964920320619796, "grad_norm": 1.2372083880600437, "learning_rate": 4.376679895837505e-06, "loss": 0.016944122314453126, "step": 80550 }, { "epoch": 0.6965352655835229, "grad_norm": 5.6413913032324325, "learning_rate": 4.376498877496335e-06, "loss": 0.1143218994140625, "step": 80555 }, { "epoch": 0.6965784991050661, "grad_norm": 1.152905986834382, "learning_rate": 4.376317852806872e-06, "loss": 0.07795791625976563, "step": 80560 }, { "epoch": 0.6966217326266093, "grad_norm": 0.19399237467106065, "learning_rate": 4.376136821769953e-06, "loss": 0.2796722412109375, "step": 80565 }, { "epoch": 0.6966649661481527, "grad_norm": 2.429335837084522, "learning_rate": 4.375955784386412e-06, "loss": 0.26481475830078127, "step": 80570 }, { "epoch": 0.6967081996696959, "grad_norm": 0.3843220643476471, "learning_rate": 4.375774740657086e-06, "loss": 0.2957038879394531, "step": 80575 }, { "epoch": 0.6967514331912391, "grad_norm": 3.1803804759277807, "learning_rate": 4.375593690582808e-06, "loss": 0.22648468017578124, "step": 80580 }, { "epoch": 0.6967946667127825, "grad_norm": 10.976223033171651, "learning_rate": 4.375412634164413e-06, "loss": 0.1790008544921875, "step": 80585 }, { "epoch": 0.6968379002343257, "grad_norm": 19.006061320525685, "learning_rate": 4.375231571402736e-06, "loss": 0.29530029296875, "step": 80590 }, { "epoch": 0.6968811337558689, "grad_norm": 2.3881109852374927, "learning_rate": 4.375050502298613e-06, "loss": 0.045886993408203125, "step": 80595 }, { "epoch": 0.6969243672774122, "grad_norm": 2.136657861941869, "learning_rate": 4.3748694268528776e-06, "loss": 0.042776298522949216, "step": 80600 }, { "epoch": 0.6969676007989555, "grad_norm": 14.232306489708671, "learning_rate": 4.374688345066368e-06, "loss": 0.5568294525146484, "step": 80605 }, { "epoch": 0.6970108343204987, "grad_norm": 9.292403724776282, "learning_rate": 4.374507256939916e-06, "loss": 0.17546463012695312, "step": 80610 }, { "epoch": 0.697054067842042, "grad_norm": 28.603656646085938, "learning_rate": 4.374326162474358e-06, "loss": 0.11335067749023438, "step": 80615 }, { "epoch": 0.6970973013635853, "grad_norm": 0.9512720914541609, "learning_rate": 4.37414506167053e-06, "loss": 0.29546051025390624, "step": 80620 }, { "epoch": 0.6971405348851285, "grad_norm": 9.01624154004321, "learning_rate": 4.3739639545292654e-06, "loss": 0.48757286071777345, "step": 80625 }, { "epoch": 0.6971837684066718, "grad_norm": 2.460001260631571, "learning_rate": 4.3737828410514e-06, "loss": 0.12361183166503906, "step": 80630 }, { "epoch": 0.6972270019282151, "grad_norm": 7.624342308594692, "learning_rate": 4.373601721237771e-06, "loss": 0.04899606704711914, "step": 80635 }, { "epoch": 0.6972702354497583, "grad_norm": 3.982644449218571, "learning_rate": 4.373420595089212e-06, "loss": 0.24936676025390625, "step": 80640 }, { "epoch": 0.6973134689713016, "grad_norm": 0.19458949295855438, "learning_rate": 4.373239462606557e-06, "loss": 0.05046844482421875, "step": 80645 }, { "epoch": 0.6973567024928449, "grad_norm": 1.9228888739610914, "learning_rate": 4.3730583237906445e-06, "loss": 0.4867767333984375, "step": 80650 }, { "epoch": 0.6973999360143881, "grad_norm": 14.940021584452124, "learning_rate": 4.372877178642308e-06, "loss": 0.0834442138671875, "step": 80655 }, { "epoch": 0.6974431695359313, "grad_norm": 3.039095206749311, "learning_rate": 4.372696027162384e-06, "loss": 0.0608332633972168, "step": 80660 }, { "epoch": 0.6974864030574747, "grad_norm": 7.923583951861586, "learning_rate": 4.3725148693517056e-06, "loss": 0.0987274169921875, "step": 80665 }, { "epoch": 0.6975296365790179, "grad_norm": 4.151866186197445, "learning_rate": 4.372333705211111e-06, "loss": 0.042791748046875, "step": 80670 }, { "epoch": 0.6975728701005611, "grad_norm": 13.270227826546066, "learning_rate": 4.372152534741434e-06, "loss": 0.098541259765625, "step": 80675 }, { "epoch": 0.6976161036221045, "grad_norm": 6.2231805115933545, "learning_rate": 4.37197135794351e-06, "loss": 0.26015625, "step": 80680 }, { "epoch": 0.6976593371436477, "grad_norm": 9.122099291501168, "learning_rate": 4.371790174818177e-06, "loss": 0.147869873046875, "step": 80685 }, { "epoch": 0.6977025706651909, "grad_norm": 31.182322190852307, "learning_rate": 4.3716089853662686e-06, "loss": 0.4598186492919922, "step": 80690 }, { "epoch": 0.6977458041867343, "grad_norm": 20.498531485592018, "learning_rate": 4.37142778958862e-06, "loss": 0.15393142700195311, "step": 80695 }, { "epoch": 0.6977890377082775, "grad_norm": 10.85046501291198, "learning_rate": 4.3712465874860685e-06, "loss": 0.43834228515625, "step": 80700 }, { "epoch": 0.6978322712298207, "grad_norm": 8.568204019797529, "learning_rate": 4.3710653790594476e-06, "loss": 0.17032012939453126, "step": 80705 }, { "epoch": 0.6978755047513641, "grad_norm": 5.575185403716382, "learning_rate": 4.370884164309595e-06, "loss": 0.12352886199951171, "step": 80710 }, { "epoch": 0.6979187382729073, "grad_norm": 27.52967982225393, "learning_rate": 4.370702943237347e-06, "loss": 0.140411376953125, "step": 80715 }, { "epoch": 0.6979619717944505, "grad_norm": 33.069882894913405, "learning_rate": 4.370521715843537e-06, "loss": 0.19788284301757814, "step": 80720 }, { "epoch": 0.6980052053159939, "grad_norm": 0.43234933602564646, "learning_rate": 4.370340482129001e-06, "loss": 0.18298759460449218, "step": 80725 }, { "epoch": 0.6980484388375371, "grad_norm": 16.180232134506603, "learning_rate": 4.3701592420945774e-06, "loss": 0.11689796447753906, "step": 80730 }, { "epoch": 0.6980916723590803, "grad_norm": 10.736346917270811, "learning_rate": 4.3699779957411e-06, "loss": 0.27422027587890624, "step": 80735 }, { "epoch": 0.6981349058806235, "grad_norm": 10.547758696235416, "learning_rate": 4.369796743069405e-06, "loss": 0.16002197265625, "step": 80740 }, { "epoch": 0.6981781394021669, "grad_norm": 10.430640981657856, "learning_rate": 4.369615484080329e-06, "loss": 0.15510406494140624, "step": 80745 }, { "epoch": 0.6982213729237101, "grad_norm": 12.385139898738545, "learning_rate": 4.369434218774707e-06, "loss": 0.08232002258300782, "step": 80750 }, { "epoch": 0.6982646064452533, "grad_norm": 4.722575632615744, "learning_rate": 4.369252947153375e-06, "loss": 0.18407669067382812, "step": 80755 }, { "epoch": 0.6983078399667967, "grad_norm": 1.7492990943353908, "learning_rate": 4.36907166921717e-06, "loss": 0.13588447570800782, "step": 80760 }, { "epoch": 0.6983510734883399, "grad_norm": 7.84975297818509, "learning_rate": 4.368890384966926e-06, "loss": 0.1366445541381836, "step": 80765 }, { "epoch": 0.6983943070098831, "grad_norm": 21.73094259276337, "learning_rate": 4.368709094403482e-06, "loss": 0.15058612823486328, "step": 80770 }, { "epoch": 0.6984375405314265, "grad_norm": 1.356046853777832, "learning_rate": 4.368527797527672e-06, "loss": 0.07675628662109375, "step": 80775 }, { "epoch": 0.6984807740529697, "grad_norm": 1.1002981546158543, "learning_rate": 4.368346494340333e-06, "loss": 0.2696014404296875, "step": 80780 }, { "epoch": 0.6985240075745129, "grad_norm": 0.03737263523281962, "learning_rate": 4.368165184842301e-06, "loss": 0.05733680725097656, "step": 80785 }, { "epoch": 0.6985672410960563, "grad_norm": 5.753388958281246, "learning_rate": 4.367983869034412e-06, "loss": 0.076605224609375, "step": 80790 }, { "epoch": 0.6986104746175995, "grad_norm": 25.461519493938987, "learning_rate": 4.3678025469175015e-06, "loss": 0.20672607421875, "step": 80795 }, { "epoch": 0.6986537081391427, "grad_norm": 0.3712087841599953, "learning_rate": 4.367621218492407e-06, "loss": 0.161541748046875, "step": 80800 }, { "epoch": 0.6986969416606861, "grad_norm": 25.306154271472803, "learning_rate": 4.3674398837599646e-06, "loss": 0.1252288818359375, "step": 80805 }, { "epoch": 0.6987401751822293, "grad_norm": 3.049164840330242, "learning_rate": 4.36725854272101e-06, "loss": 0.09427032470703126, "step": 80810 }, { "epoch": 0.6987834087037725, "grad_norm": 37.65822085324358, "learning_rate": 4.36707719537638e-06, "loss": 0.3368133544921875, "step": 80815 }, { "epoch": 0.6988266422253159, "grad_norm": 4.886164003587345, "learning_rate": 4.366895841726911e-06, "loss": 0.1961334228515625, "step": 80820 }, { "epoch": 0.6988698757468591, "grad_norm": 16.686249021353007, "learning_rate": 4.366714481773439e-06, "loss": 0.20936851501464843, "step": 80825 }, { "epoch": 0.6989131092684023, "grad_norm": 16.05640277470129, "learning_rate": 4.366533115516801e-06, "loss": 0.08395805358886718, "step": 80830 }, { "epoch": 0.6989563427899456, "grad_norm": 0.5519327892149667, "learning_rate": 4.3663517429578315e-06, "loss": 0.22354202270507811, "step": 80835 }, { "epoch": 0.6989995763114889, "grad_norm": 42.287360830190735, "learning_rate": 4.366170364097369e-06, "loss": 0.5849200248718261, "step": 80840 }, { "epoch": 0.6990428098330321, "grad_norm": 10.331881822542563, "learning_rate": 4.365988978936251e-06, "loss": 0.07455368041992187, "step": 80845 }, { "epoch": 0.6990860433545754, "grad_norm": 17.6559930919158, "learning_rate": 4.365807587475311e-06, "loss": 0.13377685546875, "step": 80850 }, { "epoch": 0.6991292768761187, "grad_norm": 23.06250604872629, "learning_rate": 4.365626189715387e-06, "loss": 0.2459186553955078, "step": 80855 }, { "epoch": 0.6991725103976619, "grad_norm": 0.5290233175860047, "learning_rate": 4.365444785657317e-06, "loss": 0.38845367431640626, "step": 80860 }, { "epoch": 0.6992157439192052, "grad_norm": 2.900806693202586, "learning_rate": 4.365263375301936e-06, "loss": 0.07702808380126953, "step": 80865 }, { "epoch": 0.6992589774407485, "grad_norm": 12.077358258588344, "learning_rate": 4.36508195865008e-06, "loss": 0.20599365234375, "step": 80870 }, { "epoch": 0.6993022109622917, "grad_norm": 7.959377790790982, "learning_rate": 4.364900535702587e-06, "loss": 0.111175537109375, "step": 80875 }, { "epoch": 0.699345444483835, "grad_norm": 1.09331638779252, "learning_rate": 4.364719106460293e-06, "loss": 0.416864013671875, "step": 80880 }, { "epoch": 0.6993886780053783, "grad_norm": 2.401733648077829, "learning_rate": 4.364537670924036e-06, "loss": 0.096429443359375, "step": 80885 }, { "epoch": 0.6994319115269215, "grad_norm": 5.383140405542601, "learning_rate": 4.3643562290946505e-06, "loss": 0.13349227905273436, "step": 80890 }, { "epoch": 0.6994751450484648, "grad_norm": 31.079211172114835, "learning_rate": 4.364174780972977e-06, "loss": 0.17025012969970704, "step": 80895 }, { "epoch": 0.6995183785700081, "grad_norm": 17.809092541190076, "learning_rate": 4.363993326559848e-06, "loss": 0.164703369140625, "step": 80900 }, { "epoch": 0.6995616120915513, "grad_norm": 6.488837306800001, "learning_rate": 4.363811865856102e-06, "loss": 0.17033767700195312, "step": 80905 }, { "epoch": 0.6996048456130946, "grad_norm": 12.195657082123974, "learning_rate": 4.363630398862577e-06, "loss": 0.29298248291015627, "step": 80910 }, { "epoch": 0.6996480791346378, "grad_norm": 0.16109740039930676, "learning_rate": 4.363448925580109e-06, "loss": 0.20536346435546876, "step": 80915 }, { "epoch": 0.6996913126561811, "grad_norm": 15.076502261548725, "learning_rate": 4.363267446009535e-06, "loss": 0.107421875, "step": 80920 }, { "epoch": 0.6997345461777243, "grad_norm": 3.163652876193587, "learning_rate": 4.363085960151692e-06, "loss": 0.052394866943359375, "step": 80925 }, { "epoch": 0.6997777796992676, "grad_norm": 9.475792961428327, "learning_rate": 4.362904468007417e-06, "loss": 0.15839080810546874, "step": 80930 }, { "epoch": 0.6998210132208109, "grad_norm": 9.768928299282335, "learning_rate": 4.362722969577546e-06, "loss": 0.33429107666015623, "step": 80935 }, { "epoch": 0.6998642467423541, "grad_norm": 1.262629922795859, "learning_rate": 4.362541464862919e-06, "loss": 0.256024169921875, "step": 80940 }, { "epoch": 0.6999074802638974, "grad_norm": 22.30460753352465, "learning_rate": 4.362359953864371e-06, "loss": 0.1268707275390625, "step": 80945 }, { "epoch": 0.6999507137854407, "grad_norm": 22.56874815644318, "learning_rate": 4.362178436582738e-06, "loss": 0.14808349609375, "step": 80950 }, { "epoch": 0.6999939473069839, "grad_norm": 1.9540584760054254, "learning_rate": 4.361996913018859e-06, "loss": 0.2290557861328125, "step": 80955 }, { "epoch": 0.7000371808285272, "grad_norm": 22.28453603607343, "learning_rate": 4.361815383173571e-06, "loss": 0.12570724487304688, "step": 80960 }, { "epoch": 0.7000804143500705, "grad_norm": 5.813721892499418, "learning_rate": 4.36163384704771e-06, "loss": 0.16096954345703124, "step": 80965 }, { "epoch": 0.7001236478716137, "grad_norm": 7.574897002073507, "learning_rate": 4.361452304642114e-06, "loss": 0.24718017578125, "step": 80970 }, { "epoch": 0.700166881393157, "grad_norm": 9.007664497787477, "learning_rate": 4.3612707559576215e-06, "loss": 0.15011768341064452, "step": 80975 }, { "epoch": 0.7002101149147003, "grad_norm": 2.205643540004327, "learning_rate": 4.3610892009950675e-06, "loss": 0.18792877197265626, "step": 80980 }, { "epoch": 0.7002533484362435, "grad_norm": 1.9643604822448757, "learning_rate": 4.36090763975529e-06, "loss": 0.205938720703125, "step": 80985 }, { "epoch": 0.7002965819577868, "grad_norm": 6.086293454031793, "learning_rate": 4.360726072239128e-06, "loss": 0.13241424560546874, "step": 80990 }, { "epoch": 0.7003398154793301, "grad_norm": 29.796484988782122, "learning_rate": 4.360544498447417e-06, "loss": 0.16619796752929689, "step": 80995 }, { "epoch": 0.7003830490008733, "grad_norm": 15.615007404557883, "learning_rate": 4.360362918380994e-06, "loss": 0.5532562255859375, "step": 81000 }, { "epoch": 0.7004262825224166, "grad_norm": 0.7191749381809232, "learning_rate": 4.360181332040698e-06, "loss": 0.19051895141601563, "step": 81005 }, { "epoch": 0.7004695160439598, "grad_norm": 33.557808346083874, "learning_rate": 4.359999739427368e-06, "loss": 0.3359893798828125, "step": 81010 }, { "epoch": 0.7005127495655031, "grad_norm": 4.97430797623683, "learning_rate": 4.359818140541836e-06, "loss": 0.16380615234375, "step": 81015 }, { "epoch": 0.7005559830870464, "grad_norm": 9.669098798519686, "learning_rate": 4.359636535384945e-06, "loss": 0.07519683837890626, "step": 81020 }, { "epoch": 0.7005992166085896, "grad_norm": 2.2300879012613035, "learning_rate": 4.35945492395753e-06, "loss": 0.046543121337890625, "step": 81025 }, { "epoch": 0.7006424501301329, "grad_norm": 3.4699038740490904, "learning_rate": 4.359273306260428e-06, "loss": 0.17551612854003906, "step": 81030 }, { "epoch": 0.7006856836516762, "grad_norm": 4.128991983091286, "learning_rate": 4.359091682294479e-06, "loss": 0.2259124755859375, "step": 81035 }, { "epoch": 0.7007289171732194, "grad_norm": 15.69462313047174, "learning_rate": 4.358910052060519e-06, "loss": 0.08816146850585938, "step": 81040 }, { "epoch": 0.7007721506947627, "grad_norm": 22.765721323169394, "learning_rate": 4.358728415559386e-06, "loss": 0.4860679626464844, "step": 81045 }, { "epoch": 0.700815384216306, "grad_norm": 3.079763243899565, "learning_rate": 4.358546772791916e-06, "loss": 0.08826904296875, "step": 81050 }, { "epoch": 0.7008586177378492, "grad_norm": 11.731962922166188, "learning_rate": 4.35836512375895e-06, "loss": 0.11154403686523437, "step": 81055 }, { "epoch": 0.7009018512593925, "grad_norm": 37.41669040513683, "learning_rate": 4.358183468461324e-06, "loss": 0.30728759765625, "step": 81060 }, { "epoch": 0.7009450847809358, "grad_norm": 0.5628474067445636, "learning_rate": 4.3580018068998755e-06, "loss": 0.057183456420898435, "step": 81065 }, { "epoch": 0.700988318302479, "grad_norm": 4.698619759209907, "learning_rate": 4.357820139075442e-06, "loss": 0.1576904296875, "step": 81070 }, { "epoch": 0.7010315518240223, "grad_norm": 0.46534829629219837, "learning_rate": 4.357638464988863e-06, "loss": 0.18508148193359375, "step": 81075 }, { "epoch": 0.7010747853455656, "grad_norm": 2.229323307392795, "learning_rate": 4.357456784640975e-06, "loss": 0.095703125, "step": 81080 }, { "epoch": 0.7011180188671088, "grad_norm": 3.5452451270752174, "learning_rate": 4.357275098032617e-06, "loss": 0.18767547607421875, "step": 81085 }, { "epoch": 0.701161252388652, "grad_norm": 3.940827570318308, "learning_rate": 4.357093405164626e-06, "loss": 0.37387723922729493, "step": 81090 }, { "epoch": 0.7012044859101954, "grad_norm": 2.6815952053115315, "learning_rate": 4.356911706037839e-06, "loss": 0.02508544921875, "step": 81095 }, { "epoch": 0.7012477194317386, "grad_norm": 3.1207687237940087, "learning_rate": 4.356730000653096e-06, "loss": 0.13058929443359374, "step": 81100 }, { "epoch": 0.7012909529532818, "grad_norm": 11.381131325283864, "learning_rate": 4.356548289011233e-06, "loss": 0.060428619384765625, "step": 81105 }, { "epoch": 0.7013341864748251, "grad_norm": 7.090643558508803, "learning_rate": 4.356366571113091e-06, "loss": 0.17806549072265626, "step": 81110 }, { "epoch": 0.7013774199963684, "grad_norm": 0.6532982861376082, "learning_rate": 4.356184846959505e-06, "loss": 0.12955322265625, "step": 81115 }, { "epoch": 0.7014206535179116, "grad_norm": 19.015048418354866, "learning_rate": 4.3560031165513144e-06, "loss": 0.4316802978515625, "step": 81120 }, { "epoch": 0.701463887039455, "grad_norm": 7.243346989050628, "learning_rate": 4.355821379889358e-06, "loss": 0.045151901245117185, "step": 81125 }, { "epoch": 0.7015071205609982, "grad_norm": 9.992287741061357, "learning_rate": 4.355639636974472e-06, "loss": 0.201910400390625, "step": 81130 }, { "epoch": 0.7015503540825414, "grad_norm": 40.5153250281022, "learning_rate": 4.355457887807496e-06, "loss": 0.1672332763671875, "step": 81135 }, { "epoch": 0.7015935876040847, "grad_norm": 14.537981693409014, "learning_rate": 4.355276132389268e-06, "loss": 0.107110595703125, "step": 81140 }, { "epoch": 0.701636821125628, "grad_norm": 1.341222302478872, "learning_rate": 4.355094370720627e-06, "loss": 0.1158447265625, "step": 81145 }, { "epoch": 0.7016800546471712, "grad_norm": 1.7220536943929432, "learning_rate": 4.354912602802411e-06, "loss": 0.12167892456054688, "step": 81150 }, { "epoch": 0.7017232881687145, "grad_norm": 29.97859399963512, "learning_rate": 4.3547308286354556e-06, "loss": 0.13347091674804687, "step": 81155 }, { "epoch": 0.7017665216902578, "grad_norm": 0.05782563440681116, "learning_rate": 4.354549048220603e-06, "loss": 0.16803817749023436, "step": 81160 }, { "epoch": 0.701809755211801, "grad_norm": 6.564987344603206, "learning_rate": 4.354367261558689e-06, "loss": 0.18741607666015625, "step": 81165 }, { "epoch": 0.7018529887333443, "grad_norm": 0.4278063902015095, "learning_rate": 4.3541854686505535e-06, "loss": 0.13802337646484375, "step": 81170 }, { "epoch": 0.7018962222548876, "grad_norm": 29.24888364471813, "learning_rate": 4.3540036694970345e-06, "loss": 0.1238311767578125, "step": 81175 }, { "epoch": 0.7019394557764308, "grad_norm": 16.03248771149967, "learning_rate": 4.35382186409897e-06, "loss": 0.07183837890625, "step": 81180 }, { "epoch": 0.701982689297974, "grad_norm": 16.707100281895762, "learning_rate": 4.353640052457199e-06, "loss": 0.0624908447265625, "step": 81185 }, { "epoch": 0.7020259228195174, "grad_norm": 34.52353305054334, "learning_rate": 4.3534582345725585e-06, "loss": 0.21660537719726564, "step": 81190 }, { "epoch": 0.7020691563410606, "grad_norm": 0.9665722864916567, "learning_rate": 4.353276410445889e-06, "loss": 0.156060791015625, "step": 81195 }, { "epoch": 0.7021123898626038, "grad_norm": 15.51079669617745, "learning_rate": 4.353094580078029e-06, "loss": 0.08783493041992188, "step": 81200 }, { "epoch": 0.7021556233841472, "grad_norm": 0.9889880283236221, "learning_rate": 4.352912743469815e-06, "loss": 0.26950225830078123, "step": 81205 }, { "epoch": 0.7021988569056904, "grad_norm": 1.391275948538319, "learning_rate": 4.352730900622088e-06, "loss": 0.205126953125, "step": 81210 }, { "epoch": 0.7022420904272336, "grad_norm": 0.7203235081613452, "learning_rate": 4.352549051535685e-06, "loss": 0.0130828857421875, "step": 81215 }, { "epoch": 0.702285323948777, "grad_norm": 1.8183168115228514, "learning_rate": 4.352367196211445e-06, "loss": 0.0290008544921875, "step": 81220 }, { "epoch": 0.7023285574703202, "grad_norm": 0.2355681638710652, "learning_rate": 4.352185334650208e-06, "loss": 0.0304443359375, "step": 81225 }, { "epoch": 0.7023717909918634, "grad_norm": 3.0746753098148627, "learning_rate": 4.352003466852811e-06, "loss": 0.14707040786743164, "step": 81230 }, { "epoch": 0.7024150245134068, "grad_norm": 17.47435668063142, "learning_rate": 4.351821592820094e-06, "loss": 0.21496505737304689, "step": 81235 }, { "epoch": 0.70245825803495, "grad_norm": 2.2750077248512417, "learning_rate": 4.351639712552895e-06, "loss": 0.11639690399169922, "step": 81240 }, { "epoch": 0.7025014915564932, "grad_norm": 3.998885153956391, "learning_rate": 4.351457826052053e-06, "loss": 0.09256591796875, "step": 81245 }, { "epoch": 0.7025447250780366, "grad_norm": 19.64930817062829, "learning_rate": 4.3512759333184065e-06, "loss": 0.11091690063476563, "step": 81250 }, { "epoch": 0.7025879585995798, "grad_norm": 16.79108409132846, "learning_rate": 4.351094034352795e-06, "loss": 0.1743438720703125, "step": 81255 }, { "epoch": 0.702631192121123, "grad_norm": 25.662653414283987, "learning_rate": 4.350912129156058e-06, "loss": 0.2465850830078125, "step": 81260 }, { "epoch": 0.7026744256426662, "grad_norm": 1.6593825589483837, "learning_rate": 4.350730217729033e-06, "loss": 0.502020263671875, "step": 81265 }, { "epoch": 0.7027176591642096, "grad_norm": 1.7549303376265366, "learning_rate": 4.350548300072559e-06, "loss": 0.05579986572265625, "step": 81270 }, { "epoch": 0.7027608926857528, "grad_norm": 9.720097751116862, "learning_rate": 4.350366376187476e-06, "loss": 0.2430694580078125, "step": 81275 }, { "epoch": 0.702804126207296, "grad_norm": 40.50323204496384, "learning_rate": 4.350184446074624e-06, "loss": 0.365570068359375, "step": 81280 }, { "epoch": 0.7028473597288394, "grad_norm": 6.450325597097926, "learning_rate": 4.350002509734839e-06, "loss": 0.05517311096191406, "step": 81285 }, { "epoch": 0.7028905932503826, "grad_norm": 2.924044812540637, "learning_rate": 4.349820567168962e-06, "loss": 0.1450164794921875, "step": 81290 }, { "epoch": 0.7029338267719258, "grad_norm": 17.303716895673723, "learning_rate": 4.349638618377832e-06, "loss": 0.309100341796875, "step": 81295 }, { "epoch": 0.7029770602934692, "grad_norm": 1.3600913708307945, "learning_rate": 4.349456663362287e-06, "loss": 0.15582275390625, "step": 81300 }, { "epoch": 0.7030202938150124, "grad_norm": 8.81685305962187, "learning_rate": 4.349274702123168e-06, "loss": 0.1109283447265625, "step": 81305 }, { "epoch": 0.7030635273365556, "grad_norm": 35.579605103500924, "learning_rate": 4.349092734661314e-06, "loss": 0.28308639526367185, "step": 81310 }, { "epoch": 0.703106760858099, "grad_norm": 6.348044295537209, "learning_rate": 4.3489107609775625e-06, "loss": 0.12373199462890624, "step": 81315 }, { "epoch": 0.7031499943796422, "grad_norm": 0.7897572235472019, "learning_rate": 4.348728781072755e-06, "loss": 0.07214508056640626, "step": 81320 }, { "epoch": 0.7031932279011854, "grad_norm": 23.80428129307253, "learning_rate": 4.3485467949477275e-06, "loss": 0.14901885986328126, "step": 81325 }, { "epoch": 0.7032364614227288, "grad_norm": 4.145821575020574, "learning_rate": 4.348364802603322e-06, "loss": 0.0330902099609375, "step": 81330 }, { "epoch": 0.703279694944272, "grad_norm": 6.510290619647555, "learning_rate": 4.348182804040378e-06, "loss": 0.06252822875976563, "step": 81335 }, { "epoch": 0.7033229284658152, "grad_norm": 0.34539599292828016, "learning_rate": 4.348000799259734e-06, "loss": 0.0571014404296875, "step": 81340 }, { "epoch": 0.7033661619873586, "grad_norm": 0.8803658377359213, "learning_rate": 4.347818788262229e-06, "loss": 0.06860389709472656, "step": 81345 }, { "epoch": 0.7034093955089018, "grad_norm": 1.4522729577185225, "learning_rate": 4.347636771048703e-06, "loss": 0.30257091522216795, "step": 81350 }, { "epoch": 0.703452629030445, "grad_norm": 12.484903501890258, "learning_rate": 4.347454747619995e-06, "loss": 0.05380058288574219, "step": 81355 }, { "epoch": 0.7034958625519883, "grad_norm": 0.9677231994963317, "learning_rate": 4.347272717976946e-06, "loss": 0.31254425048828127, "step": 81360 }, { "epoch": 0.7035390960735316, "grad_norm": 8.314512310903933, "learning_rate": 4.347090682120392e-06, "loss": 0.3976837158203125, "step": 81365 }, { "epoch": 0.7035823295950748, "grad_norm": 1.6440605284251941, "learning_rate": 4.346908640051177e-06, "loss": 0.1297393798828125, "step": 81370 }, { "epoch": 0.7036255631166181, "grad_norm": 5.620394031888474, "learning_rate": 4.3467265917701374e-06, "loss": 0.121380615234375, "step": 81375 }, { "epoch": 0.7036687966381614, "grad_norm": 24.629230720105543, "learning_rate": 4.346544537278114e-06, "loss": 0.11025390625, "step": 81380 }, { "epoch": 0.7037120301597046, "grad_norm": 5.534672975599029, "learning_rate": 4.346362476575945e-06, "loss": 0.08088932037353516, "step": 81385 }, { "epoch": 0.7037552636812479, "grad_norm": 8.900599053201049, "learning_rate": 4.346180409664472e-06, "loss": 0.13311920166015626, "step": 81390 }, { "epoch": 0.7037984972027912, "grad_norm": 17.619635075818604, "learning_rate": 4.345998336544534e-06, "loss": 0.11542015075683594, "step": 81395 }, { "epoch": 0.7038417307243344, "grad_norm": 4.38402461360357, "learning_rate": 4.345816257216972e-06, "loss": 0.04240875244140625, "step": 81400 }, { "epoch": 0.7038849642458777, "grad_norm": 3.5209536885705743, "learning_rate": 4.345634171682623e-06, "loss": 0.146527099609375, "step": 81405 }, { "epoch": 0.703928197767421, "grad_norm": 3.711332895564667, "learning_rate": 4.345452079942328e-06, "loss": 0.1319091796875, "step": 81410 }, { "epoch": 0.7039714312889642, "grad_norm": 8.439328689627715, "learning_rate": 4.345269981996926e-06, "loss": 0.076904296875, "step": 81415 }, { "epoch": 0.7040146648105075, "grad_norm": 0.5879199409767049, "learning_rate": 4.345087877847259e-06, "loss": 0.253533935546875, "step": 81420 }, { "epoch": 0.7040578983320508, "grad_norm": 2.4580389116962666, "learning_rate": 4.344905767494166e-06, "loss": 0.1085845947265625, "step": 81425 }, { "epoch": 0.704101131853594, "grad_norm": 7.725383371491828, "learning_rate": 4.3447236509384855e-06, "loss": 0.10987968444824218, "step": 81430 }, { "epoch": 0.7041443653751372, "grad_norm": 12.735579507381868, "learning_rate": 4.344541528181059e-06, "loss": 0.22726707458496093, "step": 81435 }, { "epoch": 0.7041875988966805, "grad_norm": 8.255279367652168, "learning_rate": 4.344359399222725e-06, "loss": 0.09534530639648438, "step": 81440 }, { "epoch": 0.7042308324182238, "grad_norm": 7.984310822633186, "learning_rate": 4.344177264064324e-06, "loss": 0.13851318359375, "step": 81445 }, { "epoch": 0.704274065939767, "grad_norm": 1.1983724311362631, "learning_rate": 4.3439951227066975e-06, "loss": 0.10529899597167969, "step": 81450 }, { "epoch": 0.7043172994613103, "grad_norm": 13.372067191257988, "learning_rate": 4.343812975150684e-06, "loss": 0.05160598754882813, "step": 81455 }, { "epoch": 0.7043605329828536, "grad_norm": 7.607185441754597, "learning_rate": 4.343630821397122e-06, "loss": 0.28369979858398436, "step": 81460 }, { "epoch": 0.7044037665043968, "grad_norm": 10.554721856620763, "learning_rate": 4.343448661446856e-06, "loss": 0.22459869384765624, "step": 81465 }, { "epoch": 0.7044470000259401, "grad_norm": 1.5178459272804485, "learning_rate": 4.343266495300722e-06, "loss": 0.12223129272460938, "step": 81470 }, { "epoch": 0.7044902335474834, "grad_norm": 31.090232544955782, "learning_rate": 4.343084322959562e-06, "loss": 0.1586578369140625, "step": 81475 }, { "epoch": 0.7045334670690266, "grad_norm": 26.46810606024044, "learning_rate": 4.342902144424216e-06, "loss": 0.384747314453125, "step": 81480 }, { "epoch": 0.7045767005905699, "grad_norm": 8.575159414599506, "learning_rate": 4.342719959695523e-06, "loss": 0.06458511352539062, "step": 81485 }, { "epoch": 0.7046199341121132, "grad_norm": 25.436541587165934, "learning_rate": 4.342537768774325e-06, "loss": 0.3258209228515625, "step": 81490 }, { "epoch": 0.7046631676336564, "grad_norm": 0.7936945952598288, "learning_rate": 4.342355571661461e-06, "loss": 0.1305379867553711, "step": 81495 }, { "epoch": 0.7047064011551997, "grad_norm": 40.55199613071958, "learning_rate": 4.342173368357772e-06, "loss": 0.6939727783203125, "step": 81500 }, { "epoch": 0.704749634676743, "grad_norm": 0.6463501333914508, "learning_rate": 4.3419911588640985e-06, "loss": 0.16587066650390625, "step": 81505 }, { "epoch": 0.7047928681982862, "grad_norm": 12.409211532881057, "learning_rate": 4.34180894318128e-06, "loss": 0.3783306121826172, "step": 81510 }, { "epoch": 0.7048361017198295, "grad_norm": 0.15391950024004372, "learning_rate": 4.341626721310157e-06, "loss": 0.022454071044921874, "step": 81515 }, { "epoch": 0.7048793352413727, "grad_norm": 1.7275949155471777, "learning_rate": 4.34144449325157e-06, "loss": 0.09818115234375, "step": 81520 }, { "epoch": 0.704922568762916, "grad_norm": 2.3043803049363536, "learning_rate": 4.3412622590063585e-06, "loss": 0.027685546875, "step": 81525 }, { "epoch": 0.7049658022844593, "grad_norm": 1.9403635909724963, "learning_rate": 4.341080018575366e-06, "loss": 0.1390453338623047, "step": 81530 }, { "epoch": 0.7050090358060025, "grad_norm": 5.128135901513795, "learning_rate": 4.34089777195943e-06, "loss": 0.350653076171875, "step": 81535 }, { "epoch": 0.7050522693275458, "grad_norm": 47.290710391177655, "learning_rate": 4.340715519159392e-06, "loss": 0.38153800964355467, "step": 81540 }, { "epoch": 0.7050955028490891, "grad_norm": 0.3214063992130978, "learning_rate": 4.340533260176093e-06, "loss": 0.06008453369140625, "step": 81545 }, { "epoch": 0.7051387363706323, "grad_norm": 13.71767746447391, "learning_rate": 4.340350995010373e-06, "loss": 0.49559326171875, "step": 81550 }, { "epoch": 0.7051819698921756, "grad_norm": 6.963247508468867, "learning_rate": 4.3401687236630715e-06, "loss": 0.07865047454833984, "step": 81555 }, { "epoch": 0.7052252034137189, "grad_norm": 8.168482543221273, "learning_rate": 4.3399864461350316e-06, "loss": 0.06783447265625, "step": 81560 }, { "epoch": 0.7052684369352621, "grad_norm": 2.3278393164413695, "learning_rate": 4.339804162427092e-06, "loss": 0.0990488052368164, "step": 81565 }, { "epoch": 0.7053116704568054, "grad_norm": 0.7828741884726024, "learning_rate": 4.339621872540093e-06, "loss": 0.2660850524902344, "step": 81570 }, { "epoch": 0.7053549039783487, "grad_norm": 2.5716742395998278, "learning_rate": 4.339439576474879e-06, "loss": 0.196630859375, "step": 81575 }, { "epoch": 0.7053981374998919, "grad_norm": 9.231023491727044, "learning_rate": 4.339257274232285e-06, "loss": 0.051409912109375, "step": 81580 }, { "epoch": 0.7054413710214352, "grad_norm": 4.020930508892019, "learning_rate": 4.339074965813156e-06, "loss": 0.123382568359375, "step": 81585 }, { "epoch": 0.7054846045429785, "grad_norm": 2.0805978562372927, "learning_rate": 4.338892651218333e-06, "loss": 0.09824066162109375, "step": 81590 }, { "epoch": 0.7055278380645217, "grad_norm": 15.149144828580278, "learning_rate": 4.3387103304486535e-06, "loss": 0.14720458984375, "step": 81595 }, { "epoch": 0.705571071586065, "grad_norm": 13.095266740889802, "learning_rate": 4.3385280035049605e-06, "loss": 0.09213790893554688, "step": 81600 }, { "epoch": 0.7056143051076083, "grad_norm": 15.153604254421413, "learning_rate": 4.3383456703880956e-06, "loss": 0.09677886962890625, "step": 81605 }, { "epoch": 0.7056575386291515, "grad_norm": 1.1651148313676112, "learning_rate": 4.338163331098897e-06, "loss": 0.12364959716796875, "step": 81610 }, { "epoch": 0.7057007721506947, "grad_norm": 25.01556819023241, "learning_rate": 4.3379809856382094e-06, "loss": 0.5813735961914063, "step": 81615 }, { "epoch": 0.705744005672238, "grad_norm": 5.228228318252446, "learning_rate": 4.33779863400687e-06, "loss": 0.07848663330078125, "step": 81620 }, { "epoch": 0.7057872391937813, "grad_norm": 6.776637469786549, "learning_rate": 4.337616276205724e-06, "loss": 0.281982421875, "step": 81625 }, { "epoch": 0.7058304727153245, "grad_norm": 32.32650479122368, "learning_rate": 4.3374339122356085e-06, "loss": 0.438470458984375, "step": 81630 }, { "epoch": 0.7058737062368678, "grad_norm": 0.37879945476013344, "learning_rate": 4.3372515420973645e-06, "loss": 0.09794921875, "step": 81635 }, { "epoch": 0.7059169397584111, "grad_norm": 0.27818802380703395, "learning_rate": 4.337069165791836e-06, "loss": 0.06729583740234375, "step": 81640 }, { "epoch": 0.7059601732799543, "grad_norm": 10.273452246170395, "learning_rate": 4.3368867833198625e-06, "loss": 0.23426361083984376, "step": 81645 }, { "epoch": 0.7060034068014976, "grad_norm": 1.5031714234743632, "learning_rate": 4.336704394682285e-06, "loss": 0.08129043579101562, "step": 81650 }, { "epoch": 0.7060466403230409, "grad_norm": 7.847930371796977, "learning_rate": 4.336521999879946e-06, "loss": 0.15017414093017578, "step": 81655 }, { "epoch": 0.7060898738445841, "grad_norm": 5.058719819456223, "learning_rate": 4.3363395989136844e-06, "loss": 0.0547210693359375, "step": 81660 }, { "epoch": 0.7061331073661274, "grad_norm": 26.441745941326023, "learning_rate": 4.3361571917843435e-06, "loss": 0.12407989501953125, "step": 81665 }, { "epoch": 0.7061763408876707, "grad_norm": 1.0936368580014662, "learning_rate": 4.3359747784927625e-06, "loss": 0.067364501953125, "step": 81670 }, { "epoch": 0.7062195744092139, "grad_norm": 22.53037186036993, "learning_rate": 4.335792359039785e-06, "loss": 0.1289306640625, "step": 81675 }, { "epoch": 0.7062628079307572, "grad_norm": 20.601080341049084, "learning_rate": 4.335609933426251e-06, "loss": 0.2215179443359375, "step": 81680 }, { "epoch": 0.7063060414523005, "grad_norm": 0.11801898925680654, "learning_rate": 4.335427501653001e-06, "loss": 0.09366312026977539, "step": 81685 }, { "epoch": 0.7063492749738437, "grad_norm": 33.8527623414574, "learning_rate": 4.3352450637208785e-06, "loss": 0.18310546875, "step": 81690 }, { "epoch": 0.7063925084953869, "grad_norm": 2.5251954955994766, "learning_rate": 4.335062619630723e-06, "loss": 0.1138885498046875, "step": 81695 }, { "epoch": 0.7064357420169303, "grad_norm": 0.8921570917164273, "learning_rate": 4.334880169383377e-06, "loss": 0.025232696533203126, "step": 81700 }, { "epoch": 0.7064789755384735, "grad_norm": 51.04196640995936, "learning_rate": 4.334697712979682e-06, "loss": 0.24441757202148437, "step": 81705 }, { "epoch": 0.7065222090600167, "grad_norm": 6.685495947682013, "learning_rate": 4.334515250420479e-06, "loss": 0.22819671630859376, "step": 81710 }, { "epoch": 0.7065654425815601, "grad_norm": 0.40622918261362767, "learning_rate": 4.33433278170661e-06, "loss": 0.08292007446289062, "step": 81715 }, { "epoch": 0.7066086761031033, "grad_norm": 3.6224162680156673, "learning_rate": 4.334150306838914e-06, "loss": 0.023645973205566405, "step": 81720 }, { "epoch": 0.7066519096246465, "grad_norm": 33.46580369579083, "learning_rate": 4.333967825818237e-06, "loss": 0.25982131958007815, "step": 81725 }, { "epoch": 0.7066951431461899, "grad_norm": 2.1203268590235815, "learning_rate": 4.333785338645417e-06, "loss": 0.2947662353515625, "step": 81730 }, { "epoch": 0.7067383766677331, "grad_norm": 30.85725380044715, "learning_rate": 4.333602845321298e-06, "loss": 0.2822425842285156, "step": 81735 }, { "epoch": 0.7067816101892763, "grad_norm": 1.262637955591581, "learning_rate": 4.333420345846719e-06, "loss": 0.103350830078125, "step": 81740 }, { "epoch": 0.7068248437108197, "grad_norm": 0.559694724866029, "learning_rate": 4.333237840222524e-06, "loss": 0.0578125, "step": 81745 }, { "epoch": 0.7068680772323629, "grad_norm": 4.586504449417537, "learning_rate": 4.333055328449554e-06, "loss": 0.24127655029296874, "step": 81750 }, { "epoch": 0.7069113107539061, "grad_norm": 13.464969187263993, "learning_rate": 4.33287281052865e-06, "loss": 0.08535690307617187, "step": 81755 }, { "epoch": 0.7069545442754495, "grad_norm": 13.776325237901423, "learning_rate": 4.332690286460655e-06, "loss": 0.2610313415527344, "step": 81760 }, { "epoch": 0.7069977777969927, "grad_norm": 5.659594912263115, "learning_rate": 4.3325077562464106e-06, "loss": 0.0563812255859375, "step": 81765 }, { "epoch": 0.7070410113185359, "grad_norm": 1.8055074734785157, "learning_rate": 4.332325219886757e-06, "loss": 0.1323944091796875, "step": 81770 }, { "epoch": 0.7070842448400793, "grad_norm": 7.438287497853295, "learning_rate": 4.332142677382538e-06, "loss": 0.16283073425292968, "step": 81775 }, { "epoch": 0.7071274783616225, "grad_norm": 3.12467160335954, "learning_rate": 4.331960128734595e-06, "loss": 0.23567657470703124, "step": 81780 }, { "epoch": 0.7071707118831657, "grad_norm": 10.887331785862509, "learning_rate": 4.331777573943769e-06, "loss": 0.0712493896484375, "step": 81785 }, { "epoch": 0.7072139454047089, "grad_norm": 15.434477404170865, "learning_rate": 4.331595013010902e-06, "loss": 0.13574066162109374, "step": 81790 }, { "epoch": 0.7072571789262523, "grad_norm": 9.181068512045588, "learning_rate": 4.331412445936837e-06, "loss": 0.0823272705078125, "step": 81795 }, { "epoch": 0.7073004124477955, "grad_norm": 0.35178587678159545, "learning_rate": 4.331229872722416e-06, "loss": 0.07740478515625, "step": 81800 }, { "epoch": 0.7073436459693387, "grad_norm": 4.838383046976751, "learning_rate": 4.33104729336848e-06, "loss": 0.14188613891601562, "step": 81805 }, { "epoch": 0.7073868794908821, "grad_norm": 2.0137896165535856, "learning_rate": 4.330864707875871e-06, "loss": 0.1283355712890625, "step": 81810 }, { "epoch": 0.7074301130124253, "grad_norm": 9.742956336849515, "learning_rate": 4.330682116245432e-06, "loss": 0.030836868286132812, "step": 81815 }, { "epoch": 0.7074733465339685, "grad_norm": 20.034746244199482, "learning_rate": 4.330499518478005e-06, "loss": 0.305615234375, "step": 81820 }, { "epoch": 0.7075165800555119, "grad_norm": 31.78903453405696, "learning_rate": 4.3303169145744315e-06, "loss": 0.3785297393798828, "step": 81825 }, { "epoch": 0.7075598135770551, "grad_norm": 4.055761981538845, "learning_rate": 4.330134304535554e-06, "loss": 0.14803009033203124, "step": 81830 }, { "epoch": 0.7076030470985983, "grad_norm": 5.782101835228106, "learning_rate": 4.329951688362214e-06, "loss": 0.185125732421875, "step": 81835 }, { "epoch": 0.7076462806201417, "grad_norm": 37.92772974252461, "learning_rate": 4.329769066055254e-06, "loss": 0.19947967529296876, "step": 81840 }, { "epoch": 0.7076895141416849, "grad_norm": 0.3807613770078174, "learning_rate": 4.329586437615518e-06, "loss": 0.0323974609375, "step": 81845 }, { "epoch": 0.7077327476632281, "grad_norm": 48.054640723443704, "learning_rate": 4.329403803043847e-06, "loss": 0.161370849609375, "step": 81850 }, { "epoch": 0.7077759811847715, "grad_norm": 4.586221849313299, "learning_rate": 4.329221162341081e-06, "loss": 0.24436073303222655, "step": 81855 }, { "epoch": 0.7078192147063147, "grad_norm": 27.06210028699503, "learning_rate": 4.329038515508067e-06, "loss": 0.2624248504638672, "step": 81860 }, { "epoch": 0.7078624482278579, "grad_norm": 37.56741466194332, "learning_rate": 4.328855862545643e-06, "loss": 0.20560760498046876, "step": 81865 }, { "epoch": 0.7079056817494012, "grad_norm": 0.28473261292497526, "learning_rate": 4.328673203454654e-06, "loss": 0.10724945068359375, "step": 81870 }, { "epoch": 0.7079489152709445, "grad_norm": 7.242580119101449, "learning_rate": 4.3284905382359405e-06, "loss": 0.10901107788085937, "step": 81875 }, { "epoch": 0.7079921487924877, "grad_norm": 5.463911888735756, "learning_rate": 4.328307866890348e-06, "loss": 0.216015625, "step": 81880 }, { "epoch": 0.708035382314031, "grad_norm": 4.9392594930511216, "learning_rate": 4.328125189418716e-06, "loss": 0.5083175659179687, "step": 81885 }, { "epoch": 0.7080786158355743, "grad_norm": 13.173030724293872, "learning_rate": 4.327942505821887e-06, "loss": 0.17718887329101562, "step": 81890 }, { "epoch": 0.7081218493571175, "grad_norm": 18.243601555910754, "learning_rate": 4.327759816100706e-06, "loss": 0.26851959228515626, "step": 81895 }, { "epoch": 0.7081650828786608, "grad_norm": 2.071922746967601, "learning_rate": 4.327577120256013e-06, "loss": 0.13750038146972657, "step": 81900 }, { "epoch": 0.7082083164002041, "grad_norm": 4.004604789918113, "learning_rate": 4.327394418288652e-06, "loss": 0.1546875, "step": 81905 }, { "epoch": 0.7082515499217473, "grad_norm": 31.179350948650818, "learning_rate": 4.327211710199465e-06, "loss": 0.19090423583984376, "step": 81910 }, { "epoch": 0.7082947834432906, "grad_norm": 6.658971304910045, "learning_rate": 4.327028995989296e-06, "loss": 0.4447929382324219, "step": 81915 }, { "epoch": 0.7083380169648339, "grad_norm": 4.228206847149128, "learning_rate": 4.326846275658985e-06, "loss": 0.16139068603515624, "step": 81920 }, { "epoch": 0.7083812504863771, "grad_norm": 1.4948879555347854, "learning_rate": 4.326663549209377e-06, "loss": 0.3871246337890625, "step": 81925 }, { "epoch": 0.7084244840079204, "grad_norm": 7.507686612927782, "learning_rate": 4.326480816641314e-06, "loss": 0.12124099731445312, "step": 81930 }, { "epoch": 0.7084677175294637, "grad_norm": 6.832016152894039, "learning_rate": 4.326298077955638e-06, "loss": 0.07086105346679687, "step": 81935 }, { "epoch": 0.7085109510510069, "grad_norm": 4.711037259042006, "learning_rate": 4.326115333153194e-06, "loss": 0.297979736328125, "step": 81940 }, { "epoch": 0.7085541845725501, "grad_norm": 25.310042688289023, "learning_rate": 4.3259325822348214e-06, "loss": 0.1969268798828125, "step": 81945 }, { "epoch": 0.7085974180940935, "grad_norm": 0.8134520144541931, "learning_rate": 4.325749825201366e-06, "loss": 0.05483322143554688, "step": 81950 }, { "epoch": 0.7086406516156367, "grad_norm": 1.7712003308145534, "learning_rate": 4.325567062053669e-06, "loss": 0.04022064208984375, "step": 81955 }, { "epoch": 0.70868388513718, "grad_norm": 19.61367776596773, "learning_rate": 4.325384292792574e-06, "loss": 0.3135093688964844, "step": 81960 }, { "epoch": 0.7087271186587232, "grad_norm": 1.4669235015773086, "learning_rate": 4.325201517418925e-06, "loss": 0.09358673095703125, "step": 81965 }, { "epoch": 0.7087703521802665, "grad_norm": 6.484788394780266, "learning_rate": 4.325018735933562e-06, "loss": 0.1455982208251953, "step": 81970 }, { "epoch": 0.7088135857018097, "grad_norm": 55.79039694888006, "learning_rate": 4.32483594833733e-06, "loss": 0.316583251953125, "step": 81975 }, { "epoch": 0.708856819223353, "grad_norm": 18.126406316249025, "learning_rate": 4.324653154631072e-06, "loss": 0.07969970703125, "step": 81980 }, { "epoch": 0.7089000527448963, "grad_norm": 19.49398402414753, "learning_rate": 4.32447035481563e-06, "loss": 0.13048248291015624, "step": 81985 }, { "epoch": 0.7089432862664395, "grad_norm": 7.818609184861271, "learning_rate": 4.324287548891849e-06, "loss": 0.31290130615234374, "step": 81990 }, { "epoch": 0.7089865197879828, "grad_norm": 13.609233447216916, "learning_rate": 4.32410473686057e-06, "loss": 0.0478515625, "step": 81995 }, { "epoch": 0.7090297533095261, "grad_norm": 0.2874068077749435, "learning_rate": 4.323921918722637e-06, "loss": 0.3507091522216797, "step": 82000 }, { "epoch": 0.7090729868310693, "grad_norm": 11.66581842480579, "learning_rate": 4.323739094478893e-06, "loss": 0.2323394775390625, "step": 82005 }, { "epoch": 0.7091162203526126, "grad_norm": 2.4844360406466666, "learning_rate": 4.3235562641301815e-06, "loss": 0.19169540405273439, "step": 82010 }, { "epoch": 0.7091594538741559, "grad_norm": 45.889604316813156, "learning_rate": 4.323373427677345e-06, "loss": 0.23048553466796876, "step": 82015 }, { "epoch": 0.7092026873956991, "grad_norm": 2.8583402570955596, "learning_rate": 4.323190585121229e-06, "loss": 0.1207122802734375, "step": 82020 }, { "epoch": 0.7092459209172424, "grad_norm": 1.207093118774349, "learning_rate": 4.323007736462673e-06, "loss": 0.28186492919921874, "step": 82025 }, { "epoch": 0.7092891544387857, "grad_norm": 2.630849837221141, "learning_rate": 4.3228248817025235e-06, "loss": 0.045123291015625, "step": 82030 }, { "epoch": 0.7093323879603289, "grad_norm": 0.9140448110885017, "learning_rate": 4.322642020841621e-06, "loss": 0.09034881591796876, "step": 82035 }, { "epoch": 0.7093756214818722, "grad_norm": 13.017931867733992, "learning_rate": 4.322459153880811e-06, "loss": 0.24760894775390624, "step": 82040 }, { "epoch": 0.7094188550034154, "grad_norm": 17.31920233793031, "learning_rate": 4.322276280820937e-06, "loss": 0.5777389526367187, "step": 82045 }, { "epoch": 0.7094620885249587, "grad_norm": 37.29406613352645, "learning_rate": 4.322093401662841e-06, "loss": 0.5124351501464843, "step": 82050 }, { "epoch": 0.709505322046502, "grad_norm": 11.193724276876413, "learning_rate": 4.3219105164073675e-06, "loss": 0.1624542236328125, "step": 82055 }, { "epoch": 0.7095485555680452, "grad_norm": 1.1593792098084086, "learning_rate": 4.321727625055359e-06, "loss": 0.21197547912597656, "step": 82060 }, { "epoch": 0.7095917890895885, "grad_norm": 2.9358379472960268, "learning_rate": 4.321544727607659e-06, "loss": 0.1241973876953125, "step": 82065 }, { "epoch": 0.7096350226111318, "grad_norm": 18.6885716322062, "learning_rate": 4.3213618240651125e-06, "loss": 0.23469696044921876, "step": 82070 }, { "epoch": 0.709678256132675, "grad_norm": 37.01600436709648, "learning_rate": 4.321178914428562e-06, "loss": 0.1511505126953125, "step": 82075 }, { "epoch": 0.7097214896542183, "grad_norm": 2.197111918025533, "learning_rate": 4.32099599869885e-06, "loss": 0.10929336547851562, "step": 82080 }, { "epoch": 0.7097647231757616, "grad_norm": 7.2974356790116905, "learning_rate": 4.320813076876823e-06, "loss": 0.32241973876953123, "step": 82085 }, { "epoch": 0.7098079566973048, "grad_norm": 0.2597560451212494, "learning_rate": 4.320630148963321e-06, "loss": 0.17722015380859374, "step": 82090 }, { "epoch": 0.7098511902188481, "grad_norm": 0.06995546925052114, "learning_rate": 4.32044721495919e-06, "loss": 0.1800567626953125, "step": 82095 }, { "epoch": 0.7098944237403914, "grad_norm": 1.5528731869933494, "learning_rate": 4.3202642748652735e-06, "loss": 0.1329559326171875, "step": 82100 }, { "epoch": 0.7099376572619346, "grad_norm": 3.9532326809980547, "learning_rate": 4.320081328682415e-06, "loss": 0.03762054443359375, "step": 82105 }, { "epoch": 0.7099808907834779, "grad_norm": 1.105486190851153, "learning_rate": 4.319898376411457e-06, "loss": 0.03188018798828125, "step": 82110 }, { "epoch": 0.7100241243050212, "grad_norm": 0.3064256407205273, "learning_rate": 4.3197154180532455e-06, "loss": 0.1690725326538086, "step": 82115 }, { "epoch": 0.7100673578265644, "grad_norm": 18.344439369907914, "learning_rate": 4.319532453608623e-06, "loss": 0.23281478881835938, "step": 82120 }, { "epoch": 0.7101105913481077, "grad_norm": 3.0870829294902484, "learning_rate": 4.319349483078432e-06, "loss": 0.09567337036132813, "step": 82125 }, { "epoch": 0.710153824869651, "grad_norm": 20.863900422719873, "learning_rate": 4.319166506463519e-06, "loss": 0.34640016555786135, "step": 82130 }, { "epoch": 0.7101970583911942, "grad_norm": 9.674748318144777, "learning_rate": 4.318983523764727e-06, "loss": 0.2390716552734375, "step": 82135 }, { "epoch": 0.7102402919127374, "grad_norm": 27.730173117016452, "learning_rate": 4.318800534982898e-06, "loss": 0.128955078125, "step": 82140 }, { "epoch": 0.7102835254342807, "grad_norm": 13.212675887407313, "learning_rate": 4.318617540118879e-06, "loss": 0.18624420166015626, "step": 82145 }, { "epoch": 0.710326758955824, "grad_norm": 1.264276524585368, "learning_rate": 4.318434539173511e-06, "loss": 0.06920547485351562, "step": 82150 }, { "epoch": 0.7103699924773672, "grad_norm": 0.6425601484592547, "learning_rate": 4.318251532147641e-06, "loss": 0.06679162979125977, "step": 82155 }, { "epoch": 0.7104132259989105, "grad_norm": 1.7334550775236899, "learning_rate": 4.31806851904211e-06, "loss": 0.19408950805664063, "step": 82160 }, { "epoch": 0.7104564595204538, "grad_norm": 2.4254343167194796, "learning_rate": 4.317885499857765e-06, "loss": 0.38647308349609377, "step": 82165 }, { "epoch": 0.710499693041997, "grad_norm": 1.5724368522200793, "learning_rate": 4.317702474595447e-06, "loss": 0.0563323974609375, "step": 82170 }, { "epoch": 0.7105429265635403, "grad_norm": 1.5264003386995642, "learning_rate": 4.317519443256002e-06, "loss": 0.07252349853515624, "step": 82175 }, { "epoch": 0.7105861600850836, "grad_norm": 38.29999561671648, "learning_rate": 4.317336405840274e-06, "loss": 0.2320281982421875, "step": 82180 }, { "epoch": 0.7106293936066268, "grad_norm": 9.668076845794571, "learning_rate": 4.317153362349108e-06, "loss": 0.1568462371826172, "step": 82185 }, { "epoch": 0.7106726271281701, "grad_norm": 8.432086969745292, "learning_rate": 4.316970312783346e-06, "loss": 0.0741424560546875, "step": 82190 }, { "epoch": 0.7107158606497134, "grad_norm": 0.8372726140349832, "learning_rate": 4.316787257143833e-06, "loss": 0.049457740783691403, "step": 82195 }, { "epoch": 0.7107590941712566, "grad_norm": 6.255379160317158, "learning_rate": 4.316604195431413e-06, "loss": 0.0582733154296875, "step": 82200 }, { "epoch": 0.7108023276927999, "grad_norm": 9.434217186785261, "learning_rate": 4.3164211276469315e-06, "loss": 0.07999496459960938, "step": 82205 }, { "epoch": 0.7108455612143432, "grad_norm": 2.9991509931350806, "learning_rate": 4.316238053791232e-06, "loss": 0.236492919921875, "step": 82210 }, { "epoch": 0.7108887947358864, "grad_norm": 4.734173557624044, "learning_rate": 4.3160549738651596e-06, "loss": 0.053263092041015626, "step": 82215 }, { "epoch": 0.7109320282574296, "grad_norm": 1.5526344066837885, "learning_rate": 4.315871887869558e-06, "loss": 0.21548309326171874, "step": 82220 }, { "epoch": 0.710975261778973, "grad_norm": 2.470670715229022, "learning_rate": 4.31568879580527e-06, "loss": 0.033374786376953125, "step": 82225 }, { "epoch": 0.7110184953005162, "grad_norm": 0.5084094397766812, "learning_rate": 4.315505697673142e-06, "loss": 0.33654632568359377, "step": 82230 }, { "epoch": 0.7110617288220594, "grad_norm": 9.393145362461148, "learning_rate": 4.315322593474019e-06, "loss": 0.07917633056640624, "step": 82235 }, { "epoch": 0.7111049623436028, "grad_norm": 3.1898811625472527, "learning_rate": 4.315139483208743e-06, "loss": 0.09366302490234375, "step": 82240 }, { "epoch": 0.711148195865146, "grad_norm": 0.2765357221283494, "learning_rate": 4.314956366878162e-06, "loss": 0.09680309295654296, "step": 82245 }, { "epoch": 0.7111914293866892, "grad_norm": 24.508646199093636, "learning_rate": 4.314773244483117e-06, "loss": 0.26396484375, "step": 82250 }, { "epoch": 0.7112346629082326, "grad_norm": 2.4227977064020214, "learning_rate": 4.314590116024454e-06, "loss": 0.0977294921875, "step": 82255 }, { "epoch": 0.7112778964297758, "grad_norm": 1.9877718316399442, "learning_rate": 4.314406981503018e-06, "loss": 0.20915069580078124, "step": 82260 }, { "epoch": 0.711321129951319, "grad_norm": 9.434506995681915, "learning_rate": 4.314223840919653e-06, "loss": 0.2693115234375, "step": 82265 }, { "epoch": 0.7113643634728624, "grad_norm": 17.504708593184386, "learning_rate": 4.314040694275203e-06, "loss": 0.14242172241210938, "step": 82270 }, { "epoch": 0.7114075969944056, "grad_norm": 11.304387998344595, "learning_rate": 4.313857541570514e-06, "loss": 0.26102218627929685, "step": 82275 }, { "epoch": 0.7114508305159488, "grad_norm": 15.237094700664313, "learning_rate": 4.313674382806431e-06, "loss": 0.24123687744140626, "step": 82280 }, { "epoch": 0.7114940640374922, "grad_norm": 9.60566789584066, "learning_rate": 4.313491217983797e-06, "loss": 0.16054306030273438, "step": 82285 }, { "epoch": 0.7115372975590354, "grad_norm": 0.9807369066143088, "learning_rate": 4.3133080471034565e-06, "loss": 0.18554229736328126, "step": 82290 }, { "epoch": 0.7115805310805786, "grad_norm": 8.409552939815468, "learning_rate": 4.3131248701662565e-06, "loss": 0.12222518920898437, "step": 82295 }, { "epoch": 0.711623764602122, "grad_norm": 27.154869144014615, "learning_rate": 4.31294168717304e-06, "loss": 0.1436248779296875, "step": 82300 }, { "epoch": 0.7116669981236652, "grad_norm": 19.215264492920554, "learning_rate": 4.312758498124654e-06, "loss": 0.16903839111328126, "step": 82305 }, { "epoch": 0.7117102316452084, "grad_norm": 11.547491457624549, "learning_rate": 4.312575303021941e-06, "loss": 0.08604278564453124, "step": 82310 }, { "epoch": 0.7117534651667516, "grad_norm": 8.332473917295005, "learning_rate": 4.312392101865745e-06, "loss": 0.34630126953125, "step": 82315 }, { "epoch": 0.711796698688295, "grad_norm": 0.9225466162977333, "learning_rate": 4.312208894656914e-06, "loss": 0.1357177734375, "step": 82320 }, { "epoch": 0.7118399322098382, "grad_norm": 0.7790289086813541, "learning_rate": 4.312025681396293e-06, "loss": 0.08071556091308593, "step": 82325 }, { "epoch": 0.7118831657313814, "grad_norm": 25.46965953475003, "learning_rate": 4.311842462084724e-06, "loss": 0.17348480224609375, "step": 82330 }, { "epoch": 0.7119263992529248, "grad_norm": 5.596640413273592, "learning_rate": 4.311659236723053e-06, "loss": 0.08999176025390625, "step": 82335 }, { "epoch": 0.711969632774468, "grad_norm": 3.3322975217583677, "learning_rate": 4.311476005312126e-06, "loss": 0.08661575317382812, "step": 82340 }, { "epoch": 0.7120128662960112, "grad_norm": 13.261418080177451, "learning_rate": 4.311292767852787e-06, "loss": 0.07976932525634765, "step": 82345 }, { "epoch": 0.7120560998175546, "grad_norm": 4.436812099282049, "learning_rate": 4.311109524345882e-06, "loss": 0.23317222595214843, "step": 82350 }, { "epoch": 0.7120993333390978, "grad_norm": 1.055012039930726, "learning_rate": 4.310926274792257e-06, "loss": 0.1455596923828125, "step": 82355 }, { "epoch": 0.712142566860641, "grad_norm": 0.36587136363009337, "learning_rate": 4.310743019192755e-06, "loss": 0.10603179931640624, "step": 82360 }, { "epoch": 0.7121858003821844, "grad_norm": 42.05409108535521, "learning_rate": 4.31055975754822e-06, "loss": 0.4377859115600586, "step": 82365 }, { "epoch": 0.7122290339037276, "grad_norm": 9.338346884964647, "learning_rate": 4.310376489859502e-06, "loss": 0.126751708984375, "step": 82370 }, { "epoch": 0.7122722674252708, "grad_norm": 8.106361492241543, "learning_rate": 4.310193216127443e-06, "loss": 0.08673152923583985, "step": 82375 }, { "epoch": 0.7123155009468142, "grad_norm": 6.587270553361115, "learning_rate": 4.3100099363528865e-06, "loss": 0.16417713165283204, "step": 82380 }, { "epoch": 0.7123587344683574, "grad_norm": 0.37064445646127786, "learning_rate": 4.309826650536682e-06, "loss": 0.115570068359375, "step": 82385 }, { "epoch": 0.7124019679899006, "grad_norm": 21.460770309536436, "learning_rate": 4.309643358679673e-06, "loss": 0.13326873779296874, "step": 82390 }, { "epoch": 0.7124452015114439, "grad_norm": 2.805210448102839, "learning_rate": 4.309460060782703e-06, "loss": 0.22562255859375, "step": 82395 }, { "epoch": 0.7124884350329872, "grad_norm": 2.4520406130888266, "learning_rate": 4.30927675684662e-06, "loss": 0.0621795654296875, "step": 82400 }, { "epoch": 0.7125316685545304, "grad_norm": 0.8705800117640028, "learning_rate": 4.309093446872268e-06, "loss": 0.37880992889404297, "step": 82405 }, { "epoch": 0.7125749020760737, "grad_norm": 5.429716073893446, "learning_rate": 4.308910130860493e-06, "loss": 0.07892608642578125, "step": 82410 }, { "epoch": 0.712618135597617, "grad_norm": 0.0969396306389603, "learning_rate": 4.308726808812139e-06, "loss": 0.14043731689453126, "step": 82415 }, { "epoch": 0.7126613691191602, "grad_norm": 2.800354942097625, "learning_rate": 4.308543480728054e-06, "loss": 0.026861572265625, "step": 82420 }, { "epoch": 0.7127046026407035, "grad_norm": 3.587339231892574, "learning_rate": 4.308360146609081e-06, "loss": 0.09652099609375, "step": 82425 }, { "epoch": 0.7127478361622468, "grad_norm": 2.6058445985681047, "learning_rate": 4.3081768064560676e-06, "loss": 0.248870849609375, "step": 82430 }, { "epoch": 0.71279106968379, "grad_norm": 0.14744738930624077, "learning_rate": 4.307993460269858e-06, "loss": 0.25818576812744143, "step": 82435 }, { "epoch": 0.7128343032053333, "grad_norm": 14.185765029864003, "learning_rate": 4.307810108051298e-06, "loss": 0.0830352783203125, "step": 82440 }, { "epoch": 0.7128775367268766, "grad_norm": 4.091165431653883, "learning_rate": 4.307626749801233e-06, "loss": 0.07466697692871094, "step": 82445 }, { "epoch": 0.7129207702484198, "grad_norm": 8.414518528470648, "learning_rate": 4.307443385520509e-06, "loss": 0.0724578857421875, "step": 82450 }, { "epoch": 0.712964003769963, "grad_norm": 3.0457355489192843, "learning_rate": 4.307260015209972e-06, "loss": 0.13575668334960939, "step": 82455 }, { "epoch": 0.7130072372915064, "grad_norm": 0.8011802056710882, "learning_rate": 4.307076638870467e-06, "loss": 0.03465614318847656, "step": 82460 }, { "epoch": 0.7130504708130496, "grad_norm": 15.899823633057798, "learning_rate": 4.306893256502841e-06, "loss": 0.1621318817138672, "step": 82465 }, { "epoch": 0.7130937043345928, "grad_norm": 1.3099461926048626, "learning_rate": 4.306709868107938e-06, "loss": 0.14393692016601561, "step": 82470 }, { "epoch": 0.7131369378561362, "grad_norm": 4.930139702378169, "learning_rate": 4.306526473686604e-06, "loss": 0.2173126220703125, "step": 82475 }, { "epoch": 0.7131801713776794, "grad_norm": 1.3982016211840418, "learning_rate": 4.306343073239686e-06, "loss": 0.20580120086669923, "step": 82480 }, { "epoch": 0.7132234048992226, "grad_norm": 25.838100125297924, "learning_rate": 4.30615966676803e-06, "loss": 0.31906661987304685, "step": 82485 }, { "epoch": 0.7132666384207659, "grad_norm": 31.599400948152844, "learning_rate": 4.30597625427248e-06, "loss": 0.43394775390625, "step": 82490 }, { "epoch": 0.7133098719423092, "grad_norm": 2.0331661683793545, "learning_rate": 4.305792835753883e-06, "loss": 0.03538532257080078, "step": 82495 }, { "epoch": 0.7133531054638524, "grad_norm": 0.3202254521768337, "learning_rate": 4.305609411213085e-06, "loss": 0.2476348876953125, "step": 82500 }, { "epoch": 0.7133963389853957, "grad_norm": 45.72630468115058, "learning_rate": 4.305425980650932e-06, "loss": 0.20279541015625, "step": 82505 }, { "epoch": 0.713439572506939, "grad_norm": 0.6638840754622015, "learning_rate": 4.305242544068269e-06, "loss": 0.07807903289794922, "step": 82510 }, { "epoch": 0.7134828060284822, "grad_norm": 0.13911007636542685, "learning_rate": 4.305059101465943e-06, "loss": 0.14930400848388672, "step": 82515 }, { "epoch": 0.7135260395500255, "grad_norm": 49.61581579120243, "learning_rate": 4.3048756528448e-06, "loss": 0.22547626495361328, "step": 82520 }, { "epoch": 0.7135692730715688, "grad_norm": 4.603477347752737, "learning_rate": 4.304692198205685e-06, "loss": 0.10621261596679688, "step": 82525 }, { "epoch": 0.713612506593112, "grad_norm": 17.463894745535406, "learning_rate": 4.304508737549446e-06, "loss": 0.17841796875, "step": 82530 }, { "epoch": 0.7136557401146553, "grad_norm": 30.861953058090013, "learning_rate": 4.304325270876928e-06, "loss": 0.37985992431640625, "step": 82535 }, { "epoch": 0.7136989736361986, "grad_norm": 39.92001028728759, "learning_rate": 4.304141798188976e-06, "loss": 0.3870595932006836, "step": 82540 }, { "epoch": 0.7137422071577418, "grad_norm": 3.4150025738812193, "learning_rate": 4.303958319486438e-06, "loss": 0.02510223388671875, "step": 82545 }, { "epoch": 0.7137854406792851, "grad_norm": 21.8781003020204, "learning_rate": 4.303774834770159e-06, "loss": 0.14384613037109376, "step": 82550 }, { "epoch": 0.7138286742008284, "grad_norm": 2.4622154550876365, "learning_rate": 4.303591344040986e-06, "loss": 0.0984375, "step": 82555 }, { "epoch": 0.7138719077223716, "grad_norm": 36.418278548435765, "learning_rate": 4.303407847299765e-06, "loss": 0.26156158447265626, "step": 82560 }, { "epoch": 0.7139151412439149, "grad_norm": 34.63637955556247, "learning_rate": 4.303224344547342e-06, "loss": 0.367340087890625, "step": 82565 }, { "epoch": 0.7139583747654581, "grad_norm": 5.710880065301414, "learning_rate": 4.3030408357845636e-06, "loss": 0.10696144104003906, "step": 82570 }, { "epoch": 0.7140016082870014, "grad_norm": 0.5823200958661865, "learning_rate": 4.302857321012276e-06, "loss": 0.3474296569824219, "step": 82575 }, { "epoch": 0.7140448418085447, "grad_norm": 40.720324857728336, "learning_rate": 4.302673800231326e-06, "loss": 0.37005043029785156, "step": 82580 }, { "epoch": 0.7140880753300879, "grad_norm": 30.531112298800267, "learning_rate": 4.302490273442559e-06, "loss": 0.2359100341796875, "step": 82585 }, { "epoch": 0.7141313088516312, "grad_norm": 17.22261362596865, "learning_rate": 4.302306740646822e-06, "loss": 0.19005470275878905, "step": 82590 }, { "epoch": 0.7141745423731745, "grad_norm": 0.5688709676453236, "learning_rate": 4.302123201844961e-06, "loss": 0.30011749267578125, "step": 82595 }, { "epoch": 0.7142177758947177, "grad_norm": 2.9181474670726186, "learning_rate": 4.301939657037823e-06, "loss": 0.06727733612060546, "step": 82600 }, { "epoch": 0.714261009416261, "grad_norm": 33.54233063636516, "learning_rate": 4.301756106226254e-06, "loss": 0.2188385009765625, "step": 82605 }, { "epoch": 0.7143042429378043, "grad_norm": 29.022890749341244, "learning_rate": 4.301572549411102e-06, "loss": 0.15737228393554686, "step": 82610 }, { "epoch": 0.7143474764593475, "grad_norm": 26.1956185062915, "learning_rate": 4.301388986593212e-06, "loss": 0.1864593505859375, "step": 82615 }, { "epoch": 0.7143907099808908, "grad_norm": 5.388433866644799, "learning_rate": 4.301205417773431e-06, "loss": 0.09671630859375, "step": 82620 }, { "epoch": 0.714433943502434, "grad_norm": 2.3026328967820455, "learning_rate": 4.301021842952605e-06, "loss": 0.212689208984375, "step": 82625 }, { "epoch": 0.7144771770239773, "grad_norm": 1.8344005164226045, "learning_rate": 4.300838262131582e-06, "loss": 0.111663818359375, "step": 82630 }, { "epoch": 0.7145204105455206, "grad_norm": 33.026607971578514, "learning_rate": 4.300654675311207e-06, "loss": 0.07455978393554688, "step": 82635 }, { "epoch": 0.7145636440670639, "grad_norm": 7.725338052947847, "learning_rate": 4.300471082492328e-06, "loss": 0.18158283233642578, "step": 82640 }, { "epoch": 0.7146068775886071, "grad_norm": 2.5432893203060534, "learning_rate": 4.300287483675792e-06, "loss": 0.0353607177734375, "step": 82645 }, { "epoch": 0.7146501111101504, "grad_norm": 4.064497466673464, "learning_rate": 4.300103878862444e-06, "loss": 0.22977447509765625, "step": 82650 }, { "epoch": 0.7146933446316937, "grad_norm": 3.5924799648996006, "learning_rate": 4.299920268053132e-06, "loss": 0.04288978576660156, "step": 82655 }, { "epoch": 0.7147365781532369, "grad_norm": 6.525327490723307, "learning_rate": 4.299736651248703e-06, "loss": 0.38621368408203127, "step": 82660 }, { "epoch": 0.7147798116747801, "grad_norm": 1.6235497150695284, "learning_rate": 4.299553028450003e-06, "loss": 0.045355224609375, "step": 82665 }, { "epoch": 0.7148230451963234, "grad_norm": 12.176727745245554, "learning_rate": 4.299369399657879e-06, "loss": 0.2023529052734375, "step": 82670 }, { "epoch": 0.7148662787178667, "grad_norm": 5.284350139108507, "learning_rate": 4.29918576487318e-06, "loss": 0.1009765625, "step": 82675 }, { "epoch": 0.7149095122394099, "grad_norm": 5.042920604631324, "learning_rate": 4.2990021240967484e-06, "loss": 0.19462013244628906, "step": 82680 }, { "epoch": 0.7149527457609532, "grad_norm": 0.28168937401785, "learning_rate": 4.298818477329435e-06, "loss": 0.08445587158203124, "step": 82685 }, { "epoch": 0.7149959792824965, "grad_norm": 6.672958328783442, "learning_rate": 4.298634824572086e-06, "loss": 0.16978912353515624, "step": 82690 }, { "epoch": 0.7150392128040397, "grad_norm": 9.223892556691034, "learning_rate": 4.298451165825548e-06, "loss": 0.21236572265625, "step": 82695 }, { "epoch": 0.715082446325583, "grad_norm": 10.111668431559853, "learning_rate": 4.298267501090667e-06, "loss": 0.28549995422363283, "step": 82700 }, { "epoch": 0.7151256798471263, "grad_norm": 9.435257941697538, "learning_rate": 4.298083830368292e-06, "loss": 0.09721298217773437, "step": 82705 }, { "epoch": 0.7151689133686695, "grad_norm": 0.7776319206957543, "learning_rate": 4.297900153659269e-06, "loss": 0.33036556243896487, "step": 82710 }, { "epoch": 0.7152121468902128, "grad_norm": 31.92074633393248, "learning_rate": 4.297716470964444e-06, "loss": 0.13884544372558594, "step": 82715 }, { "epoch": 0.7152553804117561, "grad_norm": 8.688913183681537, "learning_rate": 4.297532782284666e-06, "loss": 0.1249267578125, "step": 82720 }, { "epoch": 0.7152986139332993, "grad_norm": 1.0540143270012248, "learning_rate": 4.297349087620781e-06, "loss": 0.28087158203125, "step": 82725 }, { "epoch": 0.7153418474548426, "grad_norm": 1.1837955659247985, "learning_rate": 4.297165386973637e-06, "loss": 0.035205078125, "step": 82730 }, { "epoch": 0.7153850809763859, "grad_norm": 5.104069502284281, "learning_rate": 4.296981680344082e-06, "loss": 0.10881805419921875, "step": 82735 }, { "epoch": 0.7154283144979291, "grad_norm": 5.4166212243887, "learning_rate": 4.29679796773296e-06, "loss": 0.0861968994140625, "step": 82740 }, { "epoch": 0.7154715480194723, "grad_norm": 1.4248311404298555, "learning_rate": 4.296614249141122e-06, "loss": 0.03858261108398438, "step": 82745 }, { "epoch": 0.7155147815410157, "grad_norm": 4.761962652094467, "learning_rate": 4.296430524569413e-06, "loss": 0.16759490966796875, "step": 82750 }, { "epoch": 0.7155580150625589, "grad_norm": 19.745575772167705, "learning_rate": 4.296246794018681e-06, "loss": 0.1324859619140625, "step": 82755 }, { "epoch": 0.7156012485841021, "grad_norm": 5.247444367167276, "learning_rate": 4.2960630574897735e-06, "loss": 0.02015533447265625, "step": 82760 }, { "epoch": 0.7156444821056455, "grad_norm": 22.631122795331127, "learning_rate": 4.295879314983537e-06, "loss": 0.2964111328125, "step": 82765 }, { "epoch": 0.7156877156271887, "grad_norm": 1.2651587165893925, "learning_rate": 4.2956955665008205e-06, "loss": 0.0837738037109375, "step": 82770 }, { "epoch": 0.7157309491487319, "grad_norm": 1.9024722442195097, "learning_rate": 4.29551181204247e-06, "loss": 0.16840667724609376, "step": 82775 }, { "epoch": 0.7157741826702753, "grad_norm": 1.5995444123972975, "learning_rate": 4.295328051609333e-06, "loss": 0.12062454223632812, "step": 82780 }, { "epoch": 0.7158174161918185, "grad_norm": 0.2562607636574223, "learning_rate": 4.295144285202259e-06, "loss": 0.2490570068359375, "step": 82785 }, { "epoch": 0.7158606497133617, "grad_norm": 1.8618399961354084, "learning_rate": 4.294960512822093e-06, "loss": 0.02685546875, "step": 82790 }, { "epoch": 0.7159038832349051, "grad_norm": 23.417557975719305, "learning_rate": 4.294776734469684e-06, "loss": 0.1475311279296875, "step": 82795 }, { "epoch": 0.7159471167564483, "grad_norm": 0.12539041457220232, "learning_rate": 4.294592950145879e-06, "loss": 0.4943084716796875, "step": 82800 }, { "epoch": 0.7159903502779915, "grad_norm": 27.983334105006445, "learning_rate": 4.294409159851525e-06, "loss": 0.3513214111328125, "step": 82805 }, { "epoch": 0.7160335837995349, "grad_norm": 40.68909102618218, "learning_rate": 4.294225363587471e-06, "loss": 0.18581695556640626, "step": 82810 }, { "epoch": 0.7160768173210781, "grad_norm": 55.106053393636145, "learning_rate": 4.294041561354564e-06, "loss": 0.29752540588378906, "step": 82815 }, { "epoch": 0.7161200508426213, "grad_norm": 5.766446406795815, "learning_rate": 4.293857753153652e-06, "loss": 0.16754837036132814, "step": 82820 }, { "epoch": 0.7161632843641647, "grad_norm": 28.639981031139268, "learning_rate": 4.293673938985581e-06, "loss": 0.4516998291015625, "step": 82825 }, { "epoch": 0.7162065178857079, "grad_norm": 10.885139426542588, "learning_rate": 4.293490118851202e-06, "loss": 0.06124677658081055, "step": 82830 }, { "epoch": 0.7162497514072511, "grad_norm": 0.27912680780192006, "learning_rate": 4.293306292751359e-06, "loss": 0.16890487670898438, "step": 82835 }, { "epoch": 0.7162929849287943, "grad_norm": 1.3718707944497515, "learning_rate": 4.293122460686903e-06, "loss": 0.20642833709716796, "step": 82840 }, { "epoch": 0.7163362184503377, "grad_norm": 5.342531883342633, "learning_rate": 4.2929386226586805e-06, "loss": 0.10613212585449219, "step": 82845 }, { "epoch": 0.7163794519718809, "grad_norm": 0.710982425272788, "learning_rate": 4.292754778667537e-06, "loss": 0.26618194580078125, "step": 82850 }, { "epoch": 0.7164226854934241, "grad_norm": 13.804927276605016, "learning_rate": 4.2925709287143255e-06, "loss": 0.19145965576171875, "step": 82855 }, { "epoch": 0.7164659190149675, "grad_norm": 8.834668918692994, "learning_rate": 4.29238707279989e-06, "loss": 0.25624923706054686, "step": 82860 }, { "epoch": 0.7165091525365107, "grad_norm": 1.9205611590089995, "learning_rate": 4.29220321092508e-06, "loss": 0.064605712890625, "step": 82865 }, { "epoch": 0.7165523860580539, "grad_norm": 30.05852417583906, "learning_rate": 4.292019343090743e-06, "loss": 0.13023872375488282, "step": 82870 }, { "epoch": 0.7165956195795973, "grad_norm": 6.665512909999818, "learning_rate": 4.291835469297726e-06, "loss": 0.27019195556640624, "step": 82875 }, { "epoch": 0.7166388531011405, "grad_norm": 1.7414131994729465, "learning_rate": 4.291651589546878e-06, "loss": 0.23564453125, "step": 82880 }, { "epoch": 0.7166820866226837, "grad_norm": 22.362326549573243, "learning_rate": 4.2914677038390486e-06, "loss": 0.210162353515625, "step": 82885 }, { "epoch": 0.7167253201442271, "grad_norm": 1.5064744534447503, "learning_rate": 4.291283812175082e-06, "loss": 0.077294921875, "step": 82890 }, { "epoch": 0.7167685536657703, "grad_norm": 11.660261135529813, "learning_rate": 4.29109991455583e-06, "loss": 0.183319091796875, "step": 82895 }, { "epoch": 0.7168117871873135, "grad_norm": 16.214422085461173, "learning_rate": 4.29091601098214e-06, "loss": 0.15021286010742188, "step": 82900 }, { "epoch": 0.7168550207088569, "grad_norm": 3.6123723680688222, "learning_rate": 4.290732101454857e-06, "loss": 0.075433349609375, "step": 82905 }, { "epoch": 0.7168982542304001, "grad_norm": 2.232973090404064, "learning_rate": 4.290548185974834e-06, "loss": 0.12104873657226563, "step": 82910 }, { "epoch": 0.7169414877519433, "grad_norm": 1.94875644156894, "learning_rate": 4.2903642645429155e-06, "loss": 0.14562835693359374, "step": 82915 }, { "epoch": 0.7169847212734866, "grad_norm": 2.166294211580369, "learning_rate": 4.290180337159951e-06, "loss": 0.3638603210449219, "step": 82920 }, { "epoch": 0.7170279547950299, "grad_norm": 2.285496821025736, "learning_rate": 4.289996403826789e-06, "loss": 0.09877700805664062, "step": 82925 }, { "epoch": 0.7170711883165731, "grad_norm": 0.42673910165656387, "learning_rate": 4.289812464544278e-06, "loss": 0.38142242431640627, "step": 82930 }, { "epoch": 0.7171144218381164, "grad_norm": 15.660666816842342, "learning_rate": 4.289628519313266e-06, "loss": 0.08325996398925781, "step": 82935 }, { "epoch": 0.7171576553596597, "grad_norm": 0.5824502067163444, "learning_rate": 4.2894445681346e-06, "loss": 0.040692138671875, "step": 82940 }, { "epoch": 0.7172008888812029, "grad_norm": 15.686612594944554, "learning_rate": 4.289260611009131e-06, "loss": 0.1323974609375, "step": 82945 }, { "epoch": 0.7172441224027462, "grad_norm": 5.667341982869684, "learning_rate": 4.289076647937705e-06, "loss": 0.18762588500976562, "step": 82950 }, { "epoch": 0.7172873559242895, "grad_norm": 11.829294015934183, "learning_rate": 4.2888926789211725e-06, "loss": 0.20192375183105468, "step": 82955 }, { "epoch": 0.7173305894458327, "grad_norm": 26.227645645295805, "learning_rate": 4.28870870396038e-06, "loss": 0.1626190185546875, "step": 82960 }, { "epoch": 0.717373822967376, "grad_norm": 24.474817064686267, "learning_rate": 4.2885247230561775e-06, "loss": 0.0564361572265625, "step": 82965 }, { "epoch": 0.7174170564889193, "grad_norm": 6.23554604346003, "learning_rate": 4.288340736209411e-06, "loss": 0.0736053466796875, "step": 82970 }, { "epoch": 0.7174602900104625, "grad_norm": 15.024803734666284, "learning_rate": 4.288156743420932e-06, "loss": 0.11178665161132813, "step": 82975 }, { "epoch": 0.7175035235320057, "grad_norm": 7.425820505526284, "learning_rate": 4.287972744691589e-06, "loss": 0.11376953125, "step": 82980 }, { "epoch": 0.7175467570535491, "grad_norm": 13.82821654592251, "learning_rate": 4.287788740022229e-06, "loss": 0.16610870361328126, "step": 82985 }, { "epoch": 0.7175899905750923, "grad_norm": 8.333059420926835, "learning_rate": 4.2876047294137e-06, "loss": 0.12650146484375, "step": 82990 }, { "epoch": 0.7176332240966355, "grad_norm": 8.397928188120753, "learning_rate": 4.287420712866852e-06, "loss": 0.16515579223632812, "step": 82995 }, { "epoch": 0.7176764576181788, "grad_norm": 1.41595060510517, "learning_rate": 4.2872366903825354e-06, "loss": 0.10499801635742187, "step": 83000 }, { "epoch": 0.7177196911397221, "grad_norm": 4.854742111700291, "learning_rate": 4.287052661961594e-06, "loss": 0.0347381591796875, "step": 83005 }, { "epoch": 0.7177629246612653, "grad_norm": 0.3279802467160487, "learning_rate": 4.286868627604882e-06, "loss": 0.04510574340820313, "step": 83010 }, { "epoch": 0.7178061581828086, "grad_norm": 3.593131717536672, "learning_rate": 4.286684587313244e-06, "loss": 0.11128082275390624, "step": 83015 }, { "epoch": 0.7178493917043519, "grad_norm": 1.5502317133446786, "learning_rate": 4.28650054108753e-06, "loss": 0.07571868896484375, "step": 83020 }, { "epoch": 0.7178926252258951, "grad_norm": 1.9769041785410246, "learning_rate": 4.286316488928591e-06, "loss": 0.2524131774902344, "step": 83025 }, { "epoch": 0.7179358587474384, "grad_norm": 0.9811484020181578, "learning_rate": 4.286132430837273e-06, "loss": 0.0307098388671875, "step": 83030 }, { "epoch": 0.7179790922689817, "grad_norm": 0.8935281843851247, "learning_rate": 4.285948366814426e-06, "loss": 0.029357528686523436, "step": 83035 }, { "epoch": 0.7180223257905249, "grad_norm": 13.020115123918721, "learning_rate": 4.285764296860899e-06, "loss": 0.078753662109375, "step": 83040 }, { "epoch": 0.7180655593120682, "grad_norm": 1.7395837521153332, "learning_rate": 4.2855802209775395e-06, "loss": 0.059332275390625, "step": 83045 }, { "epoch": 0.7181087928336115, "grad_norm": 5.447724622043259, "learning_rate": 4.285396139165199e-06, "loss": 0.3390201568603516, "step": 83050 }, { "epoch": 0.7181520263551547, "grad_norm": 7.615676191816369, "learning_rate": 4.285212051424724e-06, "loss": 0.30774459838867185, "step": 83055 }, { "epoch": 0.718195259876698, "grad_norm": 6.12302593663077, "learning_rate": 4.285027957756966e-06, "loss": 0.14091796875, "step": 83060 }, { "epoch": 0.7182384933982413, "grad_norm": 5.531350105489183, "learning_rate": 4.2848438581627715e-06, "loss": 0.519659423828125, "step": 83065 }, { "epoch": 0.7182817269197845, "grad_norm": 0.8954296976801757, "learning_rate": 4.2846597526429915e-06, "loss": 0.0910064697265625, "step": 83070 }, { "epoch": 0.7183249604413278, "grad_norm": 7.4349042863747705, "learning_rate": 4.284475641198473e-06, "loss": 0.25330429077148436, "step": 83075 }, { "epoch": 0.7183681939628711, "grad_norm": 10.666171053941024, "learning_rate": 4.2842915238300665e-06, "loss": 0.0489715576171875, "step": 83080 }, { "epoch": 0.7184114274844143, "grad_norm": 18.29464166452458, "learning_rate": 4.284107400538621e-06, "loss": 0.13472747802734375, "step": 83085 }, { "epoch": 0.7184546610059576, "grad_norm": 3.3739929991654463, "learning_rate": 4.283923271324988e-06, "loss": 0.0892791748046875, "step": 83090 }, { "epoch": 0.7184978945275008, "grad_norm": 5.280029159353305, "learning_rate": 4.283739136190012e-06, "loss": 0.27668609619140627, "step": 83095 }, { "epoch": 0.7185411280490441, "grad_norm": 2.563995284847684, "learning_rate": 4.283554995134545e-06, "loss": 0.39148101806640623, "step": 83100 }, { "epoch": 0.7185843615705874, "grad_norm": 0.08901542362314274, "learning_rate": 4.283370848159435e-06, "loss": 0.1652996063232422, "step": 83105 }, { "epoch": 0.7186275950921306, "grad_norm": 4.004809442587146, "learning_rate": 4.2831866952655334e-06, "loss": 0.20381622314453124, "step": 83110 }, { "epoch": 0.7186708286136739, "grad_norm": 0.6860814742744372, "learning_rate": 4.283002536453687e-06, "loss": 0.3008720397949219, "step": 83115 }, { "epoch": 0.7187140621352172, "grad_norm": 4.263758587549024, "learning_rate": 4.2828183717247465e-06, "loss": 0.1561859130859375, "step": 83120 }, { "epoch": 0.7187572956567604, "grad_norm": 5.931803221385293, "learning_rate": 4.282634201079563e-06, "loss": 0.05452651977539062, "step": 83125 }, { "epoch": 0.7188005291783037, "grad_norm": 16.605072266645806, "learning_rate": 4.282450024518981e-06, "loss": 0.1493743896484375, "step": 83130 }, { "epoch": 0.718843762699847, "grad_norm": 3.059501096587703, "learning_rate": 4.2822658420438535e-06, "loss": 0.073602294921875, "step": 83135 }, { "epoch": 0.7188869962213902, "grad_norm": 0.5147861466659313, "learning_rate": 4.282081653655031e-06, "loss": 0.2383544921875, "step": 83140 }, { "epoch": 0.7189302297429335, "grad_norm": 2.112107420372575, "learning_rate": 4.281897459353359e-06, "loss": 0.05539398193359375, "step": 83145 }, { "epoch": 0.7189734632644768, "grad_norm": 7.47000251759952, "learning_rate": 4.281713259139691e-06, "loss": 0.17033348083496094, "step": 83150 }, { "epoch": 0.71901669678602, "grad_norm": 5.871252483052953, "learning_rate": 4.281529053014874e-06, "loss": 0.082720947265625, "step": 83155 }, { "epoch": 0.7190599303075633, "grad_norm": 1.9477528326194242, "learning_rate": 4.281344840979758e-06, "loss": 0.0588897705078125, "step": 83160 }, { "epoch": 0.7191031638291066, "grad_norm": 33.239487845402095, "learning_rate": 4.281160623035192e-06, "loss": 0.30925445556640624, "step": 83165 }, { "epoch": 0.7191463973506498, "grad_norm": 26.565834766199206, "learning_rate": 4.280976399182028e-06, "loss": 0.41852455139160155, "step": 83170 }, { "epoch": 0.719189630872193, "grad_norm": 12.12726831416927, "learning_rate": 4.280792169421114e-06, "loss": 0.1353271484375, "step": 83175 }, { "epoch": 0.7192328643937363, "grad_norm": 1.2012816592263424, "learning_rate": 4.280607933753299e-06, "loss": 0.29242706298828125, "step": 83180 }, { "epoch": 0.7192760979152796, "grad_norm": 2.6018707045291394, "learning_rate": 4.2804236921794334e-06, "loss": 0.10505599975585937, "step": 83185 }, { "epoch": 0.7193193314368228, "grad_norm": 7.761019444558062, "learning_rate": 4.280239444700366e-06, "loss": 0.11170883178710937, "step": 83190 }, { "epoch": 0.7193625649583661, "grad_norm": 0.5840261056280209, "learning_rate": 4.280055191316948e-06, "loss": 0.28312225341796876, "step": 83195 }, { "epoch": 0.7194057984799094, "grad_norm": 4.105251611275978, "learning_rate": 4.279870932030029e-06, "loss": 0.10407447814941406, "step": 83200 }, { "epoch": 0.7194490320014526, "grad_norm": 5.082435406056391, "learning_rate": 4.279686666840459e-06, "loss": 0.04357757568359375, "step": 83205 }, { "epoch": 0.7194922655229959, "grad_norm": 18.464325620369145, "learning_rate": 4.279502395749085e-06, "loss": 0.31471710205078124, "step": 83210 }, { "epoch": 0.7195354990445392, "grad_norm": 1.5417906179525425, "learning_rate": 4.279318118756761e-06, "loss": 0.024648284912109374, "step": 83215 }, { "epoch": 0.7195787325660824, "grad_norm": 1.1380675067896517, "learning_rate": 4.279133835864334e-06, "loss": 0.11003036499023437, "step": 83220 }, { "epoch": 0.7196219660876257, "grad_norm": 8.954055702612756, "learning_rate": 4.278949547072655e-06, "loss": 0.2646484375, "step": 83225 }, { "epoch": 0.719665199609169, "grad_norm": 23.610289515938582, "learning_rate": 4.2787652523825745e-06, "loss": 0.2153858184814453, "step": 83230 }, { "epoch": 0.7197084331307122, "grad_norm": 4.095464353514593, "learning_rate": 4.278580951794941e-06, "loss": 0.023532867431640625, "step": 83235 }, { "epoch": 0.7197516666522555, "grad_norm": 6.364191423573313, "learning_rate": 4.278396645310605e-06, "loss": 0.2904205322265625, "step": 83240 }, { "epoch": 0.7197949001737988, "grad_norm": 0.6434330966129521, "learning_rate": 4.278212332930416e-06, "loss": 0.2226348876953125, "step": 83245 }, { "epoch": 0.719838133695342, "grad_norm": 101.82928314686855, "learning_rate": 4.2780280146552254e-06, "loss": 0.36634979248046873, "step": 83250 }, { "epoch": 0.7198813672168853, "grad_norm": 20.873191528602973, "learning_rate": 4.2778436904858825e-06, "loss": 0.1676544189453125, "step": 83255 }, { "epoch": 0.7199246007384286, "grad_norm": 11.719317925022331, "learning_rate": 4.2776593604232375e-06, "loss": 0.2126178741455078, "step": 83260 }, { "epoch": 0.7199678342599718, "grad_norm": 11.803731955946237, "learning_rate": 4.277475024468141e-06, "loss": 0.17663536071777344, "step": 83265 }, { "epoch": 0.720011067781515, "grad_norm": 107.15044722686545, "learning_rate": 4.277290682621441e-06, "loss": 0.37757568359375, "step": 83270 }, { "epoch": 0.7200543013030584, "grad_norm": 0.1411229347181891, "learning_rate": 4.27710633488399e-06, "loss": 0.06272506713867188, "step": 83275 }, { "epoch": 0.7200975348246016, "grad_norm": 31.963161859193622, "learning_rate": 4.276921981256638e-06, "loss": 0.4836669921875, "step": 83280 }, { "epoch": 0.7201407683461448, "grad_norm": 7.254605542073886, "learning_rate": 4.276737621740234e-06, "loss": 0.22542037963867187, "step": 83285 }, { "epoch": 0.7201840018676882, "grad_norm": 2.6733254680389082, "learning_rate": 4.276553256335629e-06, "loss": 0.3048683166503906, "step": 83290 }, { "epoch": 0.7202272353892314, "grad_norm": 1.644445206627681, "learning_rate": 4.276368885043673e-06, "loss": 0.21849822998046875, "step": 83295 }, { "epoch": 0.7202704689107746, "grad_norm": 8.84831382507366, "learning_rate": 4.276184507865217e-06, "loss": 0.073681640625, "step": 83300 }, { "epoch": 0.720313702432318, "grad_norm": 19.715961236516822, "learning_rate": 4.2760001248011105e-06, "loss": 0.12639312744140624, "step": 83305 }, { "epoch": 0.7203569359538612, "grad_norm": 31.99478169483562, "learning_rate": 4.275815735852204e-06, "loss": 0.29297866821289065, "step": 83310 }, { "epoch": 0.7204001694754044, "grad_norm": 5.489063231768492, "learning_rate": 4.275631341019348e-06, "loss": 0.2225311279296875, "step": 83315 }, { "epoch": 0.7204434029969478, "grad_norm": 11.457568631582554, "learning_rate": 4.275446940303394e-06, "loss": 0.15850334167480468, "step": 83320 }, { "epoch": 0.720486636518491, "grad_norm": 1.1912923660408579, "learning_rate": 4.275262533705191e-06, "loss": 0.2492586135864258, "step": 83325 }, { "epoch": 0.7205298700400342, "grad_norm": 3.0884388572024957, "learning_rate": 4.275078121225589e-06, "loss": 0.19576416015625, "step": 83330 }, { "epoch": 0.7205731035615776, "grad_norm": 4.732217765940248, "learning_rate": 4.2748937028654396e-06, "loss": 0.10774078369140624, "step": 83335 }, { "epoch": 0.7206163370831208, "grad_norm": 2.448907964085193, "learning_rate": 4.274709278625594e-06, "loss": 0.05752487182617187, "step": 83340 }, { "epoch": 0.720659570604664, "grad_norm": 2.021036954904385, "learning_rate": 4.274524848506901e-06, "loss": 0.0591094970703125, "step": 83345 }, { "epoch": 0.7207028041262072, "grad_norm": 17.122874863256932, "learning_rate": 4.274340412510212e-06, "loss": 0.130572509765625, "step": 83350 }, { "epoch": 0.7207460376477506, "grad_norm": 6.174322738473592, "learning_rate": 4.274155970636378e-06, "loss": 0.11925086975097657, "step": 83355 }, { "epoch": 0.7207892711692938, "grad_norm": 0.7396376056239167, "learning_rate": 4.27397152288625e-06, "loss": 0.12271881103515625, "step": 83360 }, { "epoch": 0.720832504690837, "grad_norm": 6.447781594216949, "learning_rate": 4.273787069260676e-06, "loss": 0.20905799865722657, "step": 83365 }, { "epoch": 0.7208757382123804, "grad_norm": 2.2543289075247595, "learning_rate": 4.27360260976051e-06, "loss": 0.2658344268798828, "step": 83370 }, { "epoch": 0.7209189717339236, "grad_norm": 6.507408612894203, "learning_rate": 4.273418144386601e-06, "loss": 0.10706787109375, "step": 83375 }, { "epoch": 0.7209622052554668, "grad_norm": 0.20710746103588681, "learning_rate": 4.2732336731398e-06, "loss": 0.05135498046875, "step": 83380 }, { "epoch": 0.7210054387770102, "grad_norm": 0.5516131861801133, "learning_rate": 4.273049196020958e-06, "loss": 0.07557220458984375, "step": 83385 }, { "epoch": 0.7210486722985534, "grad_norm": 0.3711595593469142, "learning_rate": 4.272864713030925e-06, "loss": 0.21085205078125, "step": 83390 }, { "epoch": 0.7210919058200966, "grad_norm": 12.72622801095193, "learning_rate": 4.2726802241705535e-06, "loss": 0.1457366943359375, "step": 83395 }, { "epoch": 0.72113513934164, "grad_norm": 1.6099322222721606, "learning_rate": 4.272495729440692e-06, "loss": 0.07821025848388671, "step": 83400 }, { "epoch": 0.7211783728631832, "grad_norm": 15.454983222934557, "learning_rate": 4.272311228842193e-06, "loss": 0.24576683044433595, "step": 83405 }, { "epoch": 0.7212216063847264, "grad_norm": 1.4775378404742783, "learning_rate": 4.272126722375908e-06, "loss": 0.13079071044921875, "step": 83410 }, { "epoch": 0.7212648399062698, "grad_norm": 1.2937585760384986, "learning_rate": 4.271942210042685e-06, "loss": 0.27803955078125, "step": 83415 }, { "epoch": 0.721308073427813, "grad_norm": 11.119798200356161, "learning_rate": 4.271757691843378e-06, "loss": 0.041412353515625, "step": 83420 }, { "epoch": 0.7213513069493562, "grad_norm": 0.2707179520907113, "learning_rate": 4.271573167778838e-06, "loss": 0.08903541564941406, "step": 83425 }, { "epoch": 0.7213945404708996, "grad_norm": 7.783457048570428, "learning_rate": 4.271388637849913e-06, "loss": 0.05983409881591797, "step": 83430 }, { "epoch": 0.7214377739924428, "grad_norm": 1.4804170461292527, "learning_rate": 4.271204102057457e-06, "loss": 0.08402290344238281, "step": 83435 }, { "epoch": 0.721481007513986, "grad_norm": 2.7083968643491776, "learning_rate": 4.2710195604023196e-06, "loss": 0.16559982299804688, "step": 83440 }, { "epoch": 0.7215242410355293, "grad_norm": 3.9492039883902628, "learning_rate": 4.270835012885354e-06, "loss": 0.05680694580078125, "step": 83445 }, { "epoch": 0.7215674745570726, "grad_norm": 2.0264797662327942, "learning_rate": 4.270650459507407e-06, "loss": 0.1075958251953125, "step": 83450 }, { "epoch": 0.7216107080786158, "grad_norm": 0.4690303928561812, "learning_rate": 4.270465900269333e-06, "loss": 0.035748291015625, "step": 83455 }, { "epoch": 0.721653941600159, "grad_norm": 0.4651208391894143, "learning_rate": 4.270281335171984e-06, "loss": 0.051381683349609374, "step": 83460 }, { "epoch": 0.7216971751217024, "grad_norm": 5.002720070805769, "learning_rate": 4.270096764216209e-06, "loss": 0.15367279052734376, "step": 83465 }, { "epoch": 0.7217404086432456, "grad_norm": 6.846810611722094, "learning_rate": 4.269912187402859e-06, "loss": 0.4101585388183594, "step": 83470 }, { "epoch": 0.7217836421647889, "grad_norm": 7.650821023466303, "learning_rate": 4.269727604732787e-06, "loss": 0.2926826477050781, "step": 83475 }, { "epoch": 0.7218268756863322, "grad_norm": 5.301087402170309, "learning_rate": 4.2695430162068435e-06, "loss": 0.16665172576904297, "step": 83480 }, { "epoch": 0.7218701092078754, "grad_norm": 7.477096107624505, "learning_rate": 4.26935842182588e-06, "loss": 0.06351318359375, "step": 83485 }, { "epoch": 0.7219133427294187, "grad_norm": 12.110224858963607, "learning_rate": 4.269173821590748e-06, "loss": 0.10377273559570313, "step": 83490 }, { "epoch": 0.721956576250962, "grad_norm": 4.673974478869529, "learning_rate": 4.268989215502297e-06, "loss": 0.1884796142578125, "step": 83495 }, { "epoch": 0.7219998097725052, "grad_norm": 29.811995833881454, "learning_rate": 4.26880460356138e-06, "loss": 0.1991241455078125, "step": 83500 }, { "epoch": 0.7220430432940484, "grad_norm": 3.3822792150992074, "learning_rate": 4.268619985768849e-06, "loss": 0.3902439117431641, "step": 83505 }, { "epoch": 0.7220862768155918, "grad_norm": 18.224474677710663, "learning_rate": 4.2684353621255555e-06, "loss": 0.45353240966796876, "step": 83510 }, { "epoch": 0.722129510337135, "grad_norm": 4.216237486186528, "learning_rate": 4.26825073263235e-06, "loss": 0.020578765869140626, "step": 83515 }, { "epoch": 0.7221727438586782, "grad_norm": 31.274979267553878, "learning_rate": 4.2680660972900825e-06, "loss": 0.34396209716796877, "step": 83520 }, { "epoch": 0.7222159773802215, "grad_norm": 7.093111303890063, "learning_rate": 4.267881456099608e-06, "loss": 0.050787353515625, "step": 83525 }, { "epoch": 0.7222592109017648, "grad_norm": 42.52931759449287, "learning_rate": 4.2676968090617745e-06, "loss": 0.1317047119140625, "step": 83530 }, { "epoch": 0.722302444423308, "grad_norm": 0.08099071969165679, "learning_rate": 4.267512156177437e-06, "loss": 0.04294166564941406, "step": 83535 }, { "epoch": 0.7223456779448513, "grad_norm": 64.43843933583743, "learning_rate": 4.267327497447445e-06, "loss": 0.2934722900390625, "step": 83540 }, { "epoch": 0.7223889114663946, "grad_norm": 38.34344386284737, "learning_rate": 4.2671428328726495e-06, "loss": 0.2171539306640625, "step": 83545 }, { "epoch": 0.7224321449879378, "grad_norm": 3.4582403789988394, "learning_rate": 4.266958162453905e-06, "loss": 0.074359130859375, "step": 83550 }, { "epoch": 0.7224753785094811, "grad_norm": 1.9084468819700346, "learning_rate": 4.266773486192061e-06, "loss": 0.1510761260986328, "step": 83555 }, { "epoch": 0.7225186120310244, "grad_norm": 16.16260615270764, "learning_rate": 4.266588804087968e-06, "loss": 0.17591094970703125, "step": 83560 }, { "epoch": 0.7225618455525676, "grad_norm": 2.827089747867661, "learning_rate": 4.266404116142481e-06, "loss": 0.19447021484375, "step": 83565 }, { "epoch": 0.7226050790741109, "grad_norm": 1.6417608460613256, "learning_rate": 4.2662194223564495e-06, "loss": 0.244976806640625, "step": 83570 }, { "epoch": 0.7226483125956542, "grad_norm": 28.320254278708326, "learning_rate": 4.266034722730726e-06, "loss": 0.19554557800292968, "step": 83575 }, { "epoch": 0.7226915461171974, "grad_norm": 26.57204207647679, "learning_rate": 4.265850017266162e-06, "loss": 0.08645706176757813, "step": 83580 }, { "epoch": 0.7227347796387407, "grad_norm": 0.11505249912149565, "learning_rate": 4.26566530596361e-06, "loss": 0.1762237548828125, "step": 83585 }, { "epoch": 0.722778013160284, "grad_norm": 2.522168863291161, "learning_rate": 4.265480588823921e-06, "loss": 0.1426219940185547, "step": 83590 }, { "epoch": 0.7228212466818272, "grad_norm": 3.549745449879666, "learning_rate": 4.265295865847947e-06, "loss": 0.0675079345703125, "step": 83595 }, { "epoch": 0.7228644802033705, "grad_norm": 14.612126990437842, "learning_rate": 4.265111137036542e-06, "loss": 0.128594970703125, "step": 83600 }, { "epoch": 0.7229077137249138, "grad_norm": 0.19911765118009228, "learning_rate": 4.2649264023905545e-06, "loss": 0.04078216552734375, "step": 83605 }, { "epoch": 0.722950947246457, "grad_norm": 2.0733885667614924, "learning_rate": 4.2647416619108385e-06, "loss": 0.06103591918945313, "step": 83610 }, { "epoch": 0.7229941807680003, "grad_norm": 2.491219889021065, "learning_rate": 4.264556915598246e-06, "loss": 0.270562744140625, "step": 83615 }, { "epoch": 0.7230374142895435, "grad_norm": 0.13494743297061237, "learning_rate": 4.264372163453629e-06, "loss": 0.16216278076171875, "step": 83620 }, { "epoch": 0.7230806478110868, "grad_norm": 11.281960222179785, "learning_rate": 4.264187405477839e-06, "loss": 0.3336189270019531, "step": 83625 }, { "epoch": 0.7231238813326301, "grad_norm": 10.29118900032558, "learning_rate": 4.264002641671729e-06, "loss": 0.1022979736328125, "step": 83630 }, { "epoch": 0.7231671148541733, "grad_norm": 21.54360683056263, "learning_rate": 4.263817872036149e-06, "loss": 0.323126220703125, "step": 83635 }, { "epoch": 0.7232103483757166, "grad_norm": 13.22684532899637, "learning_rate": 4.263633096571953e-06, "loss": 0.22825469970703124, "step": 83640 }, { "epoch": 0.7232535818972599, "grad_norm": 2.28885118486056, "learning_rate": 4.263448315279994e-06, "loss": 0.1415264129638672, "step": 83645 }, { "epoch": 0.7232968154188031, "grad_norm": 5.932942874184703, "learning_rate": 4.263263528161123e-06, "loss": 0.127349853515625, "step": 83650 }, { "epoch": 0.7233400489403464, "grad_norm": 3.4275346071028188, "learning_rate": 4.263078735216191e-06, "loss": 0.06560897827148438, "step": 83655 }, { "epoch": 0.7233832824618897, "grad_norm": 3.296052230623242, "learning_rate": 4.262893936446053e-06, "loss": 0.28267059326171873, "step": 83660 }, { "epoch": 0.7234265159834329, "grad_norm": 3.8440449104926153, "learning_rate": 4.262709131851558e-06, "loss": 0.24228515625, "step": 83665 }, { "epoch": 0.7234697495049762, "grad_norm": 4.123573805787013, "learning_rate": 4.2625243214335605e-06, "loss": 0.10122413635253906, "step": 83670 }, { "epoch": 0.7235129830265195, "grad_norm": 10.846163206487011, "learning_rate": 4.262339505192913e-06, "loss": 0.1226470947265625, "step": 83675 }, { "epoch": 0.7235562165480627, "grad_norm": 2.9899870760146716, "learning_rate": 4.262154683130467e-06, "loss": 0.233575439453125, "step": 83680 }, { "epoch": 0.723599450069606, "grad_norm": 37.91849072055342, "learning_rate": 4.261969855247075e-06, "loss": 0.3006500244140625, "step": 83685 }, { "epoch": 0.7236426835911492, "grad_norm": 8.28641128708355, "learning_rate": 4.26178502154359e-06, "loss": 0.02668609619140625, "step": 83690 }, { "epoch": 0.7236859171126925, "grad_norm": 22.411914986919317, "learning_rate": 4.261600182020863e-06, "loss": 0.14203262329101562, "step": 83695 }, { "epoch": 0.7237291506342357, "grad_norm": 6.869757128876408, "learning_rate": 4.261415336679749e-06, "loss": 0.6893402099609375, "step": 83700 }, { "epoch": 0.723772384155779, "grad_norm": 2.609129645646317, "learning_rate": 4.2612304855210975e-06, "loss": 0.029213714599609374, "step": 83705 }, { "epoch": 0.7238156176773223, "grad_norm": 7.189377611117482, "learning_rate": 4.261045628545763e-06, "loss": 0.2597923278808594, "step": 83710 }, { "epoch": 0.7238588511988655, "grad_norm": 1.460017405548987, "learning_rate": 4.260860765754599e-06, "loss": 0.359716796875, "step": 83715 }, { "epoch": 0.7239020847204088, "grad_norm": 7.278219215127159, "learning_rate": 4.260675897148454e-06, "loss": 0.10433540344238282, "step": 83720 }, { "epoch": 0.7239453182419521, "grad_norm": 5.731116466580788, "learning_rate": 4.260491022728184e-06, "loss": 0.19917984008789064, "step": 83725 }, { "epoch": 0.7239885517634953, "grad_norm": 0.5299689963106804, "learning_rate": 4.260306142494641e-06, "loss": 0.04079360961914062, "step": 83730 }, { "epoch": 0.7240317852850386, "grad_norm": 0.9294462979912831, "learning_rate": 4.260121256448678e-06, "loss": 0.2201251983642578, "step": 83735 }, { "epoch": 0.7240750188065819, "grad_norm": 9.545557160749436, "learning_rate": 4.259936364591146e-06, "loss": 0.16157684326171876, "step": 83740 }, { "epoch": 0.7241182523281251, "grad_norm": 6.67215870042354, "learning_rate": 4.2597514669229e-06, "loss": 0.23767242431640626, "step": 83745 }, { "epoch": 0.7241614858496684, "grad_norm": 3.6866115369054286, "learning_rate": 4.25956656344479e-06, "loss": 0.17256011962890624, "step": 83750 }, { "epoch": 0.7242047193712117, "grad_norm": 6.205477935365689, "learning_rate": 4.259381654157671e-06, "loss": 0.30458984375, "step": 83755 }, { "epoch": 0.7242479528927549, "grad_norm": 22.470201996071985, "learning_rate": 4.259196739062396e-06, "loss": 0.21880168914794923, "step": 83760 }, { "epoch": 0.7242911864142982, "grad_norm": 1.5148894173023282, "learning_rate": 4.259011818159816e-06, "loss": 0.10556526184082031, "step": 83765 }, { "epoch": 0.7243344199358415, "grad_norm": 14.535512901549364, "learning_rate": 4.258826891450785e-06, "loss": 0.15438690185546874, "step": 83770 }, { "epoch": 0.7243776534573847, "grad_norm": 2.4311411463775396, "learning_rate": 4.258641958936156e-06, "loss": 0.07741546630859375, "step": 83775 }, { "epoch": 0.724420886978928, "grad_norm": 0.8007119207164002, "learning_rate": 4.25845702061678e-06, "loss": 0.08251876831054687, "step": 83780 }, { "epoch": 0.7244641205004713, "grad_norm": 20.617951548645166, "learning_rate": 4.258272076493513e-06, "loss": 0.283978271484375, "step": 83785 }, { "epoch": 0.7245073540220145, "grad_norm": 1.2953448986613298, "learning_rate": 4.258087126567206e-06, "loss": 0.2373668670654297, "step": 83790 }, { "epoch": 0.7245505875435577, "grad_norm": 5.765596438687675, "learning_rate": 4.257902170838712e-06, "loss": 0.08555831909179687, "step": 83795 }, { "epoch": 0.7245938210651011, "grad_norm": 4.414272610082724, "learning_rate": 4.257717209308885e-06, "loss": 0.3908355712890625, "step": 83800 }, { "epoch": 0.7246370545866443, "grad_norm": 6.466396335217968, "learning_rate": 4.257532241978577e-06, "loss": 0.101470947265625, "step": 83805 }, { "epoch": 0.7246802881081875, "grad_norm": 2.617054193566902, "learning_rate": 4.2573472688486404e-06, "loss": 0.097772216796875, "step": 83810 }, { "epoch": 0.7247235216297309, "grad_norm": 33.89456074273089, "learning_rate": 4.257162289919931e-06, "loss": 0.130792236328125, "step": 83815 }, { "epoch": 0.7247667551512741, "grad_norm": 3.9633830064821303, "learning_rate": 4.256977305193299e-06, "loss": 0.0480072021484375, "step": 83820 }, { "epoch": 0.7248099886728173, "grad_norm": 0.3379362956674923, "learning_rate": 4.256792314669599e-06, "loss": 0.166796875, "step": 83825 }, { "epoch": 0.7248532221943607, "grad_norm": 0.7259519790700418, "learning_rate": 4.256607318349684e-06, "loss": 0.2518341064453125, "step": 83830 }, { "epoch": 0.7248964557159039, "grad_norm": 0.9986437193629554, "learning_rate": 4.256422316234407e-06, "loss": 0.08576202392578125, "step": 83835 }, { "epoch": 0.7249396892374471, "grad_norm": 1.1170563132282156, "learning_rate": 4.256237308324622e-06, "loss": 0.0967864990234375, "step": 83840 }, { "epoch": 0.7249829227589905, "grad_norm": 2.20843922428221, "learning_rate": 4.256052294621181e-06, "loss": 0.01209087371826172, "step": 83845 }, { "epoch": 0.7250261562805337, "grad_norm": 11.886113592114608, "learning_rate": 4.2558672751249375e-06, "loss": 0.14703445434570311, "step": 83850 }, { "epoch": 0.7250693898020769, "grad_norm": 0.6301002765631346, "learning_rate": 4.255682249836745e-06, "loss": 0.05995941162109375, "step": 83855 }, { "epoch": 0.7251126233236203, "grad_norm": 2.1267990040318607, "learning_rate": 4.2554972187574575e-06, "loss": 0.10950698852539062, "step": 83860 }, { "epoch": 0.7251558568451635, "grad_norm": 14.673113021689899, "learning_rate": 4.255312181887927e-06, "loss": 0.10567703247070312, "step": 83865 }, { "epoch": 0.7251990903667067, "grad_norm": 7.407860553270111, "learning_rate": 4.2551271392290085e-06, "loss": 0.22977943420410157, "step": 83870 }, { "epoch": 0.7252423238882499, "grad_norm": 2.455780760280907, "learning_rate": 4.2549420907815545e-06, "loss": 0.23494415283203124, "step": 83875 }, { "epoch": 0.7252855574097933, "grad_norm": 37.899468023416375, "learning_rate": 4.2547570365464175e-06, "loss": 0.5481536865234375, "step": 83880 }, { "epoch": 0.7253287909313365, "grad_norm": 0.22442540065493427, "learning_rate": 4.254571976524453e-06, "loss": 0.33703155517578126, "step": 83885 }, { "epoch": 0.7253720244528797, "grad_norm": 1.3983524575374313, "learning_rate": 4.254386910716512e-06, "loss": 0.082891845703125, "step": 83890 }, { "epoch": 0.7254152579744231, "grad_norm": 5.472263839872116, "learning_rate": 4.254201839123451e-06, "loss": 0.13149795532226563, "step": 83895 }, { "epoch": 0.7254584914959663, "grad_norm": 30.52144655081904, "learning_rate": 4.254016761746121e-06, "loss": 0.26107330322265626, "step": 83900 }, { "epoch": 0.7255017250175095, "grad_norm": 30.958715508651252, "learning_rate": 4.253831678585378e-06, "loss": 0.23492202758789063, "step": 83905 }, { "epoch": 0.7255449585390529, "grad_norm": 2.4198534536206595, "learning_rate": 4.253646589642072e-06, "loss": 0.015382957458496094, "step": 83910 }, { "epoch": 0.7255881920605961, "grad_norm": 6.859481505841653, "learning_rate": 4.253461494917061e-06, "loss": 0.13634986877441407, "step": 83915 }, { "epoch": 0.7256314255821393, "grad_norm": 2.5223083128451003, "learning_rate": 4.253276394411194e-06, "loss": 0.08877182006835938, "step": 83920 }, { "epoch": 0.7256746591036827, "grad_norm": 0.8754990031824113, "learning_rate": 4.2530912881253276e-06, "loss": 0.353631591796875, "step": 83925 }, { "epoch": 0.7257178926252259, "grad_norm": 4.602240890339762, "learning_rate": 4.2529061760603155e-06, "loss": 0.062078857421875, "step": 83930 }, { "epoch": 0.7257611261467691, "grad_norm": 0.8706964094701963, "learning_rate": 4.252721058217011e-06, "loss": 0.24890899658203125, "step": 83935 }, { "epoch": 0.7258043596683125, "grad_norm": 6.474978541163824, "learning_rate": 4.252535934596267e-06, "loss": 0.01450347900390625, "step": 83940 }, { "epoch": 0.7258475931898557, "grad_norm": 16.70666369266751, "learning_rate": 4.252350805198939e-06, "loss": 0.22363052368164063, "step": 83945 }, { "epoch": 0.7258908267113989, "grad_norm": 26.52243632145259, "learning_rate": 4.252165670025878e-06, "loss": 0.25145587921142576, "step": 83950 }, { "epoch": 0.7259340602329423, "grad_norm": 4.275105405464428, "learning_rate": 4.251980529077941e-06, "loss": 0.15537261962890625, "step": 83955 }, { "epoch": 0.7259772937544855, "grad_norm": 0.6830413245959811, "learning_rate": 4.251795382355979e-06, "loss": 0.166717529296875, "step": 83960 }, { "epoch": 0.7260205272760287, "grad_norm": 7.943973558085221, "learning_rate": 4.2516102298608496e-06, "loss": 0.12809600830078124, "step": 83965 }, { "epoch": 0.726063760797572, "grad_norm": 4.489896086299559, "learning_rate": 4.251425071593403e-06, "loss": 0.20390777587890624, "step": 83970 }, { "epoch": 0.7261069943191153, "grad_norm": 6.187214806836528, "learning_rate": 4.251239907554495e-06, "loss": 0.2121795654296875, "step": 83975 }, { "epoch": 0.7261502278406585, "grad_norm": 39.607936969025296, "learning_rate": 4.251054737744978e-06, "loss": 0.3336669921875, "step": 83980 }, { "epoch": 0.7261934613622018, "grad_norm": 42.25640786135254, "learning_rate": 4.250869562165708e-06, "loss": 0.5181350708007812, "step": 83985 }, { "epoch": 0.7262366948837451, "grad_norm": 3.823510408869257, "learning_rate": 4.250684380817538e-06, "loss": 0.25753173828125, "step": 83990 }, { "epoch": 0.7262799284052883, "grad_norm": 11.684044701517278, "learning_rate": 4.250499193701321e-06, "loss": 0.09215660095214843, "step": 83995 }, { "epoch": 0.7263231619268316, "grad_norm": 10.711886453020652, "learning_rate": 4.250314000817914e-06, "loss": 0.1873779296875, "step": 84000 }, { "epoch": 0.7263663954483749, "grad_norm": 0.646007751011855, "learning_rate": 4.2501288021681685e-06, "loss": 0.07654571533203125, "step": 84005 }, { "epoch": 0.7264096289699181, "grad_norm": 13.830015527076636, "learning_rate": 4.24994359775294e-06, "loss": 0.09295196533203125, "step": 84010 }, { "epoch": 0.7264528624914613, "grad_norm": 13.284026854119356, "learning_rate": 4.249758387573082e-06, "loss": 0.28648033142089846, "step": 84015 }, { "epoch": 0.7264960960130047, "grad_norm": 8.589019417251762, "learning_rate": 4.249573171629448e-06, "loss": 0.29635086059570315, "step": 84020 }, { "epoch": 0.7265393295345479, "grad_norm": 8.694752573578276, "learning_rate": 4.249387949922893e-06, "loss": 0.3803955078125, "step": 84025 }, { "epoch": 0.7265825630560911, "grad_norm": 0.7925115866151112, "learning_rate": 4.249202722454272e-06, "loss": 0.1545989990234375, "step": 84030 }, { "epoch": 0.7266257965776345, "grad_norm": 22.114588526066985, "learning_rate": 4.249017489224437e-06, "loss": 0.2475505828857422, "step": 84035 }, { "epoch": 0.7266690300991777, "grad_norm": 3.113275280024159, "learning_rate": 4.248832250234245e-06, "loss": 0.18948898315429688, "step": 84040 }, { "epoch": 0.7267122636207209, "grad_norm": 3.225154475019402, "learning_rate": 4.248647005484548e-06, "loss": 0.2398193359375, "step": 84045 }, { "epoch": 0.7267554971422642, "grad_norm": 26.032620176413396, "learning_rate": 4.248461754976203e-06, "loss": 0.1292724609375, "step": 84050 }, { "epoch": 0.7267987306638075, "grad_norm": 2.035556013516875, "learning_rate": 4.248276498710061e-06, "loss": 0.06427230834960937, "step": 84055 }, { "epoch": 0.7268419641853507, "grad_norm": 0.4353340436171339, "learning_rate": 4.2480912366869785e-06, "loss": 0.025200653076171874, "step": 84060 }, { "epoch": 0.726885197706894, "grad_norm": 4.256592371807554, "learning_rate": 4.24790596890781e-06, "loss": 0.274725341796875, "step": 84065 }, { "epoch": 0.7269284312284373, "grad_norm": 2.48634251927284, "learning_rate": 4.247720695373409e-06, "loss": 0.5948394775390625, "step": 84070 }, { "epoch": 0.7269716647499805, "grad_norm": 34.39437851121331, "learning_rate": 4.247535416084631e-06, "loss": 0.2929046630859375, "step": 84075 }, { "epoch": 0.7270148982715238, "grad_norm": 9.412499516666482, "learning_rate": 4.24735013104233e-06, "loss": 0.09070358276367188, "step": 84080 }, { "epoch": 0.7270581317930671, "grad_norm": 4.6779246038752165, "learning_rate": 4.247164840247359e-06, "loss": 0.07815017700195312, "step": 84085 }, { "epoch": 0.7271013653146103, "grad_norm": 8.028350737800817, "learning_rate": 4.246979543700575e-06, "loss": 0.11791763305664063, "step": 84090 }, { "epoch": 0.7271445988361536, "grad_norm": 8.305639632899597, "learning_rate": 4.246794241402831e-06, "loss": 0.46358299255371094, "step": 84095 }, { "epoch": 0.7271878323576969, "grad_norm": 17.440461753314292, "learning_rate": 4.246608933354983e-06, "loss": 0.21133270263671874, "step": 84100 }, { "epoch": 0.7272310658792401, "grad_norm": 0.9309985504267717, "learning_rate": 4.246423619557885e-06, "loss": 0.12033653259277344, "step": 84105 }, { "epoch": 0.7272742994007834, "grad_norm": 3.1925659339893353, "learning_rate": 4.24623830001239e-06, "loss": 0.232415771484375, "step": 84110 }, { "epoch": 0.7273175329223267, "grad_norm": 0.8666631690951782, "learning_rate": 4.246052974719355e-06, "loss": 0.049163818359375, "step": 84115 }, { "epoch": 0.7273607664438699, "grad_norm": 1.4008568233128496, "learning_rate": 4.245867643679633e-06, "loss": 0.2993156433105469, "step": 84120 }, { "epoch": 0.7274039999654132, "grad_norm": 31.17164291441279, "learning_rate": 4.24568230689408e-06, "loss": 0.31982421875, "step": 84125 }, { "epoch": 0.7274472334869565, "grad_norm": 3.0748126869586145, "learning_rate": 4.245496964363551e-06, "loss": 0.11826324462890625, "step": 84130 }, { "epoch": 0.7274904670084997, "grad_norm": 9.634885566094574, "learning_rate": 4.2453116160889e-06, "loss": 0.15536575317382811, "step": 84135 }, { "epoch": 0.727533700530043, "grad_norm": 27.140186224838722, "learning_rate": 4.2451262620709805e-06, "loss": 0.1512054443359375, "step": 84140 }, { "epoch": 0.7275769340515862, "grad_norm": 1.8051917729312728, "learning_rate": 4.2449409023106495e-06, "loss": 0.44119873046875, "step": 84145 }, { "epoch": 0.7276201675731295, "grad_norm": 1.193583542987602, "learning_rate": 4.244755536808761e-06, "loss": 0.10908012390136719, "step": 84150 }, { "epoch": 0.7276634010946728, "grad_norm": 4.6390835314879695, "learning_rate": 4.2445701655661704e-06, "loss": 0.05146942138671875, "step": 84155 }, { "epoch": 0.727706634616216, "grad_norm": 1.197052817206225, "learning_rate": 4.244384788583732e-06, "loss": 0.2186309814453125, "step": 84160 }, { "epoch": 0.7277498681377593, "grad_norm": 0.6344631221058874, "learning_rate": 4.2441994058623e-06, "loss": 0.18036041259765626, "step": 84165 }, { "epoch": 0.7277931016593026, "grad_norm": 9.16292734961286, "learning_rate": 4.244014017402731e-06, "loss": 0.11030693054199218, "step": 84170 }, { "epoch": 0.7278363351808458, "grad_norm": 1.5663749296997216, "learning_rate": 4.243828623205879e-06, "loss": 0.11447257995605468, "step": 84175 }, { "epoch": 0.7278795687023891, "grad_norm": 0.9663583621852527, "learning_rate": 4.243643223272599e-06, "loss": 0.061894989013671874, "step": 84180 }, { "epoch": 0.7279228022239324, "grad_norm": 0.7365967625759573, "learning_rate": 4.243457817603747e-06, "loss": 0.15886764526367186, "step": 84185 }, { "epoch": 0.7279660357454756, "grad_norm": 3.789577651098886, "learning_rate": 4.243272406200177e-06, "loss": 0.20223541259765626, "step": 84190 }, { "epoch": 0.7280092692670189, "grad_norm": 53.92449031553241, "learning_rate": 4.243086989062746e-06, "loss": 0.13629913330078125, "step": 84195 }, { "epoch": 0.7280525027885622, "grad_norm": 64.5409060201135, "learning_rate": 4.242901566192306e-06, "loss": 0.628167724609375, "step": 84200 }, { "epoch": 0.7280957363101054, "grad_norm": 12.01803878805349, "learning_rate": 4.242716137589713e-06, "loss": 0.13508071899414062, "step": 84205 }, { "epoch": 0.7281389698316487, "grad_norm": 0.4421820181898576, "learning_rate": 4.242530703255824e-06, "loss": 0.07575244903564453, "step": 84210 }, { "epoch": 0.728182203353192, "grad_norm": 28.45948190975162, "learning_rate": 4.242345263191495e-06, "loss": 0.18738250732421874, "step": 84215 }, { "epoch": 0.7282254368747352, "grad_norm": 11.209744552608164, "learning_rate": 4.242159817397578e-06, "loss": 0.1592864990234375, "step": 84220 }, { "epoch": 0.7282686703962784, "grad_norm": 36.423107115173636, "learning_rate": 4.241974365874929e-06, "loss": 0.5447990417480468, "step": 84225 }, { "epoch": 0.7283119039178217, "grad_norm": 2.3248903235849503, "learning_rate": 4.2417889086244045e-06, "loss": 0.033282470703125, "step": 84230 }, { "epoch": 0.728355137439365, "grad_norm": 0.6312310590751623, "learning_rate": 4.2416034456468594e-06, "loss": 0.249542236328125, "step": 84235 }, { "epoch": 0.7283983709609082, "grad_norm": 1.7366138485077316, "learning_rate": 4.241417976943149e-06, "loss": 0.05434417724609375, "step": 84240 }, { "epoch": 0.7284416044824515, "grad_norm": 28.820787708591865, "learning_rate": 4.2412325025141275e-06, "loss": 0.197113037109375, "step": 84245 }, { "epoch": 0.7284848380039948, "grad_norm": 15.21940309636743, "learning_rate": 4.2410470223606525e-06, "loss": 0.2097198486328125, "step": 84250 }, { "epoch": 0.728528071525538, "grad_norm": 25.654889409350858, "learning_rate": 4.2408615364835785e-06, "loss": 0.39983062744140624, "step": 84255 }, { "epoch": 0.7285713050470813, "grad_norm": 6.1813958306273555, "learning_rate": 4.240676044883759e-06, "loss": 0.09600830078125, "step": 84260 }, { "epoch": 0.7286145385686246, "grad_norm": 3.66890192856096, "learning_rate": 4.240490547562053e-06, "loss": 0.05359344482421875, "step": 84265 }, { "epoch": 0.7286577720901678, "grad_norm": 0.37902845949057434, "learning_rate": 4.240305044519313e-06, "loss": 0.058705902099609374, "step": 84270 }, { "epoch": 0.7287010056117111, "grad_norm": 0.4222795949605815, "learning_rate": 4.240119535756397e-06, "loss": 0.1283782958984375, "step": 84275 }, { "epoch": 0.7287442391332544, "grad_norm": 0.8478599834936946, "learning_rate": 4.2399340212741595e-06, "loss": 0.14512481689453124, "step": 84280 }, { "epoch": 0.7287874726547976, "grad_norm": 27.54982134865894, "learning_rate": 4.239748501073455e-06, "loss": 0.11795654296875, "step": 84285 }, { "epoch": 0.7288307061763409, "grad_norm": 6.088326895330389, "learning_rate": 4.239562975155138e-06, "loss": 0.117767333984375, "step": 84290 }, { "epoch": 0.7288739396978842, "grad_norm": 4.300240726891603, "learning_rate": 4.239377443520069e-06, "loss": 0.27310028076171877, "step": 84295 }, { "epoch": 0.7289171732194274, "grad_norm": 0.29892646986487564, "learning_rate": 4.2391919061691e-06, "loss": 0.15272216796875, "step": 84300 }, { "epoch": 0.7289604067409707, "grad_norm": 1.7214913697856, "learning_rate": 4.239006363103087e-06, "loss": 0.02724456787109375, "step": 84305 }, { "epoch": 0.729003640262514, "grad_norm": 24.10673942894621, "learning_rate": 4.238820814322887e-06, "loss": 0.17634429931640624, "step": 84310 }, { "epoch": 0.7290468737840572, "grad_norm": 37.34332154909239, "learning_rate": 4.238635259829354e-06, "loss": 0.15858535766601561, "step": 84315 }, { "epoch": 0.7290901073056004, "grad_norm": 6.150395020647746, "learning_rate": 4.238449699623345e-06, "loss": 0.13323841094970704, "step": 84320 }, { "epoch": 0.7291333408271438, "grad_norm": 0.1036414132597783, "learning_rate": 4.238264133705715e-06, "loss": 0.07196578979492188, "step": 84325 }, { "epoch": 0.729176574348687, "grad_norm": 16.62961483077196, "learning_rate": 4.238078562077321e-06, "loss": 0.156488037109375, "step": 84330 }, { "epoch": 0.7292198078702302, "grad_norm": 35.38042975584637, "learning_rate": 4.237892984739017e-06, "loss": 0.22403717041015625, "step": 84335 }, { "epoch": 0.7292630413917736, "grad_norm": 4.516194248775158, "learning_rate": 4.237707401691661e-06, "loss": 0.10970611572265625, "step": 84340 }, { "epoch": 0.7293062749133168, "grad_norm": 26.11398928447353, "learning_rate": 4.237521812936107e-06, "loss": 0.5223594665527344, "step": 84345 }, { "epoch": 0.72934950843486, "grad_norm": 6.017721536963525, "learning_rate": 4.237336218473213e-06, "loss": 0.11381416320800782, "step": 84350 }, { "epoch": 0.7293927419564034, "grad_norm": 7.930732824076691, "learning_rate": 4.237150618303832e-06, "loss": 0.2372406005859375, "step": 84355 }, { "epoch": 0.7294359754779466, "grad_norm": 18.41478610434044, "learning_rate": 4.236965012428822e-06, "loss": 0.16429977416992186, "step": 84360 }, { "epoch": 0.7294792089994898, "grad_norm": 29.569368055671166, "learning_rate": 4.23677940084904e-06, "loss": 0.1687591552734375, "step": 84365 }, { "epoch": 0.7295224425210332, "grad_norm": 2.3020649395545645, "learning_rate": 4.23659378356534e-06, "loss": 0.29993896484375, "step": 84370 }, { "epoch": 0.7295656760425764, "grad_norm": 17.62964137574456, "learning_rate": 4.236408160578578e-06, "loss": 0.2454132080078125, "step": 84375 }, { "epoch": 0.7296089095641196, "grad_norm": 16.210865113954895, "learning_rate": 4.2362225318896115e-06, "loss": 0.5902755737304688, "step": 84380 }, { "epoch": 0.729652143085663, "grad_norm": 41.276111678549384, "learning_rate": 4.2360368974992955e-06, "loss": 0.42918243408203127, "step": 84385 }, { "epoch": 0.7296953766072062, "grad_norm": 8.328668950023726, "learning_rate": 4.235851257408487e-06, "loss": 0.2185588836669922, "step": 84390 }, { "epoch": 0.7297386101287494, "grad_norm": 0.42863874865421553, "learning_rate": 4.235665611618042e-06, "loss": 0.07691459655761719, "step": 84395 }, { "epoch": 0.7297818436502926, "grad_norm": 42.59907117003878, "learning_rate": 4.235479960128816e-06, "loss": 0.2853370666503906, "step": 84400 }, { "epoch": 0.729825077171836, "grad_norm": 11.955445846469969, "learning_rate": 4.235294302941665e-06, "loss": 0.21484832763671874, "step": 84405 }, { "epoch": 0.7298683106933792, "grad_norm": 6.972058247153089, "learning_rate": 4.235108640057446e-06, "loss": 0.0618804931640625, "step": 84410 }, { "epoch": 0.7299115442149224, "grad_norm": 1.6470391815844305, "learning_rate": 4.234922971477016e-06, "loss": 0.06138038635253906, "step": 84415 }, { "epoch": 0.7299547777364658, "grad_norm": 2.332520284580805, "learning_rate": 4.234737297201229e-06, "loss": 0.16098709106445314, "step": 84420 }, { "epoch": 0.729998011258009, "grad_norm": 6.149047648723553, "learning_rate": 4.234551617230945e-06, "loss": 0.052630615234375, "step": 84425 }, { "epoch": 0.7300412447795522, "grad_norm": 37.25467754046997, "learning_rate": 4.2343659315670155e-06, "loss": 0.1545948028564453, "step": 84430 }, { "epoch": 0.7300844783010956, "grad_norm": 5.019904533586117, "learning_rate": 4.2341802402103e-06, "loss": 0.06952438354492188, "step": 84435 }, { "epoch": 0.7301277118226388, "grad_norm": 3.7141197615460864, "learning_rate": 4.233994543161655e-06, "loss": 0.060955047607421875, "step": 84440 }, { "epoch": 0.730170945344182, "grad_norm": 3.717368323556138, "learning_rate": 4.233808840421936e-06, "loss": 0.0428253173828125, "step": 84445 }, { "epoch": 0.7302141788657254, "grad_norm": 1.2966349141038234, "learning_rate": 4.233623131991999e-06, "loss": 0.13477020263671874, "step": 84450 }, { "epoch": 0.7302574123872686, "grad_norm": 6.797390396539649, "learning_rate": 4.233437417872702e-06, "loss": 0.08897552490234376, "step": 84455 }, { "epoch": 0.7303006459088118, "grad_norm": 1.9618935159343345, "learning_rate": 4.2332516980649e-06, "loss": 0.20350875854492187, "step": 84460 }, { "epoch": 0.7303438794303552, "grad_norm": 5.072370765752823, "learning_rate": 4.23306597256945e-06, "loss": 0.3709678649902344, "step": 84465 }, { "epoch": 0.7303871129518984, "grad_norm": 14.36345824289482, "learning_rate": 4.23288024138721e-06, "loss": 0.0668243408203125, "step": 84470 }, { "epoch": 0.7304303464734416, "grad_norm": 27.019493769722146, "learning_rate": 4.232694504519034e-06, "loss": 0.40690460205078127, "step": 84475 }, { "epoch": 0.7304735799949849, "grad_norm": 10.485300694992718, "learning_rate": 4.232508761965781e-06, "loss": 0.10232734680175781, "step": 84480 }, { "epoch": 0.7305168135165282, "grad_norm": 7.692075434405819, "learning_rate": 4.232323013728305e-06, "loss": 0.12431793212890625, "step": 84485 }, { "epoch": 0.7305600470380714, "grad_norm": 0.8327481627691707, "learning_rate": 4.232137259807465e-06, "loss": 0.07005290985107422, "step": 84490 }, { "epoch": 0.7306032805596147, "grad_norm": 32.610897912763335, "learning_rate": 4.231951500204117e-06, "loss": 0.19680938720703126, "step": 84495 }, { "epoch": 0.730646514081158, "grad_norm": 16.16816892834761, "learning_rate": 4.231765734919117e-06, "loss": 0.09533939361572266, "step": 84500 }, { "epoch": 0.7306897476027012, "grad_norm": 22.674725965293177, "learning_rate": 4.2315799639533226e-06, "loss": 0.1954681396484375, "step": 84505 }, { "epoch": 0.7307329811242445, "grad_norm": 2.778595868576687, "learning_rate": 4.231394187307591e-06, "loss": 0.28274078369140626, "step": 84510 }, { "epoch": 0.7307762146457878, "grad_norm": 18.949742190253097, "learning_rate": 4.2312084049827766e-06, "loss": 0.3318639755249023, "step": 84515 }, { "epoch": 0.730819448167331, "grad_norm": 3.78929725467462, "learning_rate": 4.231022616979739e-06, "loss": 0.10584716796875, "step": 84520 }, { "epoch": 0.7308626816888742, "grad_norm": 3.6637468645247475, "learning_rate": 4.2308368232993335e-06, "loss": 0.16651840209960939, "step": 84525 }, { "epoch": 0.7309059152104176, "grad_norm": 8.404637566083617, "learning_rate": 4.230651023942417e-06, "loss": 0.06157989501953125, "step": 84530 }, { "epoch": 0.7309491487319608, "grad_norm": 13.943801148266175, "learning_rate": 4.2304652189098474e-06, "loss": 0.06962242126464843, "step": 84535 }, { "epoch": 0.730992382253504, "grad_norm": 0.20637988135660884, "learning_rate": 4.23027940820248e-06, "loss": 0.08662986755371094, "step": 84540 }, { "epoch": 0.7310356157750474, "grad_norm": 6.637268091780927, "learning_rate": 4.230093591821173e-06, "loss": 0.607061767578125, "step": 84545 }, { "epoch": 0.7310788492965906, "grad_norm": 1.499517673639293, "learning_rate": 4.229907769766783e-06, "loss": 0.5429718017578125, "step": 84550 }, { "epoch": 0.7311220828181338, "grad_norm": 1.3753754912635685, "learning_rate": 4.229721942040168e-06, "loss": 0.36233062744140626, "step": 84555 }, { "epoch": 0.7311653163396772, "grad_norm": 1.167840902300939, "learning_rate": 4.229536108642183e-06, "loss": 0.17586517333984375, "step": 84560 }, { "epoch": 0.7312085498612204, "grad_norm": 30.677678712672815, "learning_rate": 4.229350269573687e-06, "loss": 0.24127578735351562, "step": 84565 }, { "epoch": 0.7312517833827636, "grad_norm": 55.96114201814945, "learning_rate": 4.229164424835536e-06, "loss": 0.48255462646484376, "step": 84570 }, { "epoch": 0.7312950169043069, "grad_norm": 9.01747440860128, "learning_rate": 4.228978574428586e-06, "loss": 0.06499481201171875, "step": 84575 }, { "epoch": 0.7313382504258502, "grad_norm": 20.234441153120315, "learning_rate": 4.2287927183536965e-06, "loss": 0.403839111328125, "step": 84580 }, { "epoch": 0.7313814839473934, "grad_norm": 8.336891116184018, "learning_rate": 4.228606856611724e-06, "loss": 0.1190765380859375, "step": 84585 }, { "epoch": 0.7314247174689367, "grad_norm": 4.891247610803181, "learning_rate": 4.228420989203525e-06, "loss": 0.19469451904296875, "step": 84590 }, { "epoch": 0.73146795099048, "grad_norm": 2.1873050229088395, "learning_rate": 4.2282351161299555e-06, "loss": 0.1021754264831543, "step": 84595 }, { "epoch": 0.7315111845120232, "grad_norm": 0.6919177339794907, "learning_rate": 4.228049237391875e-06, "loss": 0.15350875854492188, "step": 84600 }, { "epoch": 0.7315544180335665, "grad_norm": 0.6093281318984936, "learning_rate": 4.2278633529901415e-06, "loss": 0.042791748046875, "step": 84605 }, { "epoch": 0.7315976515551098, "grad_norm": 3.4530537779474564, "learning_rate": 4.227677462925609e-06, "loss": 0.31472015380859375, "step": 84610 }, { "epoch": 0.731640885076653, "grad_norm": 1.008094090039079, "learning_rate": 4.227491567199137e-06, "loss": 0.1613300323486328, "step": 84615 }, { "epoch": 0.7316841185981963, "grad_norm": 1.0406546792826712, "learning_rate": 4.227305665811583e-06, "loss": 0.2556312561035156, "step": 84620 }, { "epoch": 0.7317273521197396, "grad_norm": 1.3615454409684955, "learning_rate": 4.227119758763802e-06, "loss": 0.094952392578125, "step": 84625 }, { "epoch": 0.7317705856412828, "grad_norm": 0.42309464526003276, "learning_rate": 4.226933846056654e-06, "loss": 0.30537109375, "step": 84630 }, { "epoch": 0.7318138191628261, "grad_norm": 2.863765135846331, "learning_rate": 4.226747927690996e-06, "loss": 0.0481170654296875, "step": 84635 }, { "epoch": 0.7318570526843694, "grad_norm": 0.8486962909569518, "learning_rate": 4.2265620036676845e-06, "loss": 0.08644561767578125, "step": 84640 }, { "epoch": 0.7319002862059126, "grad_norm": 17.34267485485547, "learning_rate": 4.2263760739875775e-06, "loss": 0.5615234375, "step": 84645 }, { "epoch": 0.7319435197274559, "grad_norm": 9.013604210461768, "learning_rate": 4.2261901386515325e-06, "loss": 0.19283065795898438, "step": 84650 }, { "epoch": 0.7319867532489991, "grad_norm": 29.350771035264593, "learning_rate": 4.226004197660407e-06, "loss": 0.17650318145751953, "step": 84655 }, { "epoch": 0.7320299867705424, "grad_norm": 20.74954995475334, "learning_rate": 4.225818251015057e-06, "loss": 0.34817657470703123, "step": 84660 }, { "epoch": 0.7320732202920857, "grad_norm": 3.3847773285883793, "learning_rate": 4.225632298716344e-06, "loss": 0.09912147521972656, "step": 84665 }, { "epoch": 0.7321164538136289, "grad_norm": 0.43693765643808397, "learning_rate": 4.2254463407651225e-06, "loss": 0.2721435546875, "step": 84670 }, { "epoch": 0.7321596873351722, "grad_norm": 28.262733568422078, "learning_rate": 4.22526037716225e-06, "loss": 0.24478302001953126, "step": 84675 }, { "epoch": 0.7322029208567155, "grad_norm": 4.469023392654252, "learning_rate": 4.225074407908585e-06, "loss": 0.14249343872070314, "step": 84680 }, { "epoch": 0.7322461543782587, "grad_norm": 0.394000093204043, "learning_rate": 4.2248884330049855e-06, "loss": 0.0833892822265625, "step": 84685 }, { "epoch": 0.732289387899802, "grad_norm": 13.794529787145285, "learning_rate": 4.224702452452308e-06, "loss": 0.3736927032470703, "step": 84690 }, { "epoch": 0.7323326214213453, "grad_norm": 4.336977672940649, "learning_rate": 4.224516466251412e-06, "loss": 0.046544647216796874, "step": 84695 }, { "epoch": 0.7323758549428885, "grad_norm": 0.9006709077859371, "learning_rate": 4.224330474403154e-06, "loss": 0.5019630432128906, "step": 84700 }, { "epoch": 0.7324190884644318, "grad_norm": 19.011139529620436, "learning_rate": 4.224144476908391e-06, "loss": 0.11392822265625, "step": 84705 }, { "epoch": 0.732462321985975, "grad_norm": 2.6225413100266737, "learning_rate": 4.223958473767983e-06, "loss": 0.02679290771484375, "step": 84710 }, { "epoch": 0.7325055555075183, "grad_norm": 0.6075720418482047, "learning_rate": 4.223772464982786e-06, "loss": 0.09916229248046875, "step": 84715 }, { "epoch": 0.7325487890290616, "grad_norm": 0.8887460390608654, "learning_rate": 4.223586450553659e-06, "loss": 0.09921875, "step": 84720 }, { "epoch": 0.7325920225506048, "grad_norm": 25.014199353176597, "learning_rate": 4.22340043048146e-06, "loss": 0.26551361083984376, "step": 84725 }, { "epoch": 0.7326352560721481, "grad_norm": 1.1205928440766324, "learning_rate": 4.223214404767046e-06, "loss": 0.12209625244140625, "step": 84730 }, { "epoch": 0.7326784895936914, "grad_norm": 1.8692212052575372, "learning_rate": 4.223028373411274e-06, "loss": 0.042508697509765624, "step": 84735 }, { "epoch": 0.7327217231152346, "grad_norm": 52.49028071671379, "learning_rate": 4.222842336415004e-06, "loss": 0.32745018005371096, "step": 84740 }, { "epoch": 0.7327649566367779, "grad_norm": 1.1802559826955863, "learning_rate": 4.222656293779093e-06, "loss": 0.2757083892822266, "step": 84745 }, { "epoch": 0.7328081901583211, "grad_norm": 13.470374861382464, "learning_rate": 4.2224702455044e-06, "loss": 0.1548736572265625, "step": 84750 }, { "epoch": 0.7328514236798644, "grad_norm": 1.069399134521909, "learning_rate": 4.2222841915917806e-06, "loss": 0.683837890625, "step": 84755 }, { "epoch": 0.7328946572014077, "grad_norm": 3.2207310016881623, "learning_rate": 4.222098132042096e-06, "loss": 0.30956497192382815, "step": 84760 }, { "epoch": 0.7329378907229509, "grad_norm": 13.096364088488889, "learning_rate": 4.221912066856203e-06, "loss": 0.07706146240234375, "step": 84765 }, { "epoch": 0.7329811242444942, "grad_norm": 12.860984161874196, "learning_rate": 4.221725996034958e-06, "loss": 0.20364532470703126, "step": 84770 }, { "epoch": 0.7330243577660375, "grad_norm": 3.5499164018815295, "learning_rate": 4.221539919579222e-06, "loss": 0.31245956420898435, "step": 84775 }, { "epoch": 0.7330675912875807, "grad_norm": 22.24898667613334, "learning_rate": 4.221353837489852e-06, "loss": 0.2131011962890625, "step": 84780 }, { "epoch": 0.733110824809124, "grad_norm": 8.320825287349244, "learning_rate": 4.221167749767705e-06, "loss": 0.10880889892578124, "step": 84785 }, { "epoch": 0.7331540583306673, "grad_norm": 16.70764348382996, "learning_rate": 4.220981656413641e-06, "loss": 0.3207859039306641, "step": 84790 }, { "epoch": 0.7331972918522105, "grad_norm": 4.598823626924354, "learning_rate": 4.220795557428517e-06, "loss": 0.21815261840820313, "step": 84795 }, { "epoch": 0.7332405253737538, "grad_norm": 2.0016312663411235, "learning_rate": 4.220609452813192e-06, "loss": 0.12303466796875, "step": 84800 }, { "epoch": 0.7332837588952971, "grad_norm": 21.396869655964384, "learning_rate": 4.220423342568524e-06, "loss": 0.24690475463867187, "step": 84805 }, { "epoch": 0.7333269924168403, "grad_norm": 1.5620445072249693, "learning_rate": 4.220237226695371e-06, "loss": 0.04062662124633789, "step": 84810 }, { "epoch": 0.7333702259383836, "grad_norm": 2.371806108206899, "learning_rate": 4.220051105194593e-06, "loss": 0.0687103271484375, "step": 84815 }, { "epoch": 0.7334134594599269, "grad_norm": 2.0970927873896157, "learning_rate": 4.219864978067046e-06, "loss": 0.07117156982421875, "step": 84820 }, { "epoch": 0.7334566929814701, "grad_norm": 8.367159925663648, "learning_rate": 4.219678845313589e-06, "loss": 0.1067291259765625, "step": 84825 }, { "epoch": 0.7334999265030133, "grad_norm": 0.37043139554214816, "learning_rate": 4.2194927069350825e-06, "loss": 0.10085792541503906, "step": 84830 }, { "epoch": 0.7335431600245567, "grad_norm": 30.532266375063625, "learning_rate": 4.219306562932382e-06, "loss": 0.2832965850830078, "step": 84835 }, { "epoch": 0.7335863935460999, "grad_norm": 0.5688634127603264, "learning_rate": 4.219120413306348e-06, "loss": 0.18505859375, "step": 84840 }, { "epoch": 0.7336296270676431, "grad_norm": 69.5283950706316, "learning_rate": 4.218934258057839e-06, "loss": 0.2821063995361328, "step": 84845 }, { "epoch": 0.7336728605891865, "grad_norm": 2.4949834505910773, "learning_rate": 4.218748097187712e-06, "loss": 0.2761688232421875, "step": 84850 }, { "epoch": 0.7337160941107297, "grad_norm": 20.823890606669032, "learning_rate": 4.2185619306968255e-06, "loss": 0.1384765625, "step": 84855 }, { "epoch": 0.7337593276322729, "grad_norm": 24.14613078496927, "learning_rate": 4.2183757585860406e-06, "loss": 0.15248184204101561, "step": 84860 }, { "epoch": 0.7338025611538163, "grad_norm": 44.17159079843773, "learning_rate": 4.218189580856214e-06, "loss": 0.23032569885253906, "step": 84865 }, { "epoch": 0.7338457946753595, "grad_norm": 21.40881405125596, "learning_rate": 4.218003397508205e-06, "loss": 0.20450439453125, "step": 84870 }, { "epoch": 0.7338890281969027, "grad_norm": 2.2812702218222896, "learning_rate": 4.217817208542872e-06, "loss": 0.0794830322265625, "step": 84875 }, { "epoch": 0.733932261718446, "grad_norm": 2.115709504101234, "learning_rate": 4.217631013961072e-06, "loss": 0.21196212768554687, "step": 84880 }, { "epoch": 0.7339754952399893, "grad_norm": 9.617984769134603, "learning_rate": 4.217444813763667e-06, "loss": 0.38827972412109374, "step": 84885 }, { "epoch": 0.7340187287615325, "grad_norm": 8.838541354956813, "learning_rate": 4.217258607951514e-06, "loss": 0.13236465454101562, "step": 84890 }, { "epoch": 0.7340619622830759, "grad_norm": 1.02715522797999, "learning_rate": 4.217072396525471e-06, "loss": 0.317547607421875, "step": 84895 }, { "epoch": 0.7341051958046191, "grad_norm": 4.386274125237071, "learning_rate": 4.216886179486399e-06, "loss": 0.0441864013671875, "step": 84900 }, { "epoch": 0.7341484293261623, "grad_norm": 6.182795003520763, "learning_rate": 4.216699956835154e-06, "loss": 0.0792144775390625, "step": 84905 }, { "epoch": 0.7341916628477057, "grad_norm": 0.8971404938641288, "learning_rate": 4.216513728572598e-06, "loss": 0.15880126953125, "step": 84910 }, { "epoch": 0.7342348963692489, "grad_norm": 14.457478271385417, "learning_rate": 4.216327494699586e-06, "loss": 0.1565582275390625, "step": 84915 }, { "epoch": 0.7342781298907921, "grad_norm": 10.743076716630299, "learning_rate": 4.2161412552169815e-06, "loss": 0.08974151611328125, "step": 84920 }, { "epoch": 0.7343213634123353, "grad_norm": 15.774113001888091, "learning_rate": 4.2159550101256396e-06, "loss": 0.12317352294921875, "step": 84925 }, { "epoch": 0.7343645969338787, "grad_norm": 4.448761194027551, "learning_rate": 4.21576875942642e-06, "loss": 0.0381988525390625, "step": 84930 }, { "epoch": 0.7344078304554219, "grad_norm": 5.488937238709093, "learning_rate": 4.2155825031201835e-06, "loss": 0.6163864135742188, "step": 84935 }, { "epoch": 0.7344510639769651, "grad_norm": 1.5821624600877955, "learning_rate": 4.215396241207787e-06, "loss": 0.17851791381835938, "step": 84940 }, { "epoch": 0.7344942974985085, "grad_norm": 19.096762373170797, "learning_rate": 4.215209973690092e-06, "loss": 0.1361175537109375, "step": 84945 }, { "epoch": 0.7345375310200517, "grad_norm": 21.099526202034145, "learning_rate": 4.2150237005679545e-06, "loss": 0.17171745300292968, "step": 84950 }, { "epoch": 0.7345807645415949, "grad_norm": 2.2906291402296195, "learning_rate": 4.214837421842236e-06, "loss": 0.07509307861328125, "step": 84955 }, { "epoch": 0.7346239980631383, "grad_norm": 6.153921151680609, "learning_rate": 4.214651137513794e-06, "loss": 0.09506893157958984, "step": 84960 }, { "epoch": 0.7346672315846815, "grad_norm": 4.976263787579256, "learning_rate": 4.214464847583488e-06, "loss": 0.19196624755859376, "step": 84965 }, { "epoch": 0.7347104651062247, "grad_norm": 0.026486674851587566, "learning_rate": 4.214278552052178e-06, "loss": 0.2668478012084961, "step": 84970 }, { "epoch": 0.7347536986277681, "grad_norm": 13.507113157251185, "learning_rate": 4.214092250920722e-06, "loss": 0.284832763671875, "step": 84975 }, { "epoch": 0.7347969321493113, "grad_norm": 2.7710511560664735, "learning_rate": 4.21390594418998e-06, "loss": 0.24014892578125, "step": 84980 }, { "epoch": 0.7348401656708545, "grad_norm": 1.612082909512949, "learning_rate": 4.213719631860812e-06, "loss": 0.25174102783203123, "step": 84985 }, { "epoch": 0.7348833991923979, "grad_norm": 0.5023850650000301, "learning_rate": 4.213533313934075e-06, "loss": 0.022522735595703124, "step": 84990 }, { "epoch": 0.7349266327139411, "grad_norm": 7.963612663492367, "learning_rate": 4.21334699041063e-06, "loss": 0.21880216598510743, "step": 84995 }, { "epoch": 0.7349698662354843, "grad_norm": 2.976425129500343, "learning_rate": 4.213160661291336e-06, "loss": 0.4348804473876953, "step": 85000 }, { "epoch": 0.7350130997570276, "grad_norm": 1.7600668394621226, "learning_rate": 4.2129743265770535e-06, "loss": 0.22443618774414062, "step": 85005 }, { "epoch": 0.7350563332785709, "grad_norm": 1.4376003562856154, "learning_rate": 4.212787986268639e-06, "loss": 0.06662750244140625, "step": 85010 }, { "epoch": 0.7350995668001141, "grad_norm": 8.73816176229515, "learning_rate": 4.212601640366954e-06, "loss": 0.30009765625, "step": 85015 }, { "epoch": 0.7351428003216574, "grad_norm": 19.07638477903776, "learning_rate": 4.2124152888728565e-06, "loss": 0.062841796875, "step": 85020 }, { "epoch": 0.7351860338432007, "grad_norm": 11.12227536341906, "learning_rate": 4.212228931787208e-06, "loss": 0.06998748779296875, "step": 85025 }, { "epoch": 0.7352292673647439, "grad_norm": 25.151638446534406, "learning_rate": 4.212042569110866e-06, "loss": 0.3422737121582031, "step": 85030 }, { "epoch": 0.7352725008862872, "grad_norm": 1.846901234614829, "learning_rate": 4.2118562008446915e-06, "loss": 0.05770263671875, "step": 85035 }, { "epoch": 0.7353157344078305, "grad_norm": 3.1558569726959784, "learning_rate": 4.211669826989542e-06, "loss": 0.09407958984375, "step": 85040 }, { "epoch": 0.7353589679293737, "grad_norm": 18.26975015898733, "learning_rate": 4.21148344754628e-06, "loss": 0.17505836486816406, "step": 85045 }, { "epoch": 0.735402201450917, "grad_norm": 24.05794099295921, "learning_rate": 4.211297062515763e-06, "loss": 0.18872356414794922, "step": 85050 }, { "epoch": 0.7354454349724603, "grad_norm": 0.6752077466699845, "learning_rate": 4.21111067189885e-06, "loss": 0.05340576171875, "step": 85055 }, { "epoch": 0.7354886684940035, "grad_norm": 0.9210523703051186, "learning_rate": 4.210924275696402e-06, "loss": 0.31389503479003905, "step": 85060 }, { "epoch": 0.7355319020155467, "grad_norm": 2.080431905896346, "learning_rate": 4.210737873909279e-06, "loss": 0.015625762939453124, "step": 85065 }, { "epoch": 0.7355751355370901, "grad_norm": 2.575545938717458, "learning_rate": 4.210551466538339e-06, "loss": 0.10521240234375, "step": 85070 }, { "epoch": 0.7356183690586333, "grad_norm": 19.06410812031101, "learning_rate": 4.210365053584443e-06, "loss": 0.12866973876953125, "step": 85075 }, { "epoch": 0.7356616025801765, "grad_norm": 5.04730718811967, "learning_rate": 4.21017863504845e-06, "loss": 0.06499176025390625, "step": 85080 }, { "epoch": 0.7357048361017199, "grad_norm": 21.539206227388217, "learning_rate": 4.209992210931221e-06, "loss": 0.09638900756835937, "step": 85085 }, { "epoch": 0.7357480696232631, "grad_norm": 35.09605431737974, "learning_rate": 4.209805781233614e-06, "loss": 0.3890899658203125, "step": 85090 }, { "epoch": 0.7357913031448063, "grad_norm": 1.061607483142542, "learning_rate": 4.2096193459564904e-06, "loss": 0.083294677734375, "step": 85095 }, { "epoch": 0.7358345366663496, "grad_norm": 6.601675238140127, "learning_rate": 4.209432905100708e-06, "loss": 0.13037109375, "step": 85100 }, { "epoch": 0.7358777701878929, "grad_norm": 1.8077638545522088, "learning_rate": 4.209246458667128e-06, "loss": 0.1125274658203125, "step": 85105 }, { "epoch": 0.7359210037094361, "grad_norm": 10.920522508876722, "learning_rate": 4.2090600066566115e-06, "loss": 0.1216339111328125, "step": 85110 }, { "epoch": 0.7359642372309794, "grad_norm": 5.378251613724262, "learning_rate": 4.208873549070017e-06, "loss": 0.26207275390625, "step": 85115 }, { "epoch": 0.7360074707525227, "grad_norm": 14.239748638382228, "learning_rate": 4.208687085908203e-06, "loss": 0.0927642822265625, "step": 85120 }, { "epoch": 0.7360507042740659, "grad_norm": 47.592454342741334, "learning_rate": 4.208500617172032e-06, "loss": 0.8566364288330078, "step": 85125 }, { "epoch": 0.7360939377956092, "grad_norm": 15.777194299346146, "learning_rate": 4.208314142862362e-06, "loss": 0.29021873474121096, "step": 85130 }, { "epoch": 0.7361371713171525, "grad_norm": 6.130989719951991, "learning_rate": 4.2081276629800546e-06, "loss": 0.1365234375, "step": 85135 }, { "epoch": 0.7361804048386957, "grad_norm": 3.804189314541226, "learning_rate": 4.20794117752597e-06, "loss": 0.1540740966796875, "step": 85140 }, { "epoch": 0.736223638360239, "grad_norm": 30.2080703599434, "learning_rate": 4.2077546865009665e-06, "loss": 0.10758399963378906, "step": 85145 }, { "epoch": 0.7362668718817823, "grad_norm": 32.315530176363616, "learning_rate": 4.207568189905906e-06, "loss": 0.1585418701171875, "step": 85150 }, { "epoch": 0.7363101054033255, "grad_norm": 3.5827519921399174, "learning_rate": 4.207381687741647e-06, "loss": 0.0541412353515625, "step": 85155 }, { "epoch": 0.7363533389248688, "grad_norm": 60.27469003781032, "learning_rate": 4.20719518000905e-06, "loss": 0.3616813659667969, "step": 85160 }, { "epoch": 0.7363965724464121, "grad_norm": 1.9986553063551031, "learning_rate": 4.207008666708975e-06, "loss": 0.19809513092041015, "step": 85165 }, { "epoch": 0.7364398059679553, "grad_norm": 2.21677865560416, "learning_rate": 4.206822147842284e-06, "loss": 0.49593505859375, "step": 85170 }, { "epoch": 0.7364830394894986, "grad_norm": 1.9997398712285193, "learning_rate": 4.206635623409836e-06, "loss": 0.04060287475585937, "step": 85175 }, { "epoch": 0.7365262730110418, "grad_norm": 27.547745828240366, "learning_rate": 4.20644909341249e-06, "loss": 0.2191558837890625, "step": 85180 }, { "epoch": 0.7365695065325851, "grad_norm": 28.751980059708572, "learning_rate": 4.2062625578511075e-06, "loss": 0.13500823974609374, "step": 85185 }, { "epoch": 0.7366127400541284, "grad_norm": 0.3364284176795268, "learning_rate": 4.206076016726549e-06, "loss": 0.165826416015625, "step": 85190 }, { "epoch": 0.7366559735756716, "grad_norm": 38.074745805974956, "learning_rate": 4.205889470039676e-06, "loss": 0.20742015838623046, "step": 85195 }, { "epoch": 0.7366992070972149, "grad_norm": 6.373511065050166, "learning_rate": 4.205702917791345e-06, "loss": 0.07497596740722656, "step": 85200 }, { "epoch": 0.7367424406187582, "grad_norm": 36.587133499252566, "learning_rate": 4.20551635998242e-06, "loss": 0.23169784545898436, "step": 85205 }, { "epoch": 0.7367856741403014, "grad_norm": 2.184320985964636, "learning_rate": 4.2053297966137595e-06, "loss": 0.0410888671875, "step": 85210 }, { "epoch": 0.7368289076618447, "grad_norm": 0.5326494185574132, "learning_rate": 4.2051432276862246e-06, "loss": 0.1193939208984375, "step": 85215 }, { "epoch": 0.736872141183388, "grad_norm": 43.0118097058009, "learning_rate": 4.204956653200675e-06, "loss": 0.26697845458984376, "step": 85220 }, { "epoch": 0.7369153747049312, "grad_norm": 13.613238391493288, "learning_rate": 4.2047700731579735e-06, "loss": 0.12857093811035156, "step": 85225 }, { "epoch": 0.7369586082264745, "grad_norm": 41.26334469752486, "learning_rate": 4.204583487558977e-06, "loss": 0.3560943603515625, "step": 85230 }, { "epoch": 0.7370018417480177, "grad_norm": 8.176512957179972, "learning_rate": 4.204396896404549e-06, "loss": 0.332086181640625, "step": 85235 }, { "epoch": 0.737045075269561, "grad_norm": 6.152353421684419, "learning_rate": 4.204210299695549e-06, "loss": 0.1060394287109375, "step": 85240 }, { "epoch": 0.7370883087911043, "grad_norm": 3.1352909076744644, "learning_rate": 4.204023697432837e-06, "loss": 0.09970474243164062, "step": 85245 }, { "epoch": 0.7371315423126475, "grad_norm": 5.9476361911766515, "learning_rate": 4.203837089617274e-06, "loss": 0.060660934448242186, "step": 85250 }, { "epoch": 0.7371747758341908, "grad_norm": 12.13998964792482, "learning_rate": 4.203650476249721e-06, "loss": 0.19753570556640626, "step": 85255 }, { "epoch": 0.7372180093557341, "grad_norm": 4.5187617366187185, "learning_rate": 4.203463857331038e-06, "loss": 0.09166526794433594, "step": 85260 }, { "epoch": 0.7372612428772773, "grad_norm": 1.8531467277841753, "learning_rate": 4.203277232862087e-06, "loss": 0.10064678192138672, "step": 85265 }, { "epoch": 0.7373044763988206, "grad_norm": 1.2721850084019137, "learning_rate": 4.203090602843727e-06, "loss": 0.0937896728515625, "step": 85270 }, { "epoch": 0.7373477099203638, "grad_norm": 1.4866666923083227, "learning_rate": 4.202903967276819e-06, "loss": 0.17871856689453125, "step": 85275 }, { "epoch": 0.7373909434419071, "grad_norm": 1.3251184199184907, "learning_rate": 4.202717326162225e-06, "loss": 0.11210479736328124, "step": 85280 }, { "epoch": 0.7374341769634504, "grad_norm": 1.6782119223922762, "learning_rate": 4.202530679500805e-06, "loss": 0.17375411987304687, "step": 85285 }, { "epoch": 0.7374774104849936, "grad_norm": 4.8431913854770965, "learning_rate": 4.202344027293419e-06, "loss": 0.22406234741210937, "step": 85290 }, { "epoch": 0.7375206440065369, "grad_norm": 4.988963168793822, "learning_rate": 4.20215736954093e-06, "loss": 0.12262191772460937, "step": 85295 }, { "epoch": 0.7375638775280802, "grad_norm": 5.5033759170064265, "learning_rate": 4.201970706244196e-06, "loss": 0.1309112548828125, "step": 85300 }, { "epoch": 0.7376071110496234, "grad_norm": 1.8243987857396053, "learning_rate": 4.201784037404079e-06, "loss": 0.04072113037109375, "step": 85305 }, { "epoch": 0.7376503445711667, "grad_norm": 13.063587783925207, "learning_rate": 4.201597363021441e-06, "loss": 0.1414886474609375, "step": 85310 }, { "epoch": 0.73769357809271, "grad_norm": 27.951570867767256, "learning_rate": 4.201410683097143e-06, "loss": 0.050351715087890624, "step": 85315 }, { "epoch": 0.7377368116142532, "grad_norm": 4.080298340144235, "learning_rate": 4.2012239976320435e-06, "loss": 0.2671318054199219, "step": 85320 }, { "epoch": 0.7377800451357965, "grad_norm": 36.06133394341363, "learning_rate": 4.201037306627006e-06, "loss": 0.4202779769897461, "step": 85325 }, { "epoch": 0.7378232786573398, "grad_norm": 35.47652499836889, "learning_rate": 4.20085061008289e-06, "loss": 0.1895111083984375, "step": 85330 }, { "epoch": 0.737866512178883, "grad_norm": 29.678846056060422, "learning_rate": 4.200663908000558e-06, "loss": 0.1735870361328125, "step": 85335 }, { "epoch": 0.7379097457004263, "grad_norm": 1.9923022707773852, "learning_rate": 4.200477200380869e-06, "loss": 0.18898468017578124, "step": 85340 }, { "epoch": 0.7379529792219696, "grad_norm": 5.988897837986307, "learning_rate": 4.200290487224687e-06, "loss": 0.0634979248046875, "step": 85345 }, { "epoch": 0.7379962127435128, "grad_norm": 29.30125576593559, "learning_rate": 4.200103768532869e-06, "loss": 0.14344635009765624, "step": 85350 }, { "epoch": 0.738039446265056, "grad_norm": 14.030074192108257, "learning_rate": 4.19991704430628e-06, "loss": 0.11669921875, "step": 85355 }, { "epoch": 0.7380826797865994, "grad_norm": 16.58042564755201, "learning_rate": 4.199730314545779e-06, "loss": 0.319952392578125, "step": 85360 }, { "epoch": 0.7381259133081426, "grad_norm": 18.160908677386328, "learning_rate": 4.199543579252228e-06, "loss": 0.47599258422851565, "step": 85365 }, { "epoch": 0.7381691468296858, "grad_norm": 0.43432571865394565, "learning_rate": 4.199356838426489e-06, "loss": 0.08044662475585937, "step": 85370 }, { "epoch": 0.7382123803512292, "grad_norm": 5.100068677062647, "learning_rate": 4.19917009206942e-06, "loss": 0.1284912109375, "step": 85375 }, { "epoch": 0.7382556138727724, "grad_norm": 13.147254252547768, "learning_rate": 4.198983340181887e-06, "loss": 0.143804931640625, "step": 85380 }, { "epoch": 0.7382988473943156, "grad_norm": 1.591468004951329, "learning_rate": 4.198796582764746e-06, "loss": 0.2910442352294922, "step": 85385 }, { "epoch": 0.738342080915859, "grad_norm": 6.617833416870797, "learning_rate": 4.198609819818863e-06, "loss": 0.10694122314453125, "step": 85390 }, { "epoch": 0.7383853144374022, "grad_norm": 0.51356806647009, "learning_rate": 4.1984230513450975e-06, "loss": 0.2920867919921875, "step": 85395 }, { "epoch": 0.7384285479589454, "grad_norm": 2.4899163508538074, "learning_rate": 4.1982362773443105e-06, "loss": 0.1957935333251953, "step": 85400 }, { "epoch": 0.7384717814804888, "grad_norm": 12.673875202029592, "learning_rate": 4.198049497817364e-06, "loss": 0.06436614990234375, "step": 85405 }, { "epoch": 0.738515015002032, "grad_norm": 0.3396100187018671, "learning_rate": 4.1978627127651184e-06, "loss": 0.16867599487304688, "step": 85410 }, { "epoch": 0.7385582485235752, "grad_norm": 5.434987013411536, "learning_rate": 4.197675922188435e-06, "loss": 0.0981658935546875, "step": 85415 }, { "epoch": 0.7386014820451186, "grad_norm": 1.1588553358913791, "learning_rate": 4.197489126088177e-06, "loss": 0.158660888671875, "step": 85420 }, { "epoch": 0.7386447155666618, "grad_norm": 46.72197078237622, "learning_rate": 4.197302324465206e-06, "loss": 0.3024555206298828, "step": 85425 }, { "epoch": 0.738687949088205, "grad_norm": 12.339248391365757, "learning_rate": 4.197115517320381e-06, "loss": 0.124017333984375, "step": 85430 }, { "epoch": 0.7387311826097483, "grad_norm": 1.3408121910669448, "learning_rate": 4.196928704654565e-06, "loss": 0.09393310546875, "step": 85435 }, { "epoch": 0.7387744161312916, "grad_norm": 7.483300281847848, "learning_rate": 4.1967418864686215e-06, "loss": 0.09131317138671875, "step": 85440 }, { "epoch": 0.7388176496528348, "grad_norm": 0.865633132807852, "learning_rate": 4.196555062763408e-06, "loss": 0.039779281616210936, "step": 85445 }, { "epoch": 0.738860883174378, "grad_norm": 0.6542203659692691, "learning_rate": 4.196368233539789e-06, "loss": 0.19764537811279298, "step": 85450 }, { "epoch": 0.7389041166959214, "grad_norm": 34.02649030783514, "learning_rate": 4.196181398798626e-06, "loss": 0.3023529052734375, "step": 85455 }, { "epoch": 0.7389473502174646, "grad_norm": 41.06396448841012, "learning_rate": 4.19599455854078e-06, "loss": 0.362353515625, "step": 85460 }, { "epoch": 0.7389905837390078, "grad_norm": 24.134417392053063, "learning_rate": 4.195807712767112e-06, "loss": 0.21756439208984374, "step": 85465 }, { "epoch": 0.7390338172605512, "grad_norm": 0.368227128147041, "learning_rate": 4.195620861478484e-06, "loss": 0.054498291015625, "step": 85470 }, { "epoch": 0.7390770507820944, "grad_norm": 8.332571841029894, "learning_rate": 4.195434004675759e-06, "loss": 0.0932281494140625, "step": 85475 }, { "epoch": 0.7391202843036376, "grad_norm": 42.78414624599149, "learning_rate": 4.195247142359799e-06, "loss": 0.20977783203125, "step": 85480 }, { "epoch": 0.739163517825181, "grad_norm": 1.5384305451283657, "learning_rate": 4.195060274531463e-06, "loss": 0.05513458251953125, "step": 85485 }, { "epoch": 0.7392067513467242, "grad_norm": 5.621860848795191, "learning_rate": 4.194873401191616e-06, "loss": 0.28180732727050783, "step": 85490 }, { "epoch": 0.7392499848682674, "grad_norm": 55.036124649928524, "learning_rate": 4.194686522341118e-06, "loss": 0.3642402648925781, "step": 85495 }, { "epoch": 0.7392932183898108, "grad_norm": 1.9498692496028696, "learning_rate": 4.194499637980831e-06, "loss": 0.0656005859375, "step": 85500 }, { "epoch": 0.739336451911354, "grad_norm": 5.659612849990857, "learning_rate": 4.194312748111617e-06, "loss": 0.051943206787109376, "step": 85505 }, { "epoch": 0.7393796854328972, "grad_norm": 28.898301660557607, "learning_rate": 4.194125852734339e-06, "loss": 0.3081512451171875, "step": 85510 }, { "epoch": 0.7394229189544406, "grad_norm": 0.4029405635849502, "learning_rate": 4.193938951849857e-06, "loss": 0.037578582763671875, "step": 85515 }, { "epoch": 0.7394661524759838, "grad_norm": 39.1213718391735, "learning_rate": 4.193752045459036e-06, "loss": 0.12483978271484375, "step": 85520 }, { "epoch": 0.739509385997527, "grad_norm": 0.16514394938756774, "learning_rate": 4.1935651335627335e-06, "loss": 0.3622161865234375, "step": 85525 }, { "epoch": 0.7395526195190703, "grad_norm": 20.60142840715932, "learning_rate": 4.193378216161816e-06, "loss": 0.20938568115234374, "step": 85530 }, { "epoch": 0.7395958530406136, "grad_norm": 4.017583357730669, "learning_rate": 4.193191293257143e-06, "loss": 0.2776020050048828, "step": 85535 }, { "epoch": 0.7396390865621568, "grad_norm": 0.2070227305925609, "learning_rate": 4.193004364849577e-06, "loss": 0.09813385009765625, "step": 85540 }, { "epoch": 0.7396823200837, "grad_norm": 1.8131259144356422, "learning_rate": 4.192817430939981e-06, "loss": 0.05610809326171875, "step": 85545 }, { "epoch": 0.7397255536052434, "grad_norm": 21.49069669915236, "learning_rate": 4.192630491529215e-06, "loss": 0.2191650390625, "step": 85550 }, { "epoch": 0.7397687871267866, "grad_norm": 9.548519868116296, "learning_rate": 4.1924435466181435e-06, "loss": 0.16561031341552734, "step": 85555 }, { "epoch": 0.7398120206483298, "grad_norm": 20.182698993340207, "learning_rate": 4.1922565962076274e-06, "loss": 0.17648487091064452, "step": 85560 }, { "epoch": 0.7398552541698732, "grad_norm": 1.3663738179613303, "learning_rate": 4.19206964029853e-06, "loss": 0.05367908477783203, "step": 85565 }, { "epoch": 0.7398984876914164, "grad_norm": 15.559416614475525, "learning_rate": 4.191882678891713e-06, "loss": 0.11465950012207031, "step": 85570 }, { "epoch": 0.7399417212129596, "grad_norm": 10.716756753610097, "learning_rate": 4.191695711988037e-06, "loss": 0.34005126953125, "step": 85575 }, { "epoch": 0.739984954734503, "grad_norm": 8.397372794824268, "learning_rate": 4.191508739588366e-06, "loss": 0.05738372802734375, "step": 85580 }, { "epoch": 0.7400281882560462, "grad_norm": 4.017729701632622, "learning_rate": 4.191321761693562e-06, "loss": 0.03910140991210938, "step": 85585 }, { "epoch": 0.7400714217775894, "grad_norm": 0.2050683252154956, "learning_rate": 4.191134778304488e-06, "loss": 0.20430908203125, "step": 85590 }, { "epoch": 0.7401146552991328, "grad_norm": 41.10336001858673, "learning_rate": 4.190947789422004e-06, "loss": 0.20036544799804687, "step": 85595 }, { "epoch": 0.740157888820676, "grad_norm": 5.861952755780574, "learning_rate": 4.190760795046976e-06, "loss": 0.05694427490234375, "step": 85600 }, { "epoch": 0.7402011223422192, "grad_norm": 7.547034519162328, "learning_rate": 4.190573795180264e-06, "loss": 0.0371429443359375, "step": 85605 }, { "epoch": 0.7402443558637626, "grad_norm": 11.933574225035915, "learning_rate": 4.19038678982273e-06, "loss": 0.0775146484375, "step": 85610 }, { "epoch": 0.7402875893853058, "grad_norm": 17.44903552658708, "learning_rate": 4.190199778975238e-06, "loss": 0.12620162963867188, "step": 85615 }, { "epoch": 0.740330822906849, "grad_norm": 0.5339316178197823, "learning_rate": 4.190012762638649e-06, "loss": 0.09925689697265624, "step": 85620 }, { "epoch": 0.7403740564283923, "grad_norm": 15.860168409007414, "learning_rate": 4.189825740813828e-06, "loss": 0.14983291625976564, "step": 85625 }, { "epoch": 0.7404172899499356, "grad_norm": 12.860010650752654, "learning_rate": 4.189638713501635e-06, "loss": 0.16473617553710937, "step": 85630 }, { "epoch": 0.7404605234714788, "grad_norm": 10.981910566159945, "learning_rate": 4.189451680702933e-06, "loss": 0.08895339965820312, "step": 85635 }, { "epoch": 0.7405037569930221, "grad_norm": 12.928259215539075, "learning_rate": 4.189264642418584e-06, "loss": 0.46320037841796874, "step": 85640 }, { "epoch": 0.7405469905145654, "grad_norm": 89.95810804575132, "learning_rate": 4.189077598649453e-06, "loss": 0.5575668334960937, "step": 85645 }, { "epoch": 0.7405902240361086, "grad_norm": 0.10326595618701473, "learning_rate": 4.1888905493964006e-06, "loss": 0.02917327880859375, "step": 85650 }, { "epoch": 0.7406334575576519, "grad_norm": 1.7131311713965587, "learning_rate": 4.1887034946602905e-06, "loss": 0.03876190185546875, "step": 85655 }, { "epoch": 0.7406766910791952, "grad_norm": 4.815601788407156, "learning_rate": 4.188516434441984e-06, "loss": 0.1040283203125, "step": 85660 }, { "epoch": 0.7407199246007384, "grad_norm": 15.437626067477947, "learning_rate": 4.1883293687423465e-06, "loss": 0.24755172729492186, "step": 85665 }, { "epoch": 0.7407631581222817, "grad_norm": 0.0693049665088619, "learning_rate": 4.188142297562237e-06, "loss": 0.16251754760742188, "step": 85670 }, { "epoch": 0.740806391643825, "grad_norm": 4.794944861242935, "learning_rate": 4.187955220902521e-06, "loss": 0.09524345397949219, "step": 85675 }, { "epoch": 0.7408496251653682, "grad_norm": 0.7594807976141325, "learning_rate": 4.187768138764061e-06, "loss": 0.10894622802734374, "step": 85680 }, { "epoch": 0.7408928586869115, "grad_norm": 0.09358451101411155, "learning_rate": 4.1875810511477195e-06, "loss": 0.2637825012207031, "step": 85685 }, { "epoch": 0.7409360922084548, "grad_norm": 0.6045742536525764, "learning_rate": 4.187393958054358e-06, "loss": 0.06366405487060547, "step": 85690 }, { "epoch": 0.740979325729998, "grad_norm": 4.325796708346382, "learning_rate": 4.187206859484841e-06, "loss": 0.1151153564453125, "step": 85695 }, { "epoch": 0.7410225592515413, "grad_norm": 29.78120413251435, "learning_rate": 4.187019755440031e-06, "loss": 0.17525253295898438, "step": 85700 }, { "epoch": 0.7410657927730845, "grad_norm": 33.412101465220374, "learning_rate": 4.18683264592079e-06, "loss": 0.07723579406738282, "step": 85705 }, { "epoch": 0.7411090262946278, "grad_norm": 10.159054862333111, "learning_rate": 4.186645530927983e-06, "loss": 0.068939208984375, "step": 85710 }, { "epoch": 0.741152259816171, "grad_norm": 4.874490248376691, "learning_rate": 4.186458410462471e-06, "loss": 0.0462005615234375, "step": 85715 }, { "epoch": 0.7411954933377143, "grad_norm": 21.368497323192983, "learning_rate": 4.186271284525118e-06, "loss": 0.14168548583984375, "step": 85720 }, { "epoch": 0.7412387268592576, "grad_norm": 0.44165821563258306, "learning_rate": 4.186084153116785e-06, "loss": 0.16781158447265626, "step": 85725 }, { "epoch": 0.7412819603808009, "grad_norm": 36.24161339125894, "learning_rate": 4.1858970162383375e-06, "loss": 0.288134765625, "step": 85730 }, { "epoch": 0.7413251939023441, "grad_norm": 1.8543634668572304, "learning_rate": 4.185709873890639e-06, "loss": 0.3261604309082031, "step": 85735 }, { "epoch": 0.7413684274238874, "grad_norm": 0.7937266767639172, "learning_rate": 4.1855227260745495e-06, "loss": 0.08538665771484374, "step": 85740 }, { "epoch": 0.7414116609454307, "grad_norm": 4.878221699991963, "learning_rate": 4.185335572790936e-06, "loss": 0.0501617431640625, "step": 85745 }, { "epoch": 0.7414548944669739, "grad_norm": 11.356180437646792, "learning_rate": 4.185148414040658e-06, "loss": 0.328607177734375, "step": 85750 }, { "epoch": 0.7414981279885172, "grad_norm": 4.046269074550881, "learning_rate": 4.184961249824581e-06, "loss": 0.13604068756103516, "step": 85755 }, { "epoch": 0.7415413615100604, "grad_norm": 7.644098819602781, "learning_rate": 4.184774080143567e-06, "loss": 0.02690887451171875, "step": 85760 }, { "epoch": 0.7415845950316037, "grad_norm": 3.3353779772698937, "learning_rate": 4.1845869049984795e-06, "loss": 0.16831207275390625, "step": 85765 }, { "epoch": 0.741627828553147, "grad_norm": 12.035792941296185, "learning_rate": 4.184399724390183e-06, "loss": 0.09154052734375, "step": 85770 }, { "epoch": 0.7416710620746902, "grad_norm": 3.0555938262551705, "learning_rate": 4.184212538319539e-06, "loss": 0.02053070068359375, "step": 85775 }, { "epoch": 0.7417142955962335, "grad_norm": 5.784643515520763, "learning_rate": 4.184025346787411e-06, "loss": 0.1182403564453125, "step": 85780 }, { "epoch": 0.7417575291177768, "grad_norm": 6.627550888590399, "learning_rate": 4.1838381497946625e-06, "loss": 0.1132568359375, "step": 85785 }, { "epoch": 0.74180076263932, "grad_norm": 7.13653827970006, "learning_rate": 4.183650947342158e-06, "loss": 0.19003067016601563, "step": 85790 }, { "epoch": 0.7418439961608633, "grad_norm": 2.0747505188397635, "learning_rate": 4.183463739430759e-06, "loss": 0.028763580322265624, "step": 85795 }, { "epoch": 0.7418872296824065, "grad_norm": 26.98890110664896, "learning_rate": 4.183276526061331e-06, "loss": 0.378033447265625, "step": 85800 }, { "epoch": 0.7419304632039498, "grad_norm": 58.971415846110574, "learning_rate": 4.183089307234735e-06, "loss": 0.224224853515625, "step": 85805 }, { "epoch": 0.7419736967254931, "grad_norm": 17.532530180778668, "learning_rate": 4.182902082951836e-06, "loss": 0.2361572265625, "step": 85810 }, { "epoch": 0.7420169302470363, "grad_norm": 19.28112302089749, "learning_rate": 4.182714853213497e-06, "loss": 0.1582733154296875, "step": 85815 }, { "epoch": 0.7420601637685796, "grad_norm": 36.944159314319016, "learning_rate": 4.182527618020581e-06, "loss": 0.23109893798828124, "step": 85820 }, { "epoch": 0.7421033972901229, "grad_norm": 15.178969759930817, "learning_rate": 4.182340377373954e-06, "loss": 0.3395263671875, "step": 85825 }, { "epoch": 0.7421466308116661, "grad_norm": 0.47078197531170674, "learning_rate": 4.1821531312744775e-06, "loss": 0.08306045532226562, "step": 85830 }, { "epoch": 0.7421898643332094, "grad_norm": 5.128989017083138, "learning_rate": 4.181965879723013e-06, "loss": 0.21251449584960938, "step": 85835 }, { "epoch": 0.7422330978547527, "grad_norm": 1.080120370616402, "learning_rate": 4.181778622720427e-06, "loss": 0.04507522583007813, "step": 85840 }, { "epoch": 0.7422763313762959, "grad_norm": 3.198978065755641, "learning_rate": 4.181591360267584e-06, "loss": 0.0824493408203125, "step": 85845 }, { "epoch": 0.7423195648978392, "grad_norm": 33.63524761172036, "learning_rate": 4.181404092365344e-06, "loss": 0.254925537109375, "step": 85850 }, { "epoch": 0.7423627984193825, "grad_norm": 21.376872418242492, "learning_rate": 4.181216819014575e-06, "loss": 0.1338134765625, "step": 85855 }, { "epoch": 0.7424060319409257, "grad_norm": 3.453367164909741, "learning_rate": 4.181029540216138e-06, "loss": 0.056708908081054686, "step": 85860 }, { "epoch": 0.742449265462469, "grad_norm": 57.05257740451872, "learning_rate": 4.180842255970896e-06, "loss": 0.203948974609375, "step": 85865 }, { "epoch": 0.7424924989840123, "grad_norm": 8.622180499318164, "learning_rate": 4.180654966279713e-06, "loss": 0.2388702392578125, "step": 85870 }, { "epoch": 0.7425357325055555, "grad_norm": 3.7713457804166333, "learning_rate": 4.180467671143455e-06, "loss": 0.07740306854248047, "step": 85875 }, { "epoch": 0.7425789660270987, "grad_norm": 36.115374645194706, "learning_rate": 4.180280370562985e-06, "loss": 0.21477737426757812, "step": 85880 }, { "epoch": 0.7426221995486421, "grad_norm": 5.955892247988357, "learning_rate": 4.180093064539165e-06, "loss": 0.09137725830078125, "step": 85885 }, { "epoch": 0.7426654330701853, "grad_norm": 0.05414892071516142, "learning_rate": 4.1799057530728605e-06, "loss": 0.07565135955810547, "step": 85890 }, { "epoch": 0.7427086665917285, "grad_norm": 14.465568280385861, "learning_rate": 4.179718436164935e-06, "loss": 0.11909637451171876, "step": 85895 }, { "epoch": 0.7427519001132719, "grad_norm": 25.812687251253735, "learning_rate": 4.179531113816252e-06, "loss": 0.1685516357421875, "step": 85900 }, { "epoch": 0.7427951336348151, "grad_norm": 36.51432523627017, "learning_rate": 4.179343786027676e-06, "loss": 0.6815673828125, "step": 85905 }, { "epoch": 0.7428383671563583, "grad_norm": 8.023972417838339, "learning_rate": 4.179156452800071e-06, "loss": 0.3111530303955078, "step": 85910 }, { "epoch": 0.7428816006779017, "grad_norm": 50.475552436776816, "learning_rate": 4.1789691141343e-06, "loss": 0.8684814453125, "step": 85915 }, { "epoch": 0.7429248341994449, "grad_norm": 5.889551491058872, "learning_rate": 4.178781770031229e-06, "loss": 0.06172599792480469, "step": 85920 }, { "epoch": 0.7429680677209881, "grad_norm": 1.2684850769278235, "learning_rate": 4.1785944204917185e-06, "loss": 0.05396881103515625, "step": 85925 }, { "epoch": 0.7430113012425315, "grad_norm": 5.580356984076044, "learning_rate": 4.178407065516635e-06, "loss": 0.14629440307617186, "step": 85930 }, { "epoch": 0.7430545347640747, "grad_norm": 0.2748937035577271, "learning_rate": 4.178219705106843e-06, "loss": 0.0159271240234375, "step": 85935 }, { "epoch": 0.7430977682856179, "grad_norm": 25.269940632610805, "learning_rate": 4.178032339263206e-06, "loss": 0.20466079711914062, "step": 85940 }, { "epoch": 0.7431410018071612, "grad_norm": 2.902958157849841, "learning_rate": 4.177844967986588e-06, "loss": 0.08743743896484375, "step": 85945 }, { "epoch": 0.7431842353287045, "grad_norm": 25.547245960940344, "learning_rate": 4.177657591277852e-06, "loss": 0.125750732421875, "step": 85950 }, { "epoch": 0.7432274688502477, "grad_norm": 7.354500907145441, "learning_rate": 4.177470209137865e-06, "loss": 0.17019805908203126, "step": 85955 }, { "epoch": 0.7432707023717909, "grad_norm": 4.358502863935871, "learning_rate": 4.177282821567488e-06, "loss": 0.35324630737304685, "step": 85960 }, { "epoch": 0.7433139358933343, "grad_norm": 9.283216041960001, "learning_rate": 4.177095428567587e-06, "loss": 0.06037445068359375, "step": 85965 }, { "epoch": 0.7433571694148775, "grad_norm": 1.564200561988183, "learning_rate": 4.176908030139026e-06, "loss": 0.040810012817382814, "step": 85970 }, { "epoch": 0.7434004029364207, "grad_norm": 28.342119217704425, "learning_rate": 4.1767206262826695e-06, "loss": 0.0795989990234375, "step": 85975 }, { "epoch": 0.7434436364579641, "grad_norm": 0.36221106547998194, "learning_rate": 4.176533216999381e-06, "loss": 0.043927764892578124, "step": 85980 }, { "epoch": 0.7434868699795073, "grad_norm": 40.092217378350284, "learning_rate": 4.176345802290025e-06, "loss": 0.1092529296875, "step": 85985 }, { "epoch": 0.7435301035010505, "grad_norm": 0.23858346235081546, "learning_rate": 4.176158382155467e-06, "loss": 0.15194244384765626, "step": 85990 }, { "epoch": 0.7435733370225939, "grad_norm": 32.5085145691647, "learning_rate": 4.17597095659657e-06, "loss": 0.39964599609375, "step": 85995 }, { "epoch": 0.7436165705441371, "grad_norm": 0.36021267737410256, "learning_rate": 4.175783525614199e-06, "loss": 0.012014007568359375, "step": 86000 }, { "epoch": 0.7436598040656803, "grad_norm": 11.874848741120973, "learning_rate": 4.175596089209218e-06, "loss": 0.14393310546875, "step": 86005 }, { "epoch": 0.7437030375872237, "grad_norm": 5.241644526562773, "learning_rate": 4.175408647382492e-06, "loss": 0.05434684753417969, "step": 86010 }, { "epoch": 0.7437462711087669, "grad_norm": 16.46101287593678, "learning_rate": 4.175221200134885e-06, "loss": 0.080364990234375, "step": 86015 }, { "epoch": 0.7437895046303101, "grad_norm": 7.7598653734594585, "learning_rate": 4.175033747467262e-06, "loss": 0.15843505859375, "step": 86020 }, { "epoch": 0.7438327381518535, "grad_norm": 7.553561504672671, "learning_rate": 4.174846289380486e-06, "loss": 0.353057861328125, "step": 86025 }, { "epoch": 0.7438759716733967, "grad_norm": 2.6078116929964392, "learning_rate": 4.174658825875424e-06, "loss": 0.07360916137695313, "step": 86030 }, { "epoch": 0.7439192051949399, "grad_norm": 1.1071559795029478, "learning_rate": 4.174471356952939e-06, "loss": 0.43463287353515623, "step": 86035 }, { "epoch": 0.7439624387164833, "grad_norm": 6.008838000361411, "learning_rate": 4.1742838826138945e-06, "loss": 0.09102020263671876, "step": 86040 }, { "epoch": 0.7440056722380265, "grad_norm": 1.5007907229151725, "learning_rate": 4.174096402859158e-06, "loss": 0.06967926025390625, "step": 86045 }, { "epoch": 0.7440489057595697, "grad_norm": 6.410530504673818, "learning_rate": 4.173908917689592e-06, "loss": 0.022837448120117187, "step": 86050 }, { "epoch": 0.744092139281113, "grad_norm": 0.9290270983744735, "learning_rate": 4.173721427106063e-06, "loss": 0.14060630798339843, "step": 86055 }, { "epoch": 0.7441353728026563, "grad_norm": 0.6851145614793728, "learning_rate": 4.1735339311094336e-06, "loss": 0.026947975158691406, "step": 86060 }, { "epoch": 0.7441786063241995, "grad_norm": 20.10040428646671, "learning_rate": 4.17334642970057e-06, "loss": 0.05526161193847656, "step": 86065 }, { "epoch": 0.7442218398457427, "grad_norm": 1.4584815756588632, "learning_rate": 4.173158922880336e-06, "loss": 0.06766204833984375, "step": 86070 }, { "epoch": 0.7442650733672861, "grad_norm": 0.6084009426026997, "learning_rate": 4.1729714106495954e-06, "loss": 0.13315887451171876, "step": 86075 }, { "epoch": 0.7443083068888293, "grad_norm": 22.939803454389565, "learning_rate": 4.1727838930092166e-06, "loss": 0.4608802795410156, "step": 86080 }, { "epoch": 0.7443515404103725, "grad_norm": 42.209514911023085, "learning_rate": 4.1725963699600605e-06, "loss": 0.3020050048828125, "step": 86085 }, { "epoch": 0.7443947739319159, "grad_norm": 0.20762522489108232, "learning_rate": 4.172408841502994e-06, "loss": 0.068878173828125, "step": 86090 }, { "epoch": 0.7444380074534591, "grad_norm": 14.7061838332285, "learning_rate": 4.1722213076388805e-06, "loss": 0.3988555908203125, "step": 86095 }, { "epoch": 0.7444812409750023, "grad_norm": 0.5377955087244839, "learning_rate": 4.172033768368588e-06, "loss": 0.014814138412475586, "step": 86100 }, { "epoch": 0.7445244744965457, "grad_norm": 22.89517057272061, "learning_rate": 4.171846223692977e-06, "loss": 0.1217010498046875, "step": 86105 }, { "epoch": 0.7445677080180889, "grad_norm": 9.67788675043155, "learning_rate": 4.171658673612916e-06, "loss": 0.23391494750976563, "step": 86110 }, { "epoch": 0.7446109415396321, "grad_norm": 1.874159894308713, "learning_rate": 4.17147111812927e-06, "loss": 0.16860733032226563, "step": 86115 }, { "epoch": 0.7446541750611755, "grad_norm": 2.4638846314881744, "learning_rate": 4.171283557242901e-06, "loss": 0.05516891479492188, "step": 86120 }, { "epoch": 0.7446974085827187, "grad_norm": 0.3536853346611453, "learning_rate": 4.171095990954676e-06, "loss": 0.12211761474609376, "step": 86125 }, { "epoch": 0.7447406421042619, "grad_norm": 23.72037796958185, "learning_rate": 4.1709084192654604e-06, "loss": 0.2730892181396484, "step": 86130 }, { "epoch": 0.7447838756258052, "grad_norm": 12.4409307527273, "learning_rate": 4.170720842176118e-06, "loss": 0.10974082946777344, "step": 86135 }, { "epoch": 0.7448271091473485, "grad_norm": 2.8582923303250882, "learning_rate": 4.170533259687514e-06, "loss": 0.09728889465332032, "step": 86140 }, { "epoch": 0.7448703426688917, "grad_norm": 2.3303570583870172, "learning_rate": 4.170345671800516e-06, "loss": 0.030397796630859376, "step": 86145 }, { "epoch": 0.744913576190435, "grad_norm": 25.296928536158216, "learning_rate": 4.170158078515985e-06, "loss": 0.15233154296875, "step": 86150 }, { "epoch": 0.7449568097119783, "grad_norm": 3.789410172502339, "learning_rate": 4.169970479834789e-06, "loss": 0.16849365234375, "step": 86155 }, { "epoch": 0.7450000432335215, "grad_norm": 2.4251600577773003, "learning_rate": 4.169782875757794e-06, "loss": 0.2193836212158203, "step": 86160 }, { "epoch": 0.7450432767550648, "grad_norm": 6.428653593687252, "learning_rate": 4.169595266285862e-06, "loss": 0.05625, "step": 86165 }, { "epoch": 0.7450865102766081, "grad_norm": 4.080693863478584, "learning_rate": 4.16940765141986e-06, "loss": 0.22249832153320312, "step": 86170 }, { "epoch": 0.7451297437981513, "grad_norm": 1.2956816277067709, "learning_rate": 4.1692200311606535e-06, "loss": 0.16520004272460936, "step": 86175 }, { "epoch": 0.7451729773196946, "grad_norm": 19.339719887737733, "learning_rate": 4.169032405509108e-06, "loss": 0.07537841796875, "step": 86180 }, { "epoch": 0.7452162108412379, "grad_norm": 3.2907037548367284, "learning_rate": 4.168844774466088e-06, "loss": 0.10957088470458984, "step": 86185 }, { "epoch": 0.7452594443627811, "grad_norm": 0.5432781982583583, "learning_rate": 4.16865713803246e-06, "loss": 0.054627609252929685, "step": 86190 }, { "epoch": 0.7453026778843244, "grad_norm": 0.6087001073390952, "learning_rate": 4.168469496209088e-06, "loss": 0.1438995361328125, "step": 86195 }, { "epoch": 0.7453459114058677, "grad_norm": 2.3929934800432147, "learning_rate": 4.168281848996836e-06, "loss": 0.23953704833984374, "step": 86200 }, { "epoch": 0.7453891449274109, "grad_norm": 33.91126668249374, "learning_rate": 4.1680941963965745e-06, "loss": 0.19542236328125, "step": 86205 }, { "epoch": 0.7454323784489542, "grad_norm": 6.1240440903106865, "learning_rate": 4.1679065384091645e-06, "loss": 0.141961669921875, "step": 86210 }, { "epoch": 0.7454756119704975, "grad_norm": 0.4378961148487985, "learning_rate": 4.167718875035472e-06, "loss": 0.1748016357421875, "step": 86215 }, { "epoch": 0.7455188454920407, "grad_norm": 1.396940512824273, "learning_rate": 4.167531206276364e-06, "loss": 0.34184722900390624, "step": 86220 }, { "epoch": 0.745562079013584, "grad_norm": 6.581701426214985, "learning_rate": 4.167343532132705e-06, "loss": 0.20865249633789062, "step": 86225 }, { "epoch": 0.7456053125351272, "grad_norm": 20.026242206951945, "learning_rate": 4.167155852605361e-06, "loss": 0.3001991271972656, "step": 86230 }, { "epoch": 0.7456485460566705, "grad_norm": 0.5927733657401248, "learning_rate": 4.166968167695197e-06, "loss": 0.1534149169921875, "step": 86235 }, { "epoch": 0.7456917795782138, "grad_norm": 4.232350064472784, "learning_rate": 4.166780477403079e-06, "loss": 0.09883804321289062, "step": 86240 }, { "epoch": 0.745735013099757, "grad_norm": 0.5291937506032577, "learning_rate": 4.166592781729873e-06, "loss": 0.20297813415527344, "step": 86245 }, { "epoch": 0.7457782466213003, "grad_norm": 1.2488311423509388, "learning_rate": 4.166405080676444e-06, "loss": 0.09080047607421875, "step": 86250 }, { "epoch": 0.7458214801428436, "grad_norm": 5.858961644448077, "learning_rate": 4.166217374243658e-06, "loss": 0.12697296142578124, "step": 86255 }, { "epoch": 0.7458647136643868, "grad_norm": 2.086234897252883, "learning_rate": 4.16602966243238e-06, "loss": 0.06592559814453125, "step": 86260 }, { "epoch": 0.7459079471859301, "grad_norm": 19.709263470973294, "learning_rate": 4.165841945243475e-06, "loss": 0.08218555450439453, "step": 86265 }, { "epoch": 0.7459511807074733, "grad_norm": 21.21475083805183, "learning_rate": 4.165654222677813e-06, "loss": 0.30600738525390625, "step": 86270 }, { "epoch": 0.7459944142290166, "grad_norm": 6.88533876105179, "learning_rate": 4.1654664947362546e-06, "loss": 0.09671554565429688, "step": 86275 }, { "epoch": 0.7460376477505599, "grad_norm": 12.881603768532804, "learning_rate": 4.1652787614196685e-06, "loss": 0.221099853515625, "step": 86280 }, { "epoch": 0.7460808812721031, "grad_norm": 24.885036022018404, "learning_rate": 4.165091022728919e-06, "loss": 0.119134521484375, "step": 86285 }, { "epoch": 0.7461241147936464, "grad_norm": 5.203332136834062, "learning_rate": 4.164903278664873e-06, "loss": 0.1118804931640625, "step": 86290 }, { "epoch": 0.7461673483151897, "grad_norm": 0.05702065414851712, "learning_rate": 4.1647155292283965e-06, "loss": 0.017847442626953126, "step": 86295 }, { "epoch": 0.746210581836733, "grad_norm": 1.8009293932180195, "learning_rate": 4.164527774420354e-06, "loss": 0.37917251586914064, "step": 86300 }, { "epoch": 0.7462538153582762, "grad_norm": 8.459300360941146, "learning_rate": 4.164340014241613e-06, "loss": 0.0706268310546875, "step": 86305 }, { "epoch": 0.7462970488798194, "grad_norm": 5.068274138152662, "learning_rate": 4.164152248693039e-06, "loss": 0.17057952880859376, "step": 86310 }, { "epoch": 0.7463402824013627, "grad_norm": 2.3728315573939534, "learning_rate": 4.163964477775498e-06, "loss": 0.030328941345214844, "step": 86315 }, { "epoch": 0.746383515922906, "grad_norm": 0.9179782196953542, "learning_rate": 4.163776701489854e-06, "loss": 0.0929931640625, "step": 86320 }, { "epoch": 0.7464267494444492, "grad_norm": 22.62966351688704, "learning_rate": 4.163588919836976e-06, "loss": 0.09401569366455079, "step": 86325 }, { "epoch": 0.7464699829659925, "grad_norm": 10.032398966221342, "learning_rate": 4.163401132817727e-06, "loss": 0.048246002197265624, "step": 86330 }, { "epoch": 0.7465132164875358, "grad_norm": 4.600326194193162, "learning_rate": 4.163213340432977e-06, "loss": 0.12650909423828124, "step": 86335 }, { "epoch": 0.746556450009079, "grad_norm": 11.922669737132237, "learning_rate": 4.16302554268359e-06, "loss": 0.0802215576171875, "step": 86340 }, { "epoch": 0.7465996835306223, "grad_norm": 14.553558869550098, "learning_rate": 4.16283773957043e-06, "loss": 0.05710945129394531, "step": 86345 }, { "epoch": 0.7466429170521656, "grad_norm": 2.8260766250069937, "learning_rate": 4.162649931094366e-06, "loss": 0.3454242706298828, "step": 86350 }, { "epoch": 0.7466861505737088, "grad_norm": 45.567551260268, "learning_rate": 4.162462117256263e-06, "loss": 0.2907524108886719, "step": 86355 }, { "epoch": 0.7467293840952521, "grad_norm": 4.282174327168123, "learning_rate": 4.162274298056987e-06, "loss": 0.044720458984375, "step": 86360 }, { "epoch": 0.7467726176167954, "grad_norm": 5.931164622748046, "learning_rate": 4.1620864734974064e-06, "loss": 0.07211837768554688, "step": 86365 }, { "epoch": 0.7468158511383386, "grad_norm": 1.7562817959522883, "learning_rate": 4.161898643578384e-06, "loss": 0.06246337890625, "step": 86370 }, { "epoch": 0.7468590846598819, "grad_norm": 52.798660776660256, "learning_rate": 4.161710808300789e-06, "loss": 0.40054931640625, "step": 86375 }, { "epoch": 0.7469023181814252, "grad_norm": 3.3178460057689527, "learning_rate": 4.161522967665485e-06, "loss": 0.097259521484375, "step": 86380 }, { "epoch": 0.7469455517029684, "grad_norm": 12.751785014597038, "learning_rate": 4.161335121673341e-06, "loss": 0.20885982513427734, "step": 86385 }, { "epoch": 0.7469887852245117, "grad_norm": 1.555199915022431, "learning_rate": 4.161147270325221e-06, "loss": 0.165972900390625, "step": 86390 }, { "epoch": 0.747032018746055, "grad_norm": 13.368313110437693, "learning_rate": 4.160959413621993e-06, "loss": 0.13193359375, "step": 86395 }, { "epoch": 0.7470752522675982, "grad_norm": 12.87035825168922, "learning_rate": 4.160771551564522e-06, "loss": 0.1416473388671875, "step": 86400 }, { "epoch": 0.7471184857891414, "grad_norm": 86.92619127272215, "learning_rate": 4.1605836841536765e-06, "loss": 0.3357666015625, "step": 86405 }, { "epoch": 0.7471617193106848, "grad_norm": 9.293246335940175, "learning_rate": 4.1603958113903206e-06, "loss": 0.08543930053710938, "step": 86410 }, { "epoch": 0.747204952832228, "grad_norm": 41.68015128671953, "learning_rate": 4.160207933275322e-06, "loss": 0.1239532470703125, "step": 86415 }, { "epoch": 0.7472481863537712, "grad_norm": 14.32399197323688, "learning_rate": 4.160020049809548e-06, "loss": 0.22090644836425782, "step": 86420 }, { "epoch": 0.7472914198753146, "grad_norm": 9.708564403495284, "learning_rate": 4.159832160993862e-06, "loss": 0.2555545806884766, "step": 86425 }, { "epoch": 0.7473346533968578, "grad_norm": 0.7501000083902168, "learning_rate": 4.159644266829133e-06, "loss": 0.12542800903320311, "step": 86430 }, { "epoch": 0.747377886918401, "grad_norm": 0.019618365111586628, "learning_rate": 4.159456367316228e-06, "loss": 0.39189720153808594, "step": 86435 }, { "epoch": 0.7474211204399444, "grad_norm": 8.02910954492936, "learning_rate": 4.159268462456012e-06, "loss": 0.20890464782714843, "step": 86440 }, { "epoch": 0.7474643539614876, "grad_norm": 8.709559537393838, "learning_rate": 4.159080552249354e-06, "loss": 0.04397125244140625, "step": 86445 }, { "epoch": 0.7475075874830308, "grad_norm": 4.077779378907635, "learning_rate": 4.1588926366971175e-06, "loss": 0.18798980712890626, "step": 86450 }, { "epoch": 0.7475508210045742, "grad_norm": 30.122048029653136, "learning_rate": 4.15870471580017e-06, "loss": 0.501461410522461, "step": 86455 }, { "epoch": 0.7475940545261174, "grad_norm": 10.871706735161576, "learning_rate": 4.15851678955938e-06, "loss": 0.07414932250976562, "step": 86460 }, { "epoch": 0.7476372880476606, "grad_norm": 8.213285245011454, "learning_rate": 4.158328857975611e-06, "loss": 0.0603057861328125, "step": 86465 }, { "epoch": 0.747680521569204, "grad_norm": 7.4622626655798925, "learning_rate": 4.158140921049734e-06, "loss": 0.07300853729248047, "step": 86470 }, { "epoch": 0.7477237550907472, "grad_norm": 0.06809088005696019, "learning_rate": 4.157952978782612e-06, "loss": 0.30060863494873047, "step": 86475 }, { "epoch": 0.7477669886122904, "grad_norm": 21.456302236799498, "learning_rate": 4.157765031175114e-06, "loss": 0.14334945678710936, "step": 86480 }, { "epoch": 0.7478102221338336, "grad_norm": 1.8193867725601194, "learning_rate": 4.157577078228105e-06, "loss": 0.07652740478515625, "step": 86485 }, { "epoch": 0.747853455655377, "grad_norm": 7.672503929117214, "learning_rate": 4.157389119942453e-06, "loss": 0.2126220703125, "step": 86490 }, { "epoch": 0.7478966891769202, "grad_norm": 25.437918184091245, "learning_rate": 4.157201156319025e-06, "loss": 0.1249786376953125, "step": 86495 }, { "epoch": 0.7479399226984634, "grad_norm": 9.402093390029167, "learning_rate": 4.157013187358689e-06, "loss": 0.09195671081542969, "step": 86500 }, { "epoch": 0.7479831562200068, "grad_norm": 28.165864222700083, "learning_rate": 4.156825213062308e-06, "loss": 0.1006988525390625, "step": 86505 }, { "epoch": 0.74802638974155, "grad_norm": 33.22585201252008, "learning_rate": 4.156637233430753e-06, "loss": 0.36728515625, "step": 86510 }, { "epoch": 0.7480696232630932, "grad_norm": 3.793920753004979, "learning_rate": 4.156449248464888e-06, "loss": 0.07946624755859374, "step": 86515 }, { "epoch": 0.7481128567846366, "grad_norm": 3.2835580793822685, "learning_rate": 4.156261258165581e-06, "loss": 0.15620498657226561, "step": 86520 }, { "epoch": 0.7481560903061798, "grad_norm": 1.1998902629570374, "learning_rate": 4.156073262533701e-06, "loss": 0.16828994750976561, "step": 86525 }, { "epoch": 0.748199323827723, "grad_norm": 23.67710520695194, "learning_rate": 4.155885261570112e-06, "loss": 0.16303768157958984, "step": 86530 }, { "epoch": 0.7482425573492664, "grad_norm": 2.1851497816742858, "learning_rate": 4.155697255275682e-06, "loss": 0.3014190673828125, "step": 86535 }, { "epoch": 0.7482857908708096, "grad_norm": 1.638549439013079, "learning_rate": 4.1555092436512794e-06, "loss": 0.08758201599121093, "step": 86540 }, { "epoch": 0.7483290243923528, "grad_norm": 0.11628612877211739, "learning_rate": 4.155321226697769e-06, "loss": 0.09853477478027343, "step": 86545 }, { "epoch": 0.7483722579138962, "grad_norm": 29.34559822933684, "learning_rate": 4.15513320441602e-06, "loss": 0.13950538635253906, "step": 86550 }, { "epoch": 0.7484154914354394, "grad_norm": 13.52987809793618, "learning_rate": 4.1549451768069e-06, "loss": 0.16335601806640626, "step": 86555 }, { "epoch": 0.7484587249569826, "grad_norm": 11.214390374643633, "learning_rate": 4.154757143871273e-06, "loss": 0.33894500732421873, "step": 86560 }, { "epoch": 0.748501958478526, "grad_norm": 1.1748978499006764, "learning_rate": 4.154569105610009e-06, "loss": 0.04590911865234375, "step": 86565 }, { "epoch": 0.7485451920000692, "grad_norm": 3.1838897273648454, "learning_rate": 4.154381062023974e-06, "loss": 0.10846786499023438, "step": 86570 }, { "epoch": 0.7485884255216124, "grad_norm": 46.23689074552767, "learning_rate": 4.154193013114035e-06, "loss": 0.23340988159179688, "step": 86575 }, { "epoch": 0.7486316590431557, "grad_norm": 2.893754005972654, "learning_rate": 4.154004958881061e-06, "loss": 0.08474502563476563, "step": 86580 }, { "epoch": 0.748674892564699, "grad_norm": 25.138137594269285, "learning_rate": 4.153816899325917e-06, "loss": 0.28529052734375, "step": 86585 }, { "epoch": 0.7487181260862422, "grad_norm": 1.630334268431898, "learning_rate": 4.153628834449473e-06, "loss": 0.15872802734375, "step": 86590 }, { "epoch": 0.7487613596077854, "grad_norm": 1.6341186944576063, "learning_rate": 4.153440764252593e-06, "loss": 0.3922740936279297, "step": 86595 }, { "epoch": 0.7488045931293288, "grad_norm": 0.576024641676508, "learning_rate": 4.153252688736145e-06, "loss": 0.0712799072265625, "step": 86600 }, { "epoch": 0.748847826650872, "grad_norm": 25.366617094665347, "learning_rate": 4.153064607901001e-06, "loss": 0.25693511962890625, "step": 86605 }, { "epoch": 0.7488910601724152, "grad_norm": 35.873091535425054, "learning_rate": 4.152876521748023e-06, "loss": 0.1281951904296875, "step": 86610 }, { "epoch": 0.7489342936939586, "grad_norm": 0.5883518552358091, "learning_rate": 4.152688430278081e-06, "loss": 0.07355270385742188, "step": 86615 }, { "epoch": 0.7489775272155018, "grad_norm": 2.851062262578317, "learning_rate": 4.152500333492041e-06, "loss": 0.050651168823242186, "step": 86620 }, { "epoch": 0.749020760737045, "grad_norm": 11.304665122831624, "learning_rate": 4.152312231390771e-06, "loss": 0.16793975830078126, "step": 86625 }, { "epoch": 0.7490639942585884, "grad_norm": 31.693643898003216, "learning_rate": 4.152124123975139e-06, "loss": 0.42571773529052737, "step": 86630 }, { "epoch": 0.7491072277801316, "grad_norm": 3.964016761534703, "learning_rate": 4.151936011246013e-06, "loss": 0.15406341552734376, "step": 86635 }, { "epoch": 0.7491504613016748, "grad_norm": 4.023387962920305, "learning_rate": 4.151747893204261e-06, "loss": 0.15364532470703124, "step": 86640 }, { "epoch": 0.7491936948232182, "grad_norm": 2.505270302530748, "learning_rate": 4.1515597698507475e-06, "loss": 0.06204147338867187, "step": 86645 }, { "epoch": 0.7492369283447614, "grad_norm": 14.811802196018798, "learning_rate": 4.1513716411863436e-06, "loss": 0.17982177734375, "step": 86650 }, { "epoch": 0.7492801618663046, "grad_norm": 40.40101964421521, "learning_rate": 4.151183507211914e-06, "loss": 0.31733551025390627, "step": 86655 }, { "epoch": 0.7493233953878479, "grad_norm": 0.2667825878200438, "learning_rate": 4.150995367928328e-06, "loss": 0.013623046875, "step": 86660 }, { "epoch": 0.7493666289093912, "grad_norm": 3.692784178963595, "learning_rate": 4.150807223336454e-06, "loss": 0.03040008544921875, "step": 86665 }, { "epoch": 0.7494098624309344, "grad_norm": 6.217220844810119, "learning_rate": 4.1506190734371585e-06, "loss": 0.16190643310546876, "step": 86670 }, { "epoch": 0.7494530959524777, "grad_norm": 3.214279544406209, "learning_rate": 4.150430918231309e-06, "loss": 0.1839630126953125, "step": 86675 }, { "epoch": 0.749496329474021, "grad_norm": 0.4562215888174895, "learning_rate": 4.150242757719774e-06, "loss": 0.2007720947265625, "step": 86680 }, { "epoch": 0.7495395629955642, "grad_norm": 1.1215429706254987, "learning_rate": 4.15005459190342e-06, "loss": 0.057879638671875, "step": 86685 }, { "epoch": 0.7495827965171075, "grad_norm": 44.91022584863214, "learning_rate": 4.149866420783118e-06, "loss": 0.667437744140625, "step": 86690 }, { "epoch": 0.7496260300386508, "grad_norm": 6.316642153296231, "learning_rate": 4.149678244359732e-06, "loss": 0.05531463623046875, "step": 86695 }, { "epoch": 0.749669263560194, "grad_norm": 3.5325219398645915, "learning_rate": 4.1494900626341325e-06, "loss": 0.03817825317382813, "step": 86700 }, { "epoch": 0.7497124970817373, "grad_norm": 14.689001874613373, "learning_rate": 4.149301875607187e-06, "loss": 0.16568603515625, "step": 86705 }, { "epoch": 0.7497557306032806, "grad_norm": 1.0959761300904938, "learning_rate": 4.149113683279761e-06, "loss": 0.3960836410522461, "step": 86710 }, { "epoch": 0.7497989641248238, "grad_norm": 30.74848304652641, "learning_rate": 4.148925485652725e-06, "loss": 0.20557403564453125, "step": 86715 }, { "epoch": 0.7498421976463671, "grad_norm": 0.43906063874923834, "learning_rate": 4.148737282726946e-06, "loss": 0.09730110168457032, "step": 86720 }, { "epoch": 0.7498854311679104, "grad_norm": 1.171695803467683, "learning_rate": 4.148549074503292e-06, "loss": 0.4861602783203125, "step": 86725 }, { "epoch": 0.7499286646894536, "grad_norm": 15.869814427460925, "learning_rate": 4.148360860982632e-06, "loss": 0.30378265380859376, "step": 86730 }, { "epoch": 0.7499718982109969, "grad_norm": 10.222881700833073, "learning_rate": 4.148172642165833e-06, "loss": 0.34899368286132815, "step": 86735 }, { "epoch": 0.7500151317325402, "grad_norm": 26.18486936179846, "learning_rate": 4.147984418053762e-06, "loss": 0.12708740234375, "step": 86740 }, { "epoch": 0.7500583652540834, "grad_norm": 10.111612142439151, "learning_rate": 4.1477961886472895e-06, "loss": 0.062481689453125, "step": 86745 }, { "epoch": 0.7501015987756267, "grad_norm": 52.38324289085845, "learning_rate": 4.147607953947282e-06, "loss": 0.536404037475586, "step": 86750 }, { "epoch": 0.7501448322971699, "grad_norm": 4.080618011569881, "learning_rate": 4.147419713954609e-06, "loss": 0.2371826171875, "step": 86755 }, { "epoch": 0.7501880658187132, "grad_norm": 4.2443300364499414, "learning_rate": 4.147231468670136e-06, "loss": 0.117022705078125, "step": 86760 }, { "epoch": 0.7502312993402565, "grad_norm": 4.402257696093449, "learning_rate": 4.147043218094734e-06, "loss": 0.09253692626953125, "step": 86765 }, { "epoch": 0.7502745328617997, "grad_norm": 52.17511368440454, "learning_rate": 4.14685496222927e-06, "loss": 0.2949485778808594, "step": 86770 }, { "epoch": 0.750317766383343, "grad_norm": 18.262995404587553, "learning_rate": 4.146666701074611e-06, "loss": 0.1692718505859375, "step": 86775 }, { "epoch": 0.7503609999048863, "grad_norm": 0.2717573510411089, "learning_rate": 4.146478434631628e-06, "loss": 0.29234619140625, "step": 86780 }, { "epoch": 0.7504042334264295, "grad_norm": 12.915377495205755, "learning_rate": 4.146290162901187e-06, "loss": 0.25333251953125, "step": 86785 }, { "epoch": 0.7504474669479728, "grad_norm": 28.124503105408923, "learning_rate": 4.1461018858841575e-06, "loss": 0.16540184020996093, "step": 86790 }, { "epoch": 0.750490700469516, "grad_norm": 20.373009512733773, "learning_rate": 4.145913603581407e-06, "loss": 0.45638580322265626, "step": 86795 }, { "epoch": 0.7505339339910593, "grad_norm": 34.07252624502705, "learning_rate": 4.145725315993804e-06, "loss": 0.22300262451171876, "step": 86800 }, { "epoch": 0.7505771675126026, "grad_norm": 12.452885151281937, "learning_rate": 4.145537023122217e-06, "loss": 0.16058425903320311, "step": 86805 }, { "epoch": 0.7506204010341458, "grad_norm": 11.624384084928014, "learning_rate": 4.145348724967515e-06, "loss": 0.2609382629394531, "step": 86810 }, { "epoch": 0.7506636345556891, "grad_norm": 0.509790125638261, "learning_rate": 4.145160421530567e-06, "loss": 0.25077285766601565, "step": 86815 }, { "epoch": 0.7507068680772324, "grad_norm": 8.827571813389074, "learning_rate": 4.144972112812238e-06, "loss": 0.07624359130859375, "step": 86820 }, { "epoch": 0.7507501015987756, "grad_norm": 2.527081526157638, "learning_rate": 4.144783798813399e-06, "loss": 0.09406661987304688, "step": 86825 }, { "epoch": 0.7507933351203189, "grad_norm": 0.12107773400388612, "learning_rate": 4.14459547953492e-06, "loss": 0.20338668823242187, "step": 86830 }, { "epoch": 0.7508365686418621, "grad_norm": 1.1667641756854783, "learning_rate": 4.1444071549776674e-06, "loss": 0.03405303955078125, "step": 86835 }, { "epoch": 0.7508798021634054, "grad_norm": 34.12209832217845, "learning_rate": 4.144218825142509e-06, "loss": 0.16655349731445312, "step": 86840 }, { "epoch": 0.7509230356849487, "grad_norm": 0.45693072693688247, "learning_rate": 4.144030490030316e-06, "loss": 0.04239349365234375, "step": 86845 }, { "epoch": 0.7509662692064919, "grad_norm": 4.190338987320019, "learning_rate": 4.143842149641954e-06, "loss": 0.059864234924316403, "step": 86850 }, { "epoch": 0.7510095027280352, "grad_norm": 31.251454774519402, "learning_rate": 4.1436538039782935e-06, "loss": 0.12537384033203125, "step": 86855 }, { "epoch": 0.7510527362495785, "grad_norm": 2.623643499517929, "learning_rate": 4.143465453040203e-06, "loss": 0.17010269165039063, "step": 86860 }, { "epoch": 0.7510959697711217, "grad_norm": 1.7707191751029083, "learning_rate": 4.143277096828551e-06, "loss": 0.209893798828125, "step": 86865 }, { "epoch": 0.751139203292665, "grad_norm": 38.229895755259136, "learning_rate": 4.143088735344206e-06, "loss": 0.11828498840332032, "step": 86870 }, { "epoch": 0.7511824368142083, "grad_norm": 13.170335260945214, "learning_rate": 4.142900368588036e-06, "loss": 0.12598876953125, "step": 86875 }, { "epoch": 0.7512256703357515, "grad_norm": 13.913872946157877, "learning_rate": 4.142711996560911e-06, "loss": 0.06672439575195313, "step": 86880 }, { "epoch": 0.7512689038572948, "grad_norm": 0.61014964998869, "learning_rate": 4.1425236192637e-06, "loss": 0.15780029296875, "step": 86885 }, { "epoch": 0.7513121373788381, "grad_norm": 16.825213922948226, "learning_rate": 4.14233523669727e-06, "loss": 0.5296417236328125, "step": 86890 }, { "epoch": 0.7513553709003813, "grad_norm": 3.616532933292272, "learning_rate": 4.1421468488624915e-06, "loss": 0.044174575805664064, "step": 86895 }, { "epoch": 0.7513986044219246, "grad_norm": 0.5009661587570082, "learning_rate": 4.141958455760232e-06, "loss": 0.0715780258178711, "step": 86900 }, { "epoch": 0.7514418379434679, "grad_norm": 22.42384802713741, "learning_rate": 4.141770057391361e-06, "loss": 0.160028076171875, "step": 86905 }, { "epoch": 0.7514850714650111, "grad_norm": 0.32718861396700716, "learning_rate": 4.141581653756748e-06, "loss": 0.18749542236328126, "step": 86910 }, { "epoch": 0.7515283049865544, "grad_norm": 0.5993749445662756, "learning_rate": 4.14139324485726e-06, "loss": 0.01104278564453125, "step": 86915 }, { "epoch": 0.7515715385080977, "grad_norm": 4.191111814242021, "learning_rate": 4.141204830693769e-06, "loss": 0.14899444580078125, "step": 86920 }, { "epoch": 0.7516147720296409, "grad_norm": 27.4403805024297, "learning_rate": 4.141016411267142e-06, "loss": 0.2933784484863281, "step": 86925 }, { "epoch": 0.7516580055511841, "grad_norm": 37.73273089087591, "learning_rate": 4.140827986578248e-06, "loss": 0.3001270294189453, "step": 86930 }, { "epoch": 0.7517012390727275, "grad_norm": 2.116033027986699, "learning_rate": 4.140639556627955e-06, "loss": 0.253900146484375, "step": 86935 }, { "epoch": 0.7517444725942707, "grad_norm": 8.192174498127324, "learning_rate": 4.140451121417133e-06, "loss": 0.23665618896484375, "step": 86940 }, { "epoch": 0.7517877061158139, "grad_norm": 44.388550316435875, "learning_rate": 4.140262680946653e-06, "loss": 0.153436279296875, "step": 86945 }, { "epoch": 0.7518309396373573, "grad_norm": 17.637968530200062, "learning_rate": 4.14007423521738e-06, "loss": 0.0686981201171875, "step": 86950 }, { "epoch": 0.7518741731589005, "grad_norm": 5.367707274921029, "learning_rate": 4.139885784230187e-06, "loss": 0.403515625, "step": 86955 }, { "epoch": 0.7519174066804437, "grad_norm": 13.223803022409736, "learning_rate": 4.139697327985942e-06, "loss": 0.1165252685546875, "step": 86960 }, { "epoch": 0.751960640201987, "grad_norm": 97.95387531221294, "learning_rate": 4.139508866485512e-06, "loss": 0.5506599426269532, "step": 86965 }, { "epoch": 0.7520038737235303, "grad_norm": 2.5256161163395485, "learning_rate": 4.139320399729768e-06, "loss": 0.283868408203125, "step": 86970 }, { "epoch": 0.7520471072450735, "grad_norm": 5.409304141354786, "learning_rate": 4.139131927719581e-06, "loss": 0.2480175018310547, "step": 86975 }, { "epoch": 0.7520903407666168, "grad_norm": 14.418430462563471, "learning_rate": 4.138943450455816e-06, "loss": 0.102581787109375, "step": 86980 }, { "epoch": 0.7521335742881601, "grad_norm": 4.392774758770661, "learning_rate": 4.138754967939345e-06, "loss": 0.043994140625, "step": 86985 }, { "epoch": 0.7521768078097033, "grad_norm": 0.0968148214780277, "learning_rate": 4.138566480171037e-06, "loss": 0.0521270751953125, "step": 86990 }, { "epoch": 0.7522200413312466, "grad_norm": 6.292772213561931, "learning_rate": 4.13837798715176e-06, "loss": 0.145263671875, "step": 86995 }, { "epoch": 0.7522632748527899, "grad_norm": 0.793607351990162, "learning_rate": 4.138189488882386e-06, "loss": 0.09802627563476562, "step": 87000 }, { "epoch": 0.7523065083743331, "grad_norm": 3.506119287343912, "learning_rate": 4.138000985363782e-06, "loss": 0.227496337890625, "step": 87005 }, { "epoch": 0.7523497418958763, "grad_norm": 10.426317858296116, "learning_rate": 4.137812476596818e-06, "loss": 0.08487205505371094, "step": 87010 }, { "epoch": 0.7523929754174197, "grad_norm": 5.045565633955442, "learning_rate": 4.137623962582363e-06, "loss": 0.08267860412597657, "step": 87015 }, { "epoch": 0.7524362089389629, "grad_norm": 13.125105252136699, "learning_rate": 4.137435443321287e-06, "loss": 0.2689910888671875, "step": 87020 }, { "epoch": 0.7524794424605061, "grad_norm": 4.316607041611107, "learning_rate": 4.137246918814459e-06, "loss": 0.24339599609375, "step": 87025 }, { "epoch": 0.7525226759820495, "grad_norm": 8.401762227812231, "learning_rate": 4.1370583890627485e-06, "loss": 0.43743438720703126, "step": 87030 }, { "epoch": 0.7525659095035927, "grad_norm": 1.7801763501595256, "learning_rate": 4.136869854067026e-06, "loss": 0.06302032470703126, "step": 87035 }, { "epoch": 0.7526091430251359, "grad_norm": 0.8502173046040358, "learning_rate": 4.13668131382816e-06, "loss": 0.285626220703125, "step": 87040 }, { "epoch": 0.7526523765466793, "grad_norm": 21.804515487738747, "learning_rate": 4.13649276834702e-06, "loss": 0.11019287109375, "step": 87045 }, { "epoch": 0.7526956100682225, "grad_norm": 3.209058973355902, "learning_rate": 4.136304217624476e-06, "loss": 0.088616943359375, "step": 87050 }, { "epoch": 0.7527388435897657, "grad_norm": 16.869703165197674, "learning_rate": 4.136115661661397e-06, "loss": 0.4343994140625, "step": 87055 }, { "epoch": 0.7527820771113091, "grad_norm": 52.664048263189436, "learning_rate": 4.135927100458653e-06, "loss": 0.22548751831054686, "step": 87060 }, { "epoch": 0.7528253106328523, "grad_norm": 12.12571545063261, "learning_rate": 4.135738534017114e-06, "loss": 0.11226806640625, "step": 87065 }, { "epoch": 0.7528685441543955, "grad_norm": 21.089789435907523, "learning_rate": 4.13554996233765e-06, "loss": 0.19652023315429687, "step": 87070 }, { "epoch": 0.7529117776759389, "grad_norm": 4.251970053201964, "learning_rate": 4.135361385421128e-06, "loss": 0.10286216735839844, "step": 87075 }, { "epoch": 0.7529550111974821, "grad_norm": 130.26489473186737, "learning_rate": 4.13517280326842e-06, "loss": 0.5508893013000489, "step": 87080 }, { "epoch": 0.7529982447190253, "grad_norm": 0.18571159811011925, "learning_rate": 4.134984215880396e-06, "loss": 0.015439605712890625, "step": 87085 }, { "epoch": 0.7530414782405687, "grad_norm": 2.176044452236998, "learning_rate": 4.134795623257925e-06, "loss": 0.15414962768554688, "step": 87090 }, { "epoch": 0.7530847117621119, "grad_norm": 8.480131993720965, "learning_rate": 4.134607025401877e-06, "loss": 0.0978759765625, "step": 87095 }, { "epoch": 0.7531279452836551, "grad_norm": 4.324124575215095, "learning_rate": 4.134418422313122e-06, "loss": 0.19559783935546876, "step": 87100 }, { "epoch": 0.7531711788051983, "grad_norm": 1.8175781824267585, "learning_rate": 4.1342298139925284e-06, "loss": 0.07924957275390625, "step": 87105 }, { "epoch": 0.7532144123267417, "grad_norm": 2.256963662425557, "learning_rate": 4.134041200440967e-06, "loss": 0.188446044921875, "step": 87110 }, { "epoch": 0.7532576458482849, "grad_norm": 6.235890093655849, "learning_rate": 4.1338525816593094e-06, "loss": 0.24298248291015626, "step": 87115 }, { "epoch": 0.7533008793698281, "grad_norm": 5.104508049976248, "learning_rate": 4.1336639576484236e-06, "loss": 0.0878509521484375, "step": 87120 }, { "epoch": 0.7533441128913715, "grad_norm": 0.6901392706911523, "learning_rate": 4.1334753284091785e-06, "loss": 0.10653800964355468, "step": 87125 }, { "epoch": 0.7533873464129147, "grad_norm": 2.050809576750728, "learning_rate": 4.133286693942445e-06, "loss": 0.06568603515625, "step": 87130 }, { "epoch": 0.753430579934458, "grad_norm": 4.7810891725292795, "learning_rate": 4.133098054249095e-06, "loss": 0.051055908203125, "step": 87135 }, { "epoch": 0.7534738134560013, "grad_norm": 2.357817474191496, "learning_rate": 4.132909409329996e-06, "loss": 0.10389328002929688, "step": 87140 }, { "epoch": 0.7535170469775445, "grad_norm": 3.7782107245403393, "learning_rate": 4.132720759186019e-06, "loss": 0.08883819580078126, "step": 87145 }, { "epoch": 0.7535602804990877, "grad_norm": 6.279109372481647, "learning_rate": 4.132532103818035e-06, "loss": 0.1830047607421875, "step": 87150 }, { "epoch": 0.7536035140206311, "grad_norm": 0.043277008446505574, "learning_rate": 4.132343443226912e-06, "loss": 0.3522144317626953, "step": 87155 }, { "epoch": 0.7536467475421743, "grad_norm": 20.40230009306368, "learning_rate": 4.132154777413521e-06, "loss": 0.17954483032226562, "step": 87160 }, { "epoch": 0.7536899810637175, "grad_norm": 7.726884569418477, "learning_rate": 4.131966106378732e-06, "loss": 0.29624786376953127, "step": 87165 }, { "epoch": 0.7537332145852609, "grad_norm": 2.919138807446465, "learning_rate": 4.131777430123416e-06, "loss": 0.048464202880859376, "step": 87170 }, { "epoch": 0.7537764481068041, "grad_norm": 2.854546886806803, "learning_rate": 4.1315887486484425e-06, "loss": 0.1147491455078125, "step": 87175 }, { "epoch": 0.7538196816283473, "grad_norm": 10.693371101366735, "learning_rate": 4.131400061954682e-06, "loss": 0.06273345947265625, "step": 87180 }, { "epoch": 0.7538629151498906, "grad_norm": 1.8595379901652114, "learning_rate": 4.131211370043003e-06, "loss": 0.013710784912109374, "step": 87185 }, { "epoch": 0.7539061486714339, "grad_norm": 5.388792285618186, "learning_rate": 4.131022672914278e-06, "loss": 0.3714569091796875, "step": 87190 }, { "epoch": 0.7539493821929771, "grad_norm": 2.742957720924458, "learning_rate": 4.130833970569375e-06, "loss": 0.09087677001953125, "step": 87195 }, { "epoch": 0.7539926157145204, "grad_norm": 9.617204767101809, "learning_rate": 4.1306452630091686e-06, "loss": 0.1494720458984375, "step": 87200 }, { "epoch": 0.7540358492360637, "grad_norm": 1.6990609100529537, "learning_rate": 4.130456550234524e-06, "loss": 0.13287353515625, "step": 87205 }, { "epoch": 0.7540790827576069, "grad_norm": 4.956347059456797, "learning_rate": 4.130267832246314e-06, "loss": 0.14415969848632812, "step": 87210 }, { "epoch": 0.7541223162791502, "grad_norm": 0.9949715004071894, "learning_rate": 4.130079109045409e-06, "loss": 0.037322998046875, "step": 87215 }, { "epoch": 0.7541655498006935, "grad_norm": 0.8848818831744587, "learning_rate": 4.129890380632678e-06, "loss": 0.09047698974609375, "step": 87220 }, { "epoch": 0.7542087833222367, "grad_norm": 2.4007975824395347, "learning_rate": 4.129701647008994e-06, "loss": 0.02993621826171875, "step": 87225 }, { "epoch": 0.75425201684378, "grad_norm": 2.712356649876676, "learning_rate": 4.129512908175225e-06, "loss": 0.07207794189453125, "step": 87230 }, { "epoch": 0.7542952503653233, "grad_norm": 12.660835043829058, "learning_rate": 4.129324164132242e-06, "loss": 0.27523040771484375, "step": 87235 }, { "epoch": 0.7543384838868665, "grad_norm": 7.637512561401792, "learning_rate": 4.129135414880917e-06, "loss": 0.3451869964599609, "step": 87240 }, { "epoch": 0.7543817174084098, "grad_norm": 6.946051554083539, "learning_rate": 4.128946660422118e-06, "loss": 0.07064247131347656, "step": 87245 }, { "epoch": 0.7544249509299531, "grad_norm": 19.016130961979854, "learning_rate": 4.128757900756716e-06, "loss": 0.1189056396484375, "step": 87250 }, { "epoch": 0.7544681844514963, "grad_norm": 9.703983746520327, "learning_rate": 4.128569135885584e-06, "loss": 0.25740737915039064, "step": 87255 }, { "epoch": 0.7545114179730396, "grad_norm": 3.4584820139423313, "learning_rate": 4.12838036580959e-06, "loss": 0.0783599853515625, "step": 87260 }, { "epoch": 0.7545546514945828, "grad_norm": 7.471208696286041, "learning_rate": 4.128191590529606e-06, "loss": 0.08387298583984375, "step": 87265 }, { "epoch": 0.7545978850161261, "grad_norm": 1.7762381275067423, "learning_rate": 4.128002810046502e-06, "loss": 0.064617919921875, "step": 87270 }, { "epoch": 0.7546411185376694, "grad_norm": 1.717155883635366, "learning_rate": 4.127814024361148e-06, "loss": 0.25458297729492185, "step": 87275 }, { "epoch": 0.7546843520592126, "grad_norm": 2.9484923297630155, "learning_rate": 4.127625233474417e-06, "loss": 0.1215423583984375, "step": 87280 }, { "epoch": 0.7547275855807559, "grad_norm": 0.22097137205179745, "learning_rate": 4.127436437387176e-06, "loss": 0.127825927734375, "step": 87285 }, { "epoch": 0.7547708191022992, "grad_norm": 4.823201308759952, "learning_rate": 4.1272476361002985e-06, "loss": 0.192425537109375, "step": 87290 }, { "epoch": 0.7548140526238424, "grad_norm": 0.14710688092265628, "learning_rate": 4.127058829614656e-06, "loss": 0.08120880126953126, "step": 87295 }, { "epoch": 0.7548572861453857, "grad_norm": 0.342905392566264, "learning_rate": 4.126870017931116e-06, "loss": 0.06571502685546875, "step": 87300 }, { "epoch": 0.754900519666929, "grad_norm": 20.877606874159373, "learning_rate": 4.126681201050551e-06, "loss": 0.1403339385986328, "step": 87305 }, { "epoch": 0.7549437531884722, "grad_norm": 5.014815279474688, "learning_rate": 4.126492378973832e-06, "loss": 0.1324798583984375, "step": 87310 }, { "epoch": 0.7549869867100155, "grad_norm": 0.5991179761387478, "learning_rate": 4.12630355170183e-06, "loss": 0.166070556640625, "step": 87315 }, { "epoch": 0.7550302202315587, "grad_norm": 51.116951734848335, "learning_rate": 4.126114719235416e-06, "loss": 0.5015140533447265, "step": 87320 }, { "epoch": 0.755073453753102, "grad_norm": 1.8318589912398515, "learning_rate": 4.12592588157546e-06, "loss": 0.03546333312988281, "step": 87325 }, { "epoch": 0.7551166872746453, "grad_norm": 37.597309637370365, "learning_rate": 4.125737038722833e-06, "loss": 0.23475875854492187, "step": 87330 }, { "epoch": 0.7551599207961885, "grad_norm": 4.742936093081202, "learning_rate": 4.125548190678407e-06, "loss": 0.44572906494140624, "step": 87335 }, { "epoch": 0.7552031543177318, "grad_norm": 4.502188830795132, "learning_rate": 4.125359337443052e-06, "loss": 0.1483783721923828, "step": 87340 }, { "epoch": 0.7552463878392751, "grad_norm": 11.199807569553794, "learning_rate": 4.125170479017638e-06, "loss": 0.15859375, "step": 87345 }, { "epoch": 0.7552896213608183, "grad_norm": 3.9878204111890234, "learning_rate": 4.124981615403039e-06, "loss": 0.2435638427734375, "step": 87350 }, { "epoch": 0.7553328548823616, "grad_norm": 14.036629555140125, "learning_rate": 4.124792746600123e-06, "loss": 0.09242095947265624, "step": 87355 }, { "epoch": 0.7553760884039048, "grad_norm": 1.3438763726516003, "learning_rate": 4.124603872609762e-06, "loss": 0.4350870132446289, "step": 87360 }, { "epoch": 0.7554193219254481, "grad_norm": 16.59584827525914, "learning_rate": 4.124414993432828e-06, "loss": 0.1520721435546875, "step": 87365 }, { "epoch": 0.7554625554469914, "grad_norm": 33.54384455025707, "learning_rate": 4.124226109070191e-06, "loss": 0.13839302062988282, "step": 87370 }, { "epoch": 0.7555057889685346, "grad_norm": 20.017421710757656, "learning_rate": 4.124037219522724e-06, "loss": 0.18670921325683593, "step": 87375 }, { "epoch": 0.7555490224900779, "grad_norm": 0.09912552892777736, "learning_rate": 4.1238483247912946e-06, "loss": 0.22368392944335938, "step": 87380 }, { "epoch": 0.7555922560116212, "grad_norm": 13.058076738358338, "learning_rate": 4.1236594248767776e-06, "loss": 0.0995208740234375, "step": 87385 }, { "epoch": 0.7556354895331644, "grad_norm": 1.5710175394667423, "learning_rate": 4.123470519780042e-06, "loss": 0.026031494140625, "step": 87390 }, { "epoch": 0.7556787230547077, "grad_norm": 25.508813611710405, "learning_rate": 4.123281609501959e-06, "loss": 0.1255828857421875, "step": 87395 }, { "epoch": 0.755721956576251, "grad_norm": 0.8289621573810992, "learning_rate": 4.1230926940434015e-06, "loss": 0.16100082397460938, "step": 87400 }, { "epoch": 0.7557651900977942, "grad_norm": 56.86973995871997, "learning_rate": 4.122903773405239e-06, "loss": 0.3432472229003906, "step": 87405 }, { "epoch": 0.7558084236193375, "grad_norm": 3.483042355464892, "learning_rate": 4.122714847588344e-06, "loss": 0.1183563232421875, "step": 87410 }, { "epoch": 0.7558516571408808, "grad_norm": 3.915849954792469, "learning_rate": 4.122525916593587e-06, "loss": 0.18530311584472656, "step": 87415 }, { "epoch": 0.755894890662424, "grad_norm": 25.413016858382797, "learning_rate": 4.1223369804218396e-06, "loss": 0.22599639892578124, "step": 87420 }, { "epoch": 0.7559381241839673, "grad_norm": 31.274107537140456, "learning_rate": 4.122148039073973e-06, "loss": 0.1658966064453125, "step": 87425 }, { "epoch": 0.7559813577055106, "grad_norm": 5.4137401114757, "learning_rate": 4.121959092550859e-06, "loss": 0.1536083221435547, "step": 87430 }, { "epoch": 0.7560245912270538, "grad_norm": 6.756471221050331, "learning_rate": 4.12177014085337e-06, "loss": 0.19347763061523438, "step": 87435 }, { "epoch": 0.756067824748597, "grad_norm": 24.336581698641634, "learning_rate": 4.121581183982375e-06, "loss": 0.11084537506103516, "step": 87440 }, { "epoch": 0.7561110582701404, "grad_norm": 14.3084599635627, "learning_rate": 4.121392221938746e-06, "loss": 0.288079833984375, "step": 87445 }, { "epoch": 0.7561542917916836, "grad_norm": 5.904942298401907, "learning_rate": 4.121203254723357e-06, "loss": 0.18040618896484376, "step": 87450 }, { "epoch": 0.7561975253132268, "grad_norm": 2.571170594992171, "learning_rate": 4.121014282337076e-06, "loss": 0.132220458984375, "step": 87455 }, { "epoch": 0.7562407588347702, "grad_norm": 6.287679170139308, "learning_rate": 4.120825304780777e-06, "loss": 0.12131824493408203, "step": 87460 }, { "epoch": 0.7562839923563134, "grad_norm": 12.13472309916441, "learning_rate": 4.120636322055331e-06, "loss": 0.36223602294921875, "step": 87465 }, { "epoch": 0.7563272258778566, "grad_norm": 15.919545839746478, "learning_rate": 4.120447334161609e-06, "loss": 0.2624382019042969, "step": 87470 }, { "epoch": 0.7563704593994, "grad_norm": 3.5598303194691403, "learning_rate": 4.1202583411004815e-06, "loss": 0.10702590942382813, "step": 87475 }, { "epoch": 0.7564136929209432, "grad_norm": 0.710035624944718, "learning_rate": 4.120069342872824e-06, "loss": 0.15772590637207032, "step": 87480 }, { "epoch": 0.7564569264424864, "grad_norm": 24.430213548413505, "learning_rate": 4.119880339479505e-06, "loss": 0.14805908203125, "step": 87485 }, { "epoch": 0.7565001599640298, "grad_norm": 26.45683894811189, "learning_rate": 4.119691330921396e-06, "loss": 0.1592193603515625, "step": 87490 }, { "epoch": 0.756543393485573, "grad_norm": 14.136899437249635, "learning_rate": 4.1195023171993695e-06, "loss": 0.14869441986083984, "step": 87495 }, { "epoch": 0.7565866270071162, "grad_norm": 1.46578364584968, "learning_rate": 4.119313298314298e-06, "loss": 0.04840850830078125, "step": 87500 }, { "epoch": 0.7566298605286595, "grad_norm": 0.7892019503899628, "learning_rate": 4.1191242742670515e-06, "loss": 0.2992236614227295, "step": 87505 }, { "epoch": 0.7566730940502028, "grad_norm": 5.921727211163373, "learning_rate": 4.118935245058504e-06, "loss": 0.0730499267578125, "step": 87510 }, { "epoch": 0.756716327571746, "grad_norm": 6.2883926628038145, "learning_rate": 4.118746210689525e-06, "loss": 0.16572418212890624, "step": 87515 }, { "epoch": 0.7567595610932893, "grad_norm": 2.843783552813995, "learning_rate": 4.1185571711609875e-06, "loss": 0.3915271759033203, "step": 87520 }, { "epoch": 0.7568027946148326, "grad_norm": 16.334627567503475, "learning_rate": 4.118368126473764e-06, "loss": 0.303125, "step": 87525 }, { "epoch": 0.7568460281363758, "grad_norm": 24.523681145585957, "learning_rate": 4.118179076628724e-06, "loss": 0.14946212768554687, "step": 87530 }, { "epoch": 0.756889261657919, "grad_norm": 1.364178289606804, "learning_rate": 4.117990021626743e-06, "loss": 0.65330810546875, "step": 87535 }, { "epoch": 0.7569324951794624, "grad_norm": 4.948957437309477, "learning_rate": 4.117800961468688e-06, "loss": 0.07932510375976562, "step": 87540 }, { "epoch": 0.7569757287010056, "grad_norm": 21.518456065030442, "learning_rate": 4.117611896155437e-06, "loss": 0.1686492919921875, "step": 87545 }, { "epoch": 0.7570189622225488, "grad_norm": 0.24521202812251655, "learning_rate": 4.117422825687856e-06, "loss": 0.24055938720703124, "step": 87550 }, { "epoch": 0.7570621957440922, "grad_norm": 2.1802581939629184, "learning_rate": 4.117233750066821e-06, "loss": 0.08880500793457032, "step": 87555 }, { "epoch": 0.7571054292656354, "grad_norm": 1.0609597431891944, "learning_rate": 4.117044669293201e-06, "loss": 0.176629638671875, "step": 87560 }, { "epoch": 0.7571486627871786, "grad_norm": 7.440603841688025, "learning_rate": 4.116855583367872e-06, "loss": 0.28964385986328123, "step": 87565 }, { "epoch": 0.757191896308722, "grad_norm": 0.42658443373807314, "learning_rate": 4.116666492291702e-06, "loss": 0.10877838134765624, "step": 87570 }, { "epoch": 0.7572351298302652, "grad_norm": 38.44905086103207, "learning_rate": 4.116477396065566e-06, "loss": 0.22830657958984374, "step": 87575 }, { "epoch": 0.7572783633518084, "grad_norm": 0.7912757950157056, "learning_rate": 4.116288294690335e-06, "loss": 0.238238525390625, "step": 87580 }, { "epoch": 0.7573215968733518, "grad_norm": 3.0919866702481618, "learning_rate": 4.116099188166879e-06, "loss": 0.08048095703125, "step": 87585 }, { "epoch": 0.757364830394895, "grad_norm": 9.773965453560436, "learning_rate": 4.1159100764960746e-06, "loss": 0.12336196899414062, "step": 87590 }, { "epoch": 0.7574080639164382, "grad_norm": 16.30888706815507, "learning_rate": 4.115720959678791e-06, "loss": 0.14986801147460938, "step": 87595 }, { "epoch": 0.7574512974379816, "grad_norm": 9.078011249310299, "learning_rate": 4.1155318377159e-06, "loss": 0.052567481994628906, "step": 87600 }, { "epoch": 0.7574945309595248, "grad_norm": 6.318176229620599, "learning_rate": 4.115342710608275e-06, "loss": 0.04819183349609375, "step": 87605 }, { "epoch": 0.757537764481068, "grad_norm": 13.873368983373453, "learning_rate": 4.115153578356789e-06, "loss": 0.09530296325683593, "step": 87610 }, { "epoch": 0.7575809980026113, "grad_norm": 1.596281280516417, "learning_rate": 4.114964440962312e-06, "loss": 0.07265777587890625, "step": 87615 }, { "epoch": 0.7576242315241546, "grad_norm": 15.001119908259415, "learning_rate": 4.114775298425718e-06, "loss": 0.2163970947265625, "step": 87620 }, { "epoch": 0.7576674650456978, "grad_norm": 6.731470448795407, "learning_rate": 4.114586150747879e-06, "loss": 0.0897003173828125, "step": 87625 }, { "epoch": 0.757710698567241, "grad_norm": 30.84561887742745, "learning_rate": 4.114396997929667e-06, "loss": 0.3876808166503906, "step": 87630 }, { "epoch": 0.7577539320887844, "grad_norm": 0.8619218987782173, "learning_rate": 4.114207839971954e-06, "loss": 0.076226806640625, "step": 87635 }, { "epoch": 0.7577971656103276, "grad_norm": 11.66818176877291, "learning_rate": 4.114018676875613e-06, "loss": 0.0711761474609375, "step": 87640 }, { "epoch": 0.7578403991318708, "grad_norm": 17.673006989548405, "learning_rate": 4.113829508641517e-06, "loss": 0.16627197265625, "step": 87645 }, { "epoch": 0.7578836326534142, "grad_norm": 18.67695787391542, "learning_rate": 4.113640335270537e-06, "loss": 0.23389511108398436, "step": 87650 }, { "epoch": 0.7579268661749574, "grad_norm": 1.3875488688401947, "learning_rate": 4.113451156763547e-06, "loss": 0.10894775390625, "step": 87655 }, { "epoch": 0.7579700996965006, "grad_norm": 0.16326111745861766, "learning_rate": 4.113261973121418e-06, "loss": 0.40727920532226564, "step": 87660 }, { "epoch": 0.758013333218044, "grad_norm": 8.07377434298047, "learning_rate": 4.113072784345023e-06, "loss": 0.06578216552734376, "step": 87665 }, { "epoch": 0.7580565667395872, "grad_norm": 3.7240490807032995, "learning_rate": 4.112883590435235e-06, "loss": 0.10087890625, "step": 87670 }, { "epoch": 0.7580998002611304, "grad_norm": 51.62239411201953, "learning_rate": 4.112694391392927e-06, "loss": 0.3349601745605469, "step": 87675 }, { "epoch": 0.7581430337826738, "grad_norm": 1.4350140097919852, "learning_rate": 4.112505187218969e-06, "loss": 0.04262809753417969, "step": 87680 }, { "epoch": 0.758186267304217, "grad_norm": 6.11368454627595, "learning_rate": 4.112315977914237e-06, "loss": 0.17142257690429688, "step": 87685 }, { "epoch": 0.7582295008257602, "grad_norm": 12.346275732607065, "learning_rate": 4.112126763479602e-06, "loss": 0.1134613037109375, "step": 87690 }, { "epoch": 0.7582727343473036, "grad_norm": 2.853643552420332, "learning_rate": 4.111937543915936e-06, "loss": 0.3211027145385742, "step": 87695 }, { "epoch": 0.7583159678688468, "grad_norm": 0.6745339294460306, "learning_rate": 4.111748319224112e-06, "loss": 0.30676841735839844, "step": 87700 }, { "epoch": 0.75835920139039, "grad_norm": 6.54369000849581, "learning_rate": 4.111559089405004e-06, "loss": 0.07781333923339843, "step": 87705 }, { "epoch": 0.7584024349119333, "grad_norm": 4.430391623542247, "learning_rate": 4.111369854459482e-06, "loss": 0.0985382080078125, "step": 87710 }, { "epoch": 0.7584456684334766, "grad_norm": 3.7406971023208175, "learning_rate": 4.111180614388421e-06, "loss": 0.13963470458984376, "step": 87715 }, { "epoch": 0.7584889019550198, "grad_norm": 8.316962704742942, "learning_rate": 4.1109913691926945e-06, "loss": 0.1924560546875, "step": 87720 }, { "epoch": 0.7585321354765631, "grad_norm": 7.220782049037121, "learning_rate": 4.110802118873172e-06, "loss": 0.377545166015625, "step": 87725 }, { "epoch": 0.7585753689981064, "grad_norm": 2.956219269911191, "learning_rate": 4.11061286343073e-06, "loss": 0.36701011657714844, "step": 87730 }, { "epoch": 0.7586186025196496, "grad_norm": 24.25320770018308, "learning_rate": 4.110423602866239e-06, "loss": 0.196728515625, "step": 87735 }, { "epoch": 0.7586618360411929, "grad_norm": 17.440244570083824, "learning_rate": 4.110234337180573e-06, "loss": 0.05897789001464844, "step": 87740 }, { "epoch": 0.7587050695627362, "grad_norm": 1.7045371954451345, "learning_rate": 4.110045066374603e-06, "loss": 0.11354827880859375, "step": 87745 }, { "epoch": 0.7587483030842794, "grad_norm": 1.17756806348863, "learning_rate": 4.109855790449205e-06, "loss": 0.113214111328125, "step": 87750 }, { "epoch": 0.7587915366058227, "grad_norm": 4.812266321267527, "learning_rate": 4.109666509405249e-06, "loss": 0.0572296142578125, "step": 87755 }, { "epoch": 0.758834770127366, "grad_norm": 30.420410567839426, "learning_rate": 4.109477223243608e-06, "loss": 0.2860107421875, "step": 87760 }, { "epoch": 0.7588780036489092, "grad_norm": 2.906583412362509, "learning_rate": 4.109287931965158e-06, "loss": 0.15416297912597657, "step": 87765 }, { "epoch": 0.7589212371704525, "grad_norm": 13.213379390414131, "learning_rate": 4.109098635570769e-06, "loss": 0.03807525634765625, "step": 87770 }, { "epoch": 0.7589644706919958, "grad_norm": 0.7384373364148438, "learning_rate": 4.108909334061316e-06, "loss": 0.034368896484375, "step": 87775 }, { "epoch": 0.759007704213539, "grad_norm": 2.320601843085421, "learning_rate": 4.1087200274376705e-06, "loss": 0.1204010009765625, "step": 87780 }, { "epoch": 0.7590509377350823, "grad_norm": 1.5221200640866128, "learning_rate": 4.108530715700707e-06, "loss": 0.0580230712890625, "step": 87785 }, { "epoch": 0.7590941712566255, "grad_norm": 8.132141703072202, "learning_rate": 4.108341398851297e-06, "loss": 0.209942626953125, "step": 87790 }, { "epoch": 0.7591374047781688, "grad_norm": 1.4909129462986628, "learning_rate": 4.108152076890315e-06, "loss": 0.017693328857421874, "step": 87795 }, { "epoch": 0.759180638299712, "grad_norm": 5.3450132666158074, "learning_rate": 4.107962749818633e-06, "loss": 0.2912773132324219, "step": 87800 }, { "epoch": 0.7592238718212553, "grad_norm": 2.006446942759243, "learning_rate": 4.107773417637125e-06, "loss": 0.18449935913085938, "step": 87805 }, { "epoch": 0.7592671053427986, "grad_norm": 36.827553661018385, "learning_rate": 4.107584080346663e-06, "loss": 0.5679031372070312, "step": 87810 }, { "epoch": 0.7593103388643418, "grad_norm": 18.18627418740021, "learning_rate": 4.107394737948122e-06, "loss": 0.35564193725585935, "step": 87815 }, { "epoch": 0.7593535723858851, "grad_norm": 3.0145218508286207, "learning_rate": 4.1072053904423745e-06, "loss": 0.1885009765625, "step": 87820 }, { "epoch": 0.7593968059074284, "grad_norm": 5.242884796028356, "learning_rate": 4.1070160378302925e-06, "loss": 0.08758544921875, "step": 87825 }, { "epoch": 0.7594400394289716, "grad_norm": 26.29734881380694, "learning_rate": 4.1068266801127515e-06, "loss": 0.28177490234375, "step": 87830 }, { "epoch": 0.7594832729505149, "grad_norm": 32.31817525468004, "learning_rate": 4.106637317290624e-06, "loss": 0.15689697265625, "step": 87835 }, { "epoch": 0.7595265064720582, "grad_norm": 0.6859976828147555, "learning_rate": 4.106447949364782e-06, "loss": 0.08041191101074219, "step": 87840 }, { "epoch": 0.7595697399936014, "grad_norm": 6.153938286735018, "learning_rate": 4.106258576336099e-06, "loss": 0.0568450927734375, "step": 87845 }, { "epoch": 0.7596129735151447, "grad_norm": 11.871945785775639, "learning_rate": 4.10606919820545e-06, "loss": 0.185174560546875, "step": 87850 }, { "epoch": 0.759656207036688, "grad_norm": 1.506328071690533, "learning_rate": 4.105879814973708e-06, "loss": 0.1901031494140625, "step": 87855 }, { "epoch": 0.7596994405582312, "grad_norm": 2.9721724955716895, "learning_rate": 4.105690426641746e-06, "loss": 0.0743408203125, "step": 87860 }, { "epoch": 0.7597426740797745, "grad_norm": 0.32858337475515337, "learning_rate": 4.105501033210437e-06, "loss": 0.17546234130859376, "step": 87865 }, { "epoch": 0.7597859076013178, "grad_norm": 43.1948269883902, "learning_rate": 4.105311634680655e-06, "loss": 0.41693572998046874, "step": 87870 }, { "epoch": 0.759829141122861, "grad_norm": 8.272934537079456, "learning_rate": 4.105122231053274e-06, "loss": 0.1694976806640625, "step": 87875 }, { "epoch": 0.7598723746444043, "grad_norm": 10.12784245574341, "learning_rate": 4.1049328223291665e-06, "loss": 0.03522186279296875, "step": 87880 }, { "epoch": 0.7599156081659475, "grad_norm": 14.148130347704937, "learning_rate": 4.104743408509207e-06, "loss": 0.23330078125, "step": 87885 }, { "epoch": 0.7599588416874908, "grad_norm": 12.056077609442555, "learning_rate": 4.104553989594268e-06, "loss": 0.17357025146484376, "step": 87890 }, { "epoch": 0.7600020752090341, "grad_norm": 3.0703425306345298, "learning_rate": 4.104364565585223e-06, "loss": 0.0365631103515625, "step": 87895 }, { "epoch": 0.7600453087305773, "grad_norm": 1.1464114830860357, "learning_rate": 4.104175136482947e-06, "loss": 0.13207550048828126, "step": 87900 }, { "epoch": 0.7600885422521206, "grad_norm": 30.24788305994352, "learning_rate": 4.103985702288311e-06, "loss": 0.4962018966674805, "step": 87905 }, { "epoch": 0.7601317757736639, "grad_norm": 27.669511781816446, "learning_rate": 4.103796263002193e-06, "loss": 0.170721435546875, "step": 87910 }, { "epoch": 0.7601750092952071, "grad_norm": 3.111884878512903, "learning_rate": 4.103606818625464e-06, "loss": 0.032031822204589847, "step": 87915 }, { "epoch": 0.7602182428167504, "grad_norm": 38.818858575957115, "learning_rate": 4.103417369158997e-06, "loss": 0.5868728637695313, "step": 87920 }, { "epoch": 0.7602614763382937, "grad_norm": 2.9736650444931367, "learning_rate": 4.103227914603666e-06, "loss": 0.27208423614501953, "step": 87925 }, { "epoch": 0.7603047098598369, "grad_norm": 12.736900730177396, "learning_rate": 4.103038454960347e-06, "loss": 0.07763824462890626, "step": 87930 }, { "epoch": 0.7603479433813802, "grad_norm": 1.6285990257622047, "learning_rate": 4.102848990229911e-06, "loss": 0.1072021484375, "step": 87935 }, { "epoch": 0.7603911769029235, "grad_norm": 0.9666787310372714, "learning_rate": 4.102659520413233e-06, "loss": 0.11776885986328126, "step": 87940 }, { "epoch": 0.7604344104244667, "grad_norm": 0.7867391587126693, "learning_rate": 4.1024700455111875e-06, "loss": 0.0624053955078125, "step": 87945 }, { "epoch": 0.76047764394601, "grad_norm": 0.5278893872450539, "learning_rate": 4.1022805655246465e-06, "loss": 0.090057373046875, "step": 87950 }, { "epoch": 0.7605208774675533, "grad_norm": 12.58132328367526, "learning_rate": 4.102091080454485e-06, "loss": 0.07779788970947266, "step": 87955 }, { "epoch": 0.7605641109890965, "grad_norm": 29.29412497083463, "learning_rate": 4.101901590301577e-06, "loss": 0.08189659118652344, "step": 87960 }, { "epoch": 0.7606073445106397, "grad_norm": 4.70929496911521, "learning_rate": 4.101712095066797e-06, "loss": 0.0866729736328125, "step": 87965 }, { "epoch": 0.7606505780321831, "grad_norm": 1.9571688685039734, "learning_rate": 4.101522594751017e-06, "loss": 0.1546173095703125, "step": 87970 }, { "epoch": 0.7606938115537263, "grad_norm": 9.39208533614935, "learning_rate": 4.101333089355113e-06, "loss": 0.1713836669921875, "step": 87975 }, { "epoch": 0.7607370450752695, "grad_norm": 3.4439020186821274, "learning_rate": 4.101143578879957e-06, "loss": 0.156170654296875, "step": 87980 }, { "epoch": 0.7607802785968129, "grad_norm": 5.232244476529405, "learning_rate": 4.100954063326425e-06, "loss": 0.40659027099609374, "step": 87985 }, { "epoch": 0.7608235121183561, "grad_norm": 6.646667867277535, "learning_rate": 4.1007645426953904e-06, "loss": 0.14412384033203124, "step": 87990 }, { "epoch": 0.7608667456398993, "grad_norm": 7.08287752507795, "learning_rate": 4.100575016987726e-06, "loss": 0.1655853271484375, "step": 87995 }, { "epoch": 0.7609099791614427, "grad_norm": 3.767110479634651, "learning_rate": 4.1003854862043076e-06, "loss": 0.03341178894042969, "step": 88000 }, { "epoch": 0.7609532126829859, "grad_norm": 22.36347355319353, "learning_rate": 4.100195950346009e-06, "loss": 0.20470476150512695, "step": 88005 }, { "epoch": 0.7609964462045291, "grad_norm": 2.8372624987872697, "learning_rate": 4.100006409413702e-06, "loss": 0.2013824462890625, "step": 88010 }, { "epoch": 0.7610396797260724, "grad_norm": 8.321387995477117, "learning_rate": 4.099816863408265e-06, "loss": 0.2278289794921875, "step": 88015 }, { "epoch": 0.7610829132476157, "grad_norm": 15.509737330653394, "learning_rate": 4.099627312330569e-06, "loss": 0.1106048583984375, "step": 88020 }, { "epoch": 0.7611261467691589, "grad_norm": 2.586580293725584, "learning_rate": 4.099437756181489e-06, "loss": 0.23514862060546876, "step": 88025 }, { "epoch": 0.7611693802907022, "grad_norm": 4.745949689614772, "learning_rate": 4.099248194961898e-06, "loss": 0.36161651611328127, "step": 88030 }, { "epoch": 0.7612126138122455, "grad_norm": 0.3851243122717502, "learning_rate": 4.099058628672673e-06, "loss": 0.03640670776367187, "step": 88035 }, { "epoch": 0.7612558473337887, "grad_norm": 2.853234434206487, "learning_rate": 4.098869057314686e-06, "loss": 0.025574874877929688, "step": 88040 }, { "epoch": 0.761299080855332, "grad_norm": 0.4412696473405313, "learning_rate": 4.098679480888812e-06, "loss": 0.05905609130859375, "step": 88045 }, { "epoch": 0.7613423143768753, "grad_norm": 0.18026931011843234, "learning_rate": 4.098489899395926e-06, "loss": 0.0444854736328125, "step": 88050 }, { "epoch": 0.7613855478984185, "grad_norm": 27.705358202896928, "learning_rate": 4.098300312836901e-06, "loss": 0.11858596801757812, "step": 88055 }, { "epoch": 0.7614287814199617, "grad_norm": 3.3155479330392947, "learning_rate": 4.098110721212613e-06, "loss": 0.45902328491210936, "step": 88060 }, { "epoch": 0.7614720149415051, "grad_norm": 6.96293750570868, "learning_rate": 4.097921124523933e-06, "loss": 0.11871414184570313, "step": 88065 }, { "epoch": 0.7615152484630483, "grad_norm": 7.83657920860511, "learning_rate": 4.09773152277174e-06, "loss": 0.050304412841796875, "step": 88070 }, { "epoch": 0.7615584819845915, "grad_norm": 20.36093726170591, "learning_rate": 4.097541915956906e-06, "loss": 0.2036958694458008, "step": 88075 }, { "epoch": 0.7616017155061349, "grad_norm": 5.20711079616148, "learning_rate": 4.097352304080304e-06, "loss": 0.2089752197265625, "step": 88080 }, { "epoch": 0.7616449490276781, "grad_norm": 25.233589296389987, "learning_rate": 4.0971626871428125e-06, "loss": 0.14832191467285155, "step": 88085 }, { "epoch": 0.7616881825492213, "grad_norm": 1.4803921732259202, "learning_rate": 4.096973065145302e-06, "loss": 0.20292892456054687, "step": 88090 }, { "epoch": 0.7617314160707647, "grad_norm": 30.654793698990545, "learning_rate": 4.0967834380886485e-06, "loss": 0.11517105102539063, "step": 88095 }, { "epoch": 0.7617746495923079, "grad_norm": 18.306501014065333, "learning_rate": 4.096593805973727e-06, "loss": 0.09654998779296875, "step": 88100 }, { "epoch": 0.7618178831138511, "grad_norm": 11.345081933220818, "learning_rate": 4.096404168801414e-06, "loss": 0.13851318359375, "step": 88105 }, { "epoch": 0.7618611166353945, "grad_norm": 1.397810290726582, "learning_rate": 4.096214526572578e-06, "loss": 0.1206146240234375, "step": 88110 }, { "epoch": 0.7619043501569377, "grad_norm": 19.894625334559013, "learning_rate": 4.0960248792881e-06, "loss": 0.12807445526123046, "step": 88115 }, { "epoch": 0.7619475836784809, "grad_norm": 10.118734012871618, "learning_rate": 4.095835226948852e-06, "loss": 0.45457763671875, "step": 88120 }, { "epoch": 0.7619908172000243, "grad_norm": 3.630736882154413, "learning_rate": 4.095645569555708e-06, "loss": 0.1465057373046875, "step": 88125 }, { "epoch": 0.7620340507215675, "grad_norm": 58.91631643975708, "learning_rate": 4.095455907109544e-06, "loss": 0.2608772277832031, "step": 88130 }, { "epoch": 0.7620772842431107, "grad_norm": 7.204833878790414, "learning_rate": 4.095266239611234e-06, "loss": 0.11872940063476563, "step": 88135 }, { "epoch": 0.762120517764654, "grad_norm": 0.938737968619357, "learning_rate": 4.095076567061653e-06, "loss": 0.0274658203125, "step": 88140 }, { "epoch": 0.7621637512861973, "grad_norm": 0.5782105284011337, "learning_rate": 4.094886889461675e-06, "loss": 0.3177825927734375, "step": 88145 }, { "epoch": 0.7622069848077405, "grad_norm": 18.7071138440466, "learning_rate": 4.094697206812175e-06, "loss": 0.10093612670898437, "step": 88150 }, { "epoch": 0.7622502183292837, "grad_norm": 8.89103623914524, "learning_rate": 4.0945075191140295e-06, "loss": 0.07777938842773438, "step": 88155 }, { "epoch": 0.7622934518508271, "grad_norm": 2.312646752699992, "learning_rate": 4.094317826368111e-06, "loss": 0.26107940673828123, "step": 88160 }, { "epoch": 0.7623366853723703, "grad_norm": 1.3264203919848607, "learning_rate": 4.094128128575296e-06, "loss": 0.23668441772460938, "step": 88165 }, { "epoch": 0.7623799188939135, "grad_norm": 11.1197743192118, "learning_rate": 4.093938425736459e-06, "loss": 0.23857498168945312, "step": 88170 }, { "epoch": 0.7624231524154569, "grad_norm": 2.8032020323389446, "learning_rate": 4.093748717852474e-06, "loss": 0.14988632202148439, "step": 88175 }, { "epoch": 0.7624663859370001, "grad_norm": 0.05472685793293271, "learning_rate": 4.093559004924217e-06, "loss": 0.17306938171386718, "step": 88180 }, { "epoch": 0.7625096194585433, "grad_norm": 48.17472555749264, "learning_rate": 4.093369286952563e-06, "loss": 0.5504280090332031, "step": 88185 }, { "epoch": 0.7625528529800867, "grad_norm": 0.047948152422368905, "learning_rate": 4.093179563938385e-06, "loss": 0.11340141296386719, "step": 88190 }, { "epoch": 0.7625960865016299, "grad_norm": 15.0113019284496, "learning_rate": 4.092989835882561e-06, "loss": 0.4784088134765625, "step": 88195 }, { "epoch": 0.7626393200231731, "grad_norm": 7.31004097249342, "learning_rate": 4.092800102785964e-06, "loss": 0.02872772216796875, "step": 88200 }, { "epoch": 0.7626825535447165, "grad_norm": 25.677290082233554, "learning_rate": 4.092610364649469e-06, "loss": 0.121392822265625, "step": 88205 }, { "epoch": 0.7627257870662597, "grad_norm": 16.667360252600997, "learning_rate": 4.0924206214739515e-06, "loss": 0.3907470703125, "step": 88210 }, { "epoch": 0.7627690205878029, "grad_norm": 4.1205774178401136, "learning_rate": 4.0922308732602865e-06, "loss": 0.203997802734375, "step": 88215 }, { "epoch": 0.7628122541093463, "grad_norm": 25.929873875036446, "learning_rate": 4.09204112000935e-06, "loss": 0.23928565979003907, "step": 88220 }, { "epoch": 0.7628554876308895, "grad_norm": 12.85074259867361, "learning_rate": 4.091851361722016e-06, "loss": 0.120513916015625, "step": 88225 }, { "epoch": 0.7628987211524327, "grad_norm": 4.131064812646353, "learning_rate": 4.091661598399161e-06, "loss": 0.33769378662109373, "step": 88230 }, { "epoch": 0.762941954673976, "grad_norm": 0.9887374570647189, "learning_rate": 4.091471830041657e-06, "loss": 0.208123779296875, "step": 88235 }, { "epoch": 0.7629851881955193, "grad_norm": 1.2144977439925775, "learning_rate": 4.091282056650383e-06, "loss": 0.12266082763671875, "step": 88240 }, { "epoch": 0.7630284217170625, "grad_norm": 26.5288890900108, "learning_rate": 4.091092278226212e-06, "loss": 0.2036651611328125, "step": 88245 }, { "epoch": 0.7630716552386058, "grad_norm": 1.1477511306165593, "learning_rate": 4.09090249477002e-06, "loss": 0.16252670288085938, "step": 88250 }, { "epoch": 0.7631148887601491, "grad_norm": 1.0042096623764938, "learning_rate": 4.090712706282681e-06, "loss": 0.25631027221679686, "step": 88255 }, { "epoch": 0.7631581222816923, "grad_norm": 1.9179715338829493, "learning_rate": 4.090522912765073e-06, "loss": 0.02263641357421875, "step": 88260 }, { "epoch": 0.7632013558032356, "grad_norm": 0.04466159983478387, "learning_rate": 4.090333114218069e-06, "loss": 0.17094078063964843, "step": 88265 }, { "epoch": 0.7632445893247789, "grad_norm": 2.512345315985398, "learning_rate": 4.090143310642545e-06, "loss": 0.1156158447265625, "step": 88270 }, { "epoch": 0.7632878228463221, "grad_norm": 0.4523114392767762, "learning_rate": 4.089953502039376e-06, "loss": 0.06665802001953125, "step": 88275 }, { "epoch": 0.7633310563678654, "grad_norm": 35.52014339979111, "learning_rate": 4.0897636884094386e-06, "loss": 0.1726470947265625, "step": 88280 }, { "epoch": 0.7633742898894087, "grad_norm": 13.556639373823037, "learning_rate": 4.089573869753607e-06, "loss": 0.20626506805419922, "step": 88285 }, { "epoch": 0.7634175234109519, "grad_norm": 1.0583721517506792, "learning_rate": 4.089384046072757e-06, "loss": 0.0501495361328125, "step": 88290 }, { "epoch": 0.7634607569324952, "grad_norm": 1.1060657897943003, "learning_rate": 4.089194217367763e-06, "loss": 0.37361679077148435, "step": 88295 }, { "epoch": 0.7635039904540385, "grad_norm": 0.8543765405969937, "learning_rate": 4.089004383639503e-06, "loss": 0.04571380615234375, "step": 88300 }, { "epoch": 0.7635472239755817, "grad_norm": 12.416954416378559, "learning_rate": 4.088814544888851e-06, "loss": 0.49027099609375, "step": 88305 }, { "epoch": 0.763590457497125, "grad_norm": 23.023468350203686, "learning_rate": 4.088624701116682e-06, "loss": 0.30175628662109377, "step": 88310 }, { "epoch": 0.7636336910186682, "grad_norm": 5.772129219392071, "learning_rate": 4.088434852323872e-06, "loss": 0.063592529296875, "step": 88315 }, { "epoch": 0.7636769245402115, "grad_norm": 23.47910091317815, "learning_rate": 4.088244998511296e-06, "loss": 0.1917510986328125, "step": 88320 }, { "epoch": 0.7637201580617548, "grad_norm": 0.6989253166063701, "learning_rate": 4.088055139679832e-06, "loss": 0.01125640869140625, "step": 88325 }, { "epoch": 0.763763391583298, "grad_norm": 22.3773018118117, "learning_rate": 4.087865275830352e-06, "loss": 0.239154052734375, "step": 88330 }, { "epoch": 0.7638066251048413, "grad_norm": 3.8295957467381223, "learning_rate": 4.087675406963734e-06, "loss": 0.16855621337890625, "step": 88335 }, { "epoch": 0.7638498586263845, "grad_norm": 10.25491710706629, "learning_rate": 4.0874855330808546e-06, "loss": 0.15001792907714845, "step": 88340 }, { "epoch": 0.7638930921479278, "grad_norm": 3.297502409254878, "learning_rate": 4.087295654182586e-06, "loss": 0.028066253662109374, "step": 88345 }, { "epoch": 0.7639363256694711, "grad_norm": 13.583235456127948, "learning_rate": 4.087105770269807e-06, "loss": 0.08200759887695312, "step": 88350 }, { "epoch": 0.7639795591910143, "grad_norm": 2.8084548893549703, "learning_rate": 4.086915881343392e-06, "loss": 0.1956512451171875, "step": 88355 }, { "epoch": 0.7640227927125576, "grad_norm": 16.82705870773304, "learning_rate": 4.086725987404217e-06, "loss": 0.06440887451171876, "step": 88360 }, { "epoch": 0.7640660262341009, "grad_norm": 4.198197082685626, "learning_rate": 4.086536088453158e-06, "loss": 0.20842208862304687, "step": 88365 }, { "epoch": 0.7641092597556441, "grad_norm": 9.189068791045985, "learning_rate": 4.08634618449109e-06, "loss": 0.111627197265625, "step": 88370 }, { "epoch": 0.7641524932771874, "grad_norm": 14.377681405987692, "learning_rate": 4.08615627551889e-06, "loss": 0.06991500854492187, "step": 88375 }, { "epoch": 0.7641957267987307, "grad_norm": 15.430427342075552, "learning_rate": 4.085966361537432e-06, "loss": 0.0946441650390625, "step": 88380 }, { "epoch": 0.7642389603202739, "grad_norm": 0.5839747594214372, "learning_rate": 4.085776442547595e-06, "loss": 0.0386688232421875, "step": 88385 }, { "epoch": 0.7642821938418172, "grad_norm": 13.697364259131582, "learning_rate": 4.085586518550252e-06, "loss": 0.34368743896484377, "step": 88390 }, { "epoch": 0.7643254273633605, "grad_norm": 2.9509589865029513, "learning_rate": 4.08539658954628e-06, "loss": 0.15479888916015624, "step": 88395 }, { "epoch": 0.7643686608849037, "grad_norm": 0.7832256505727672, "learning_rate": 4.085206655536554e-06, "loss": 0.046876144409179685, "step": 88400 }, { "epoch": 0.764411894406447, "grad_norm": 4.222565817385386, "learning_rate": 4.085016716521952e-06, "loss": 0.07853660583496094, "step": 88405 }, { "epoch": 0.7644551279279902, "grad_norm": 9.592700319212822, "learning_rate": 4.084826772503349e-06, "loss": 0.220513916015625, "step": 88410 }, { "epoch": 0.7644983614495335, "grad_norm": 52.70691124545338, "learning_rate": 4.084636823481619e-06, "loss": 0.2763916015625, "step": 88415 }, { "epoch": 0.7645415949710768, "grad_norm": 5.103617129061796, "learning_rate": 4.084446869457642e-06, "loss": 0.0603485107421875, "step": 88420 }, { "epoch": 0.76458482849262, "grad_norm": 9.63511041952545, "learning_rate": 4.084256910432291e-06, "loss": 0.27801361083984377, "step": 88425 }, { "epoch": 0.7646280620141633, "grad_norm": 22.540420703515455, "learning_rate": 4.0840669464064426e-06, "loss": 0.2219940185546875, "step": 88430 }, { "epoch": 0.7646712955357066, "grad_norm": 0.7116449641027637, "learning_rate": 4.083876977380972e-06, "loss": 0.06632270812988281, "step": 88435 }, { "epoch": 0.7647145290572498, "grad_norm": 6.622223383110608, "learning_rate": 4.083687003356759e-06, "loss": 0.159405517578125, "step": 88440 }, { "epoch": 0.7647577625787931, "grad_norm": 6.811804694472518, "learning_rate": 4.083497024334676e-06, "loss": 0.43880157470703124, "step": 88445 }, { "epoch": 0.7648009961003364, "grad_norm": 0.1823433594520553, "learning_rate": 4.083307040315601e-06, "loss": 0.03882293701171875, "step": 88450 }, { "epoch": 0.7648442296218796, "grad_norm": 23.902081469454483, "learning_rate": 4.083117051300409e-06, "loss": 0.2495880126953125, "step": 88455 }, { "epoch": 0.7648874631434229, "grad_norm": 6.54549460022931, "learning_rate": 4.082927057289977e-06, "loss": 0.06724357604980469, "step": 88460 }, { "epoch": 0.7649306966649662, "grad_norm": 0.22568586258779894, "learning_rate": 4.082737058285181e-06, "loss": 0.3123291015625, "step": 88465 }, { "epoch": 0.7649739301865094, "grad_norm": 0.26688101830009386, "learning_rate": 4.082547054286899e-06, "loss": 0.0333648681640625, "step": 88470 }, { "epoch": 0.7650171637080527, "grad_norm": 2.2707968686449016, "learning_rate": 4.082357045296003e-06, "loss": 0.12317008972167968, "step": 88475 }, { "epoch": 0.765060397229596, "grad_norm": 7.894526285220393, "learning_rate": 4.082167031313373e-06, "loss": 0.07462654113769532, "step": 88480 }, { "epoch": 0.7651036307511392, "grad_norm": 9.63688045407826, "learning_rate": 4.081977012339886e-06, "loss": 0.03615264892578125, "step": 88485 }, { "epoch": 0.7651468642726824, "grad_norm": 6.865884601413144, "learning_rate": 4.081786988376414e-06, "loss": 0.059112548828125, "step": 88490 }, { "epoch": 0.7651900977942258, "grad_norm": 6.810304550027398, "learning_rate": 4.081596959423836e-06, "loss": 0.11797103881835938, "step": 88495 }, { "epoch": 0.765233331315769, "grad_norm": 15.033456475008395, "learning_rate": 4.081406925483031e-06, "loss": 0.06833877563476562, "step": 88500 }, { "epoch": 0.7652765648373122, "grad_norm": 3.6975996051656805, "learning_rate": 4.0812168865548714e-06, "loss": 0.04427623748779297, "step": 88505 }, { "epoch": 0.7653197983588556, "grad_norm": 1.8108756965613413, "learning_rate": 4.081026842640235e-06, "loss": 0.1518646240234375, "step": 88510 }, { "epoch": 0.7653630318803988, "grad_norm": 8.148736238385633, "learning_rate": 4.0808367937399975e-06, "loss": 0.143316650390625, "step": 88515 }, { "epoch": 0.765406265401942, "grad_norm": 25.45441736685452, "learning_rate": 4.080646739855038e-06, "loss": 0.4404327392578125, "step": 88520 }, { "epoch": 0.7654494989234853, "grad_norm": 0.2719219055727268, "learning_rate": 4.08045668098623e-06, "loss": 0.058481597900390626, "step": 88525 }, { "epoch": 0.7654927324450286, "grad_norm": 7.200652462181781, "learning_rate": 4.080266617134451e-06, "loss": 0.03913459777832031, "step": 88530 }, { "epoch": 0.7655359659665718, "grad_norm": 10.887944934740894, "learning_rate": 4.080076548300579e-06, "loss": 0.43805389404296874, "step": 88535 }, { "epoch": 0.7655791994881151, "grad_norm": 0.5122324000720735, "learning_rate": 4.079886474485488e-06, "loss": 0.026507568359375, "step": 88540 }, { "epoch": 0.7656224330096584, "grad_norm": 1.5256424166513523, "learning_rate": 4.079696395690056e-06, "loss": 0.0793304443359375, "step": 88545 }, { "epoch": 0.7656656665312016, "grad_norm": 4.293838467329688, "learning_rate": 4.079506311915162e-06, "loss": 0.19877243041992188, "step": 88550 }, { "epoch": 0.765708900052745, "grad_norm": 8.820863889892257, "learning_rate": 4.0793162231616776e-06, "loss": 0.1054962158203125, "step": 88555 }, { "epoch": 0.7657521335742882, "grad_norm": 3.7830184044743413, "learning_rate": 4.079126129430484e-06, "loss": 0.29591522216796873, "step": 88560 }, { "epoch": 0.7657953670958314, "grad_norm": 3.3599506293816526, "learning_rate": 4.078936030722455e-06, "loss": 0.070697021484375, "step": 88565 }, { "epoch": 0.7658386006173747, "grad_norm": 0.7774471665652, "learning_rate": 4.078745927038469e-06, "loss": 0.6363418579101563, "step": 88570 }, { "epoch": 0.765881834138918, "grad_norm": 24.15832978297043, "learning_rate": 4.0785558183794e-06, "loss": 0.2882511138916016, "step": 88575 }, { "epoch": 0.7659250676604612, "grad_norm": 0.51887695317654, "learning_rate": 4.07836570474613e-06, "loss": 0.1802398681640625, "step": 88580 }, { "epoch": 0.7659683011820044, "grad_norm": 3.70422776362004, "learning_rate": 4.078175586139531e-06, "loss": 0.1060546875, "step": 88585 }, { "epoch": 0.7660115347035478, "grad_norm": 3.8316765277700715, "learning_rate": 4.0779854625604814e-06, "loss": 0.08932609558105468, "step": 88590 }, { "epoch": 0.766054768225091, "grad_norm": 3.666254818165526, "learning_rate": 4.077795334009859e-06, "loss": 0.5143692016601562, "step": 88595 }, { "epoch": 0.7660980017466342, "grad_norm": 2.982577550202131, "learning_rate": 4.077605200488538e-06, "loss": 0.235986328125, "step": 88600 }, { "epoch": 0.7661412352681776, "grad_norm": 9.429647004692216, "learning_rate": 4.077415061997399e-06, "loss": 0.05883331298828125, "step": 88605 }, { "epoch": 0.7661844687897208, "grad_norm": 1.3180932239689516, "learning_rate": 4.077224918537316e-06, "loss": 0.038698577880859376, "step": 88610 }, { "epoch": 0.766227702311264, "grad_norm": 4.386876535955838, "learning_rate": 4.077034770109167e-06, "loss": 0.03957939147949219, "step": 88615 }, { "epoch": 0.7662709358328074, "grad_norm": 18.84361513503119, "learning_rate": 4.076844616713829e-06, "loss": 0.07990226745605469, "step": 88620 }, { "epoch": 0.7663141693543506, "grad_norm": 2.6599235665501126, "learning_rate": 4.0766544583521786e-06, "loss": 0.09790802001953125, "step": 88625 }, { "epoch": 0.7663574028758938, "grad_norm": 13.98465304444216, "learning_rate": 4.076464295025093e-06, "loss": 0.08612747192382812, "step": 88630 }, { "epoch": 0.7664006363974372, "grad_norm": 0.451712621273286, "learning_rate": 4.076274126733448e-06, "loss": 0.20446243286132812, "step": 88635 }, { "epoch": 0.7664438699189804, "grad_norm": 9.540176509660041, "learning_rate": 4.076083953478123e-06, "loss": 0.21952667236328124, "step": 88640 }, { "epoch": 0.7664871034405236, "grad_norm": 7.716946879912023, "learning_rate": 4.075893775259994e-06, "loss": 0.0956451416015625, "step": 88645 }, { "epoch": 0.766530336962067, "grad_norm": 2.060944606217714, "learning_rate": 4.075703592079938e-06, "loss": 0.11698036193847657, "step": 88650 }, { "epoch": 0.7665735704836102, "grad_norm": 1.4036622672688857, "learning_rate": 4.0755134039388325e-06, "loss": 0.10289306640625, "step": 88655 }, { "epoch": 0.7666168040051534, "grad_norm": 2.9264483681666267, "learning_rate": 4.075323210837553e-06, "loss": 0.11362800598144532, "step": 88660 }, { "epoch": 0.7666600375266966, "grad_norm": 0.16484105333640145, "learning_rate": 4.075133012776979e-06, "loss": 0.08588104248046875, "step": 88665 }, { "epoch": 0.76670327104824, "grad_norm": 70.56549973503687, "learning_rate": 4.0749428097579855e-06, "loss": 0.34661102294921875, "step": 88670 }, { "epoch": 0.7667465045697832, "grad_norm": 10.879324182568608, "learning_rate": 4.074752601781451e-06, "loss": 0.06640892028808594, "step": 88675 }, { "epoch": 0.7667897380913264, "grad_norm": 15.043220665578678, "learning_rate": 4.074562388848254e-06, "loss": 0.2690338134765625, "step": 88680 }, { "epoch": 0.7668329716128698, "grad_norm": 22.254717460329736, "learning_rate": 4.074372170959268e-06, "loss": 0.3881988525390625, "step": 88685 }, { "epoch": 0.766876205134413, "grad_norm": 4.298740266267062, "learning_rate": 4.0741819481153736e-06, "loss": 0.18646469116210937, "step": 88690 }, { "epoch": 0.7669194386559562, "grad_norm": 3.523940179607365, "learning_rate": 4.073991720317447e-06, "loss": 0.04113922119140625, "step": 88695 }, { "epoch": 0.7669626721774996, "grad_norm": 3.7030639001331407, "learning_rate": 4.073801487566365e-06, "loss": 0.07197151184082032, "step": 88700 }, { "epoch": 0.7670059056990428, "grad_norm": 1.2705457671142182, "learning_rate": 4.073611249863005e-06, "loss": 0.1175323486328125, "step": 88705 }, { "epoch": 0.767049139220586, "grad_norm": 0.821921961526278, "learning_rate": 4.073421007208247e-06, "loss": 0.20909194946289061, "step": 88710 }, { "epoch": 0.7670923727421294, "grad_norm": 45.53459360630102, "learning_rate": 4.0732307596029635e-06, "loss": 0.5385696411132812, "step": 88715 }, { "epoch": 0.7671356062636726, "grad_norm": 11.333787240803852, "learning_rate": 4.073040507048036e-06, "loss": 0.2838565826416016, "step": 88720 }, { "epoch": 0.7671788397852158, "grad_norm": 0.6332459250677049, "learning_rate": 4.0728502495443405e-06, "loss": 0.052356719970703125, "step": 88725 }, { "epoch": 0.7672220733067592, "grad_norm": 0.8242145741852579, "learning_rate": 4.072659987092753e-06, "loss": 0.16197547912597657, "step": 88730 }, { "epoch": 0.7672653068283024, "grad_norm": 0.5400451839729956, "learning_rate": 4.072469719694155e-06, "loss": 0.10502052307128906, "step": 88735 }, { "epoch": 0.7673085403498456, "grad_norm": 9.648511815718722, "learning_rate": 4.072279447349419e-06, "loss": 0.27523651123046877, "step": 88740 }, { "epoch": 0.7673517738713889, "grad_norm": 0.33754444744405127, "learning_rate": 4.072089170059426e-06, "loss": 0.019792938232421876, "step": 88745 }, { "epoch": 0.7673950073929322, "grad_norm": 2.7901568638805636, "learning_rate": 4.071898887825052e-06, "loss": 0.08518199920654297, "step": 88750 }, { "epoch": 0.7674382409144754, "grad_norm": 5.903747070781686, "learning_rate": 4.0717086006471766e-06, "loss": 0.08018951416015625, "step": 88755 }, { "epoch": 0.7674814744360187, "grad_norm": 56.9880419972909, "learning_rate": 4.071518308526675e-06, "loss": 0.3866889953613281, "step": 88760 }, { "epoch": 0.767524707957562, "grad_norm": 3.9381929769013877, "learning_rate": 4.071328011464424e-06, "loss": 0.14537353515625, "step": 88765 }, { "epoch": 0.7675679414791052, "grad_norm": 3.4489850118667906, "learning_rate": 4.071137709461305e-06, "loss": 0.2313446044921875, "step": 88770 }, { "epoch": 0.7676111750006485, "grad_norm": 5.302392699647011, "learning_rate": 4.070947402518193e-06, "loss": 0.27090911865234374, "step": 88775 }, { "epoch": 0.7676544085221918, "grad_norm": 41.067038013649075, "learning_rate": 4.070757090635965e-06, "loss": 0.2809356689453125, "step": 88780 }, { "epoch": 0.767697642043735, "grad_norm": 2.3434282318305604, "learning_rate": 4.070566773815501e-06, "loss": 0.15230865478515626, "step": 88785 }, { "epoch": 0.7677408755652783, "grad_norm": 2.0888211139492574, "learning_rate": 4.0703764520576784e-06, "loss": 0.09104232788085938, "step": 88790 }, { "epoch": 0.7677841090868216, "grad_norm": 1.0393515905285853, "learning_rate": 4.070186125363373e-06, "loss": 0.11739540100097656, "step": 88795 }, { "epoch": 0.7678273426083648, "grad_norm": 1.7329122440826663, "learning_rate": 4.069995793733464e-06, "loss": 0.13266448974609374, "step": 88800 }, { "epoch": 0.7678705761299081, "grad_norm": 8.790055236034934, "learning_rate": 4.069805457168829e-06, "loss": 0.06142120361328125, "step": 88805 }, { "epoch": 0.7679138096514514, "grad_norm": 19.090453529539328, "learning_rate": 4.069615115670346e-06, "loss": 0.03917999267578125, "step": 88810 }, { "epoch": 0.7679570431729946, "grad_norm": 21.593239933797697, "learning_rate": 4.069424769238892e-06, "loss": 0.225714111328125, "step": 88815 }, { "epoch": 0.7680002766945379, "grad_norm": 0.505743659025484, "learning_rate": 4.069234417875346e-06, "loss": 0.0912240982055664, "step": 88820 }, { "epoch": 0.7680435102160812, "grad_norm": 1.0410751888563732, "learning_rate": 4.069044061580585e-06, "loss": 0.09217529296875, "step": 88825 }, { "epoch": 0.7680867437376244, "grad_norm": 20.875681954389634, "learning_rate": 4.068853700355487e-06, "loss": 0.34829254150390626, "step": 88830 }, { "epoch": 0.7681299772591677, "grad_norm": 61.886971578677105, "learning_rate": 4.0686633342009305e-06, "loss": 0.323345947265625, "step": 88835 }, { "epoch": 0.7681732107807109, "grad_norm": 8.357360528412139, "learning_rate": 4.068472963117794e-06, "loss": 0.0918670654296875, "step": 88840 }, { "epoch": 0.7682164443022542, "grad_norm": 0.484329014935575, "learning_rate": 4.068282587106953e-06, "loss": 0.07001113891601562, "step": 88845 }, { "epoch": 0.7682596778237974, "grad_norm": 3.8549070042320612, "learning_rate": 4.068092206169289e-06, "loss": 0.16617202758789062, "step": 88850 }, { "epoch": 0.7683029113453407, "grad_norm": 0.31573677940520073, "learning_rate": 4.067901820305676e-06, "loss": 0.3488361358642578, "step": 88855 }, { "epoch": 0.768346144866884, "grad_norm": 11.970281377206241, "learning_rate": 4.067711429516995e-06, "loss": 0.08647842407226562, "step": 88860 }, { "epoch": 0.7683893783884272, "grad_norm": 2.8474887221811462, "learning_rate": 4.067521033804124e-06, "loss": 0.13642349243164062, "step": 88865 }, { "epoch": 0.7684326119099705, "grad_norm": 7.8240421003329494, "learning_rate": 4.06733063316794e-06, "loss": 0.09572677612304688, "step": 88870 }, { "epoch": 0.7684758454315138, "grad_norm": 2.0089954740671723, "learning_rate": 4.067140227609321e-06, "loss": 0.1959014892578125, "step": 88875 }, { "epoch": 0.768519078953057, "grad_norm": 1.8439998574045229, "learning_rate": 4.066949817129146e-06, "loss": 0.16513099670410156, "step": 88880 }, { "epoch": 0.7685623124746003, "grad_norm": 9.681202933898081, "learning_rate": 4.066759401728292e-06, "loss": 0.29974365234375, "step": 88885 }, { "epoch": 0.7686055459961436, "grad_norm": 6.116667800153793, "learning_rate": 4.066568981407638e-06, "loss": 0.12253303527832031, "step": 88890 }, { "epoch": 0.7686487795176868, "grad_norm": 53.955323783659814, "learning_rate": 4.0663785561680635e-06, "loss": 0.28920440673828124, "step": 88895 }, { "epoch": 0.7686920130392301, "grad_norm": 29.38039922292704, "learning_rate": 4.066188126010444e-06, "loss": 0.2875640869140625, "step": 88900 }, { "epoch": 0.7687352465607734, "grad_norm": 0.06890283773420544, "learning_rate": 4.065997690935659e-06, "loss": 0.11976737976074218, "step": 88905 }, { "epoch": 0.7687784800823166, "grad_norm": 1.0532519504819402, "learning_rate": 4.065807250944587e-06, "loss": 0.052575302124023435, "step": 88910 }, { "epoch": 0.7688217136038599, "grad_norm": 20.909069070665144, "learning_rate": 4.065616806038106e-06, "loss": 0.09281349182128906, "step": 88915 }, { "epoch": 0.7688649471254031, "grad_norm": 7.0344289102834585, "learning_rate": 4.065426356217096e-06, "loss": 0.30728683471679685, "step": 88920 }, { "epoch": 0.7689081806469464, "grad_norm": 0.07101874564593313, "learning_rate": 4.065235901482432e-06, "loss": 0.19368972778320312, "step": 88925 }, { "epoch": 0.7689514141684897, "grad_norm": 1.3523786139315617, "learning_rate": 4.065045441834995e-06, "loss": 0.02066497802734375, "step": 88930 }, { "epoch": 0.7689946476900329, "grad_norm": 29.708022539863105, "learning_rate": 4.064854977275662e-06, "loss": 0.28450736999511717, "step": 88935 }, { "epoch": 0.7690378812115762, "grad_norm": 1.2531955990764825, "learning_rate": 4.064664507805312e-06, "loss": 0.06057815551757813, "step": 88940 }, { "epoch": 0.7690811147331195, "grad_norm": 2.648166639248711, "learning_rate": 4.064474033424824e-06, "loss": 0.053461456298828126, "step": 88945 }, { "epoch": 0.7691243482546627, "grad_norm": 1.765610309414859, "learning_rate": 4.064283554135076e-06, "loss": 0.2899810791015625, "step": 88950 }, { "epoch": 0.769167581776206, "grad_norm": 0.08895257500691489, "learning_rate": 4.0640930699369446e-06, "loss": 0.11270523071289062, "step": 88955 }, { "epoch": 0.7692108152977493, "grad_norm": 2.334609031774952, "learning_rate": 4.063902580831312e-06, "loss": 0.06326446533203126, "step": 88960 }, { "epoch": 0.7692540488192925, "grad_norm": 22.948190781791826, "learning_rate": 4.0637120868190535e-06, "loss": 0.13563690185546876, "step": 88965 }, { "epoch": 0.7692972823408358, "grad_norm": 5.648767070796486, "learning_rate": 4.063521587901048e-06, "loss": 0.13037109375, "step": 88970 }, { "epoch": 0.7693405158623791, "grad_norm": 2.4772550493606253, "learning_rate": 4.063331084078176e-06, "loss": 0.21497802734375, "step": 88975 }, { "epoch": 0.7693837493839223, "grad_norm": 11.401300511951234, "learning_rate": 4.063140575351316e-06, "loss": 0.1205596923828125, "step": 88980 }, { "epoch": 0.7694269829054656, "grad_norm": 8.742344971959763, "learning_rate": 4.062950061721344e-06, "loss": 0.1582763671875, "step": 88985 }, { "epoch": 0.7694702164270089, "grad_norm": 6.96381776795118, "learning_rate": 4.062759543189141e-06, "loss": 0.0802978515625, "step": 88990 }, { "epoch": 0.7695134499485521, "grad_norm": 17.37366851098706, "learning_rate": 4.062569019755585e-06, "loss": 0.24535369873046875, "step": 88995 }, { "epoch": 0.7695566834700954, "grad_norm": 11.929182507880526, "learning_rate": 4.062378491421554e-06, "loss": 0.10734100341796875, "step": 89000 }, { "epoch": 0.7695999169916387, "grad_norm": 6.547497724376033, "learning_rate": 4.062187958187927e-06, "loss": 0.17220458984375, "step": 89005 }, { "epoch": 0.7696431505131819, "grad_norm": 1.0849474655144475, "learning_rate": 4.061997420055585e-06, "loss": 0.33078460693359374, "step": 89010 }, { "epoch": 0.7696863840347251, "grad_norm": 1.2528022214757508, "learning_rate": 4.061806877025403e-06, "loss": 0.128704833984375, "step": 89015 }, { "epoch": 0.7697296175562685, "grad_norm": 0.1663511800272005, "learning_rate": 4.061616329098262e-06, "loss": 0.13615875244140624, "step": 89020 }, { "epoch": 0.7697728510778117, "grad_norm": 7.100897270237209, "learning_rate": 4.061425776275039e-06, "loss": 0.058075332641601564, "step": 89025 }, { "epoch": 0.7698160845993549, "grad_norm": 21.964201325932574, "learning_rate": 4.0612352185566165e-06, "loss": 0.17750778198242187, "step": 89030 }, { "epoch": 0.7698593181208983, "grad_norm": 5.248728618147769, "learning_rate": 4.06104465594387e-06, "loss": 0.147088623046875, "step": 89035 }, { "epoch": 0.7699025516424415, "grad_norm": 25.06521169700159, "learning_rate": 4.060854088437679e-06, "loss": 0.21502265930175782, "step": 89040 }, { "epoch": 0.7699457851639847, "grad_norm": 1.8556328973406047, "learning_rate": 4.060663516038922e-06, "loss": 0.04215316772460938, "step": 89045 }, { "epoch": 0.769989018685528, "grad_norm": 8.363957695184244, "learning_rate": 4.060472938748479e-06, "loss": 0.05966644287109375, "step": 89050 }, { "epoch": 0.7700322522070713, "grad_norm": 10.091681310004331, "learning_rate": 4.060282356567228e-06, "loss": 0.1268035888671875, "step": 89055 }, { "epoch": 0.7700754857286145, "grad_norm": 12.430198138274754, "learning_rate": 4.06009176949605e-06, "loss": 0.1457122802734375, "step": 89060 }, { "epoch": 0.7701187192501578, "grad_norm": 3.9287439696470203, "learning_rate": 4.059901177535821e-06, "loss": 0.10858612060546875, "step": 89065 }, { "epoch": 0.7701619527717011, "grad_norm": 0.3935550174158176, "learning_rate": 4.059710580687423e-06, "loss": 0.062245750427246095, "step": 89070 }, { "epoch": 0.7702051862932443, "grad_norm": 3.4652224237399296, "learning_rate": 4.059519978951733e-06, "loss": 0.18287429809570313, "step": 89075 }, { "epoch": 0.7702484198147876, "grad_norm": 1.735072023155438, "learning_rate": 4.059329372329629e-06, "loss": 0.43524169921875, "step": 89080 }, { "epoch": 0.7702916533363309, "grad_norm": 0.12408812028463778, "learning_rate": 4.059138760821993e-06, "loss": 0.154656982421875, "step": 89085 }, { "epoch": 0.7703348868578741, "grad_norm": 2.444557444719117, "learning_rate": 4.058948144429702e-06, "loss": 0.0528533935546875, "step": 89090 }, { "epoch": 0.7703781203794173, "grad_norm": 8.926060016156582, "learning_rate": 4.058757523153637e-06, "loss": 0.1529205322265625, "step": 89095 }, { "epoch": 0.7704213539009607, "grad_norm": 30.97960745809664, "learning_rate": 4.058566896994674e-06, "loss": 0.12233161926269531, "step": 89100 }, { "epoch": 0.7704645874225039, "grad_norm": 43.286417748582835, "learning_rate": 4.058376265953695e-06, "loss": 0.26270294189453125, "step": 89105 }, { "epoch": 0.7705078209440471, "grad_norm": 4.734321840157398, "learning_rate": 4.0581856300315775e-06, "loss": 0.10637969970703125, "step": 89110 }, { "epoch": 0.7705510544655905, "grad_norm": 7.9428306276483855, "learning_rate": 4.0579949892292015e-06, "loss": 0.0763641357421875, "step": 89115 }, { "epoch": 0.7705942879871337, "grad_norm": 60.85277092620337, "learning_rate": 4.057804343547446e-06, "loss": 0.2989166259765625, "step": 89120 }, { "epoch": 0.7706375215086769, "grad_norm": 3.078818042269713, "learning_rate": 4.0576136929871915e-06, "loss": 0.2439697265625, "step": 89125 }, { "epoch": 0.7706807550302203, "grad_norm": 8.714254779571935, "learning_rate": 4.057423037549315e-06, "loss": 0.2098252296447754, "step": 89130 }, { "epoch": 0.7707239885517635, "grad_norm": 7.131845745507535, "learning_rate": 4.057232377234697e-06, "loss": 0.04615707397460937, "step": 89135 }, { "epoch": 0.7707672220733067, "grad_norm": 2.457008417886658, "learning_rate": 4.057041712044217e-06, "loss": 0.138262939453125, "step": 89140 }, { "epoch": 0.7708104555948501, "grad_norm": 4.178373824110165, "learning_rate": 4.056851041978754e-06, "loss": 0.12503509521484374, "step": 89145 }, { "epoch": 0.7708536891163933, "grad_norm": 2.533114875822806, "learning_rate": 4.0566603670391875e-06, "loss": 0.3158290863037109, "step": 89150 }, { "epoch": 0.7708969226379365, "grad_norm": 39.934760854690495, "learning_rate": 4.056469687226397e-06, "loss": 0.1641326904296875, "step": 89155 }, { "epoch": 0.7709401561594799, "grad_norm": 45.053640082156, "learning_rate": 4.05627900254126e-06, "loss": 0.4195526123046875, "step": 89160 }, { "epoch": 0.7709833896810231, "grad_norm": 24.22945496021944, "learning_rate": 4.05608831298466e-06, "loss": 0.3908817291259766, "step": 89165 }, { "epoch": 0.7710266232025663, "grad_norm": 3.477527299357147, "learning_rate": 4.055897618557472e-06, "loss": 0.0916259765625, "step": 89170 }, { "epoch": 0.7710698567241097, "grad_norm": 8.731346337855905, "learning_rate": 4.055706919260578e-06, "loss": 0.07755126953125, "step": 89175 }, { "epoch": 0.7711130902456529, "grad_norm": 0.045420730935175894, "learning_rate": 4.055516215094858e-06, "loss": 0.016263389587402345, "step": 89180 }, { "epoch": 0.7711563237671961, "grad_norm": 5.206502090921397, "learning_rate": 4.0553255060611904e-06, "loss": 0.0439300537109375, "step": 89185 }, { "epoch": 0.7711995572887393, "grad_norm": 4.372918044921832, "learning_rate": 4.055134792160454e-06, "loss": 0.026497650146484374, "step": 89190 }, { "epoch": 0.7712427908102827, "grad_norm": 2.973387617889132, "learning_rate": 4.054944073393529e-06, "loss": 0.1108062744140625, "step": 89195 }, { "epoch": 0.7712860243318259, "grad_norm": 24.100701599410545, "learning_rate": 4.054753349761295e-06, "loss": 0.44046669006347655, "step": 89200 }, { "epoch": 0.7713292578533691, "grad_norm": 5.823376485936311, "learning_rate": 4.054562621264633e-06, "loss": 0.0376373291015625, "step": 89205 }, { "epoch": 0.7713724913749125, "grad_norm": 26.92169696123405, "learning_rate": 4.05437188790442e-06, "loss": 0.247998046875, "step": 89210 }, { "epoch": 0.7714157248964557, "grad_norm": 17.200180022932948, "learning_rate": 4.054181149681538e-06, "loss": 0.08901824951171874, "step": 89215 }, { "epoch": 0.7714589584179989, "grad_norm": 26.347322584782155, "learning_rate": 4.053990406596866e-06, "loss": 0.09766845703125, "step": 89220 }, { "epoch": 0.7715021919395423, "grad_norm": 32.309872871696875, "learning_rate": 4.053799658651281e-06, "loss": 0.49254150390625, "step": 89225 }, { "epoch": 0.7715454254610855, "grad_norm": 5.262234795827126, "learning_rate": 4.0536089058456676e-06, "loss": 0.2433563232421875, "step": 89230 }, { "epoch": 0.7715886589826287, "grad_norm": 6.085850887211275, "learning_rate": 4.053418148180903e-06, "loss": 0.04717559814453125, "step": 89235 }, { "epoch": 0.7716318925041721, "grad_norm": 3.5987850036381674, "learning_rate": 4.053227385657865e-06, "loss": 0.2049652099609375, "step": 89240 }, { "epoch": 0.7716751260257153, "grad_norm": 9.17152838588526, "learning_rate": 4.053036618277438e-06, "loss": 0.12301177978515625, "step": 89245 }, { "epoch": 0.7717183595472585, "grad_norm": 4.272214850528067, "learning_rate": 4.052845846040497e-06, "loss": 0.1455169677734375, "step": 89250 }, { "epoch": 0.7717615930688019, "grad_norm": 4.1695394781855075, "learning_rate": 4.052655068947924e-06, "loss": 0.08811569213867188, "step": 89255 }, { "epoch": 0.7718048265903451, "grad_norm": 11.128187206724142, "learning_rate": 4.0524642870006e-06, "loss": 0.17962703704833985, "step": 89260 }, { "epoch": 0.7718480601118883, "grad_norm": 25.68175858318674, "learning_rate": 4.0522735001994036e-06, "loss": 0.19988861083984374, "step": 89265 }, { "epoch": 0.7718912936334316, "grad_norm": 1.0813378523335322, "learning_rate": 4.052082708545215e-06, "loss": 0.09633522033691407, "step": 89270 }, { "epoch": 0.7719345271549749, "grad_norm": 4.97149878928036, "learning_rate": 4.051891912038912e-06, "loss": 0.06049957275390625, "step": 89275 }, { "epoch": 0.7719777606765181, "grad_norm": 0.9330174628829386, "learning_rate": 4.051701110681378e-06, "loss": 0.1423095703125, "step": 89280 }, { "epoch": 0.7720209941980614, "grad_norm": 25.232979216610175, "learning_rate": 4.051510304473492e-06, "loss": 0.15855331420898439, "step": 89285 }, { "epoch": 0.7720642277196047, "grad_norm": 2.1190626759451914, "learning_rate": 4.051319493416132e-06, "loss": 0.03086700439453125, "step": 89290 }, { "epoch": 0.7721074612411479, "grad_norm": 3.390526921921936, "learning_rate": 4.0511286775101804e-06, "loss": 0.0373870849609375, "step": 89295 }, { "epoch": 0.7721506947626912, "grad_norm": 32.38842096707518, "learning_rate": 4.050937856756516e-06, "loss": 0.20147781372070311, "step": 89300 }, { "epoch": 0.7721939282842345, "grad_norm": 34.62595397969164, "learning_rate": 4.0507470311560184e-06, "loss": 0.19531421661376952, "step": 89305 }, { "epoch": 0.7722371618057777, "grad_norm": 2.4959359329893984, "learning_rate": 4.050556200709569e-06, "loss": 0.17042617797851561, "step": 89310 }, { "epoch": 0.772280395327321, "grad_norm": 2.2632204571500862, "learning_rate": 4.050365365418047e-06, "loss": 0.20966339111328125, "step": 89315 }, { "epoch": 0.7723236288488643, "grad_norm": 0.7996620221192157, "learning_rate": 4.050174525282333e-06, "loss": 0.22619895935058593, "step": 89320 }, { "epoch": 0.7723668623704075, "grad_norm": 4.3399320043444725, "learning_rate": 4.049983680303307e-06, "loss": 0.04087905883789063, "step": 89325 }, { "epoch": 0.7724100958919508, "grad_norm": 4.249110983878906, "learning_rate": 4.049792830481849e-06, "loss": 0.22786903381347656, "step": 89330 }, { "epoch": 0.7724533294134941, "grad_norm": 13.326583639726946, "learning_rate": 4.049601975818838e-06, "loss": 0.10020599365234376, "step": 89335 }, { "epoch": 0.7724965629350373, "grad_norm": 6.981619175032935, "learning_rate": 4.049411116315157e-06, "loss": 0.09132957458496094, "step": 89340 }, { "epoch": 0.7725397964565806, "grad_norm": 27.898079628915724, "learning_rate": 4.049220251971684e-06, "loss": 0.3253021240234375, "step": 89345 }, { "epoch": 0.7725830299781239, "grad_norm": 26.392037319856062, "learning_rate": 4.0490293827893e-06, "loss": 0.19853878021240234, "step": 89350 }, { "epoch": 0.7726262634996671, "grad_norm": 16.20365405587738, "learning_rate": 4.0488385087688844e-06, "loss": 0.095086669921875, "step": 89355 }, { "epoch": 0.7726694970212103, "grad_norm": 0.3359048816687715, "learning_rate": 4.04864762991132e-06, "loss": 0.1130828857421875, "step": 89360 }, { "epoch": 0.7727127305427536, "grad_norm": 1.750505387637951, "learning_rate": 4.048456746217484e-06, "loss": 0.16826324462890624, "step": 89365 }, { "epoch": 0.7727559640642969, "grad_norm": 0.16892942724338297, "learning_rate": 4.048265857688258e-06, "loss": 0.12897815704345703, "step": 89370 }, { "epoch": 0.7727991975858401, "grad_norm": 13.586919856655385, "learning_rate": 4.048074964324523e-06, "loss": 0.20617475509643554, "step": 89375 }, { "epoch": 0.7728424311073834, "grad_norm": 8.662584765088242, "learning_rate": 4.04788406612716e-06, "loss": 0.238134765625, "step": 89380 }, { "epoch": 0.7728856646289267, "grad_norm": 0.052479397731685765, "learning_rate": 4.047693163097047e-06, "loss": 0.14777145385742188, "step": 89385 }, { "epoch": 0.77292889815047, "grad_norm": 16.481770866333964, "learning_rate": 4.047502255235065e-06, "loss": 0.12277069091796874, "step": 89390 }, { "epoch": 0.7729721316720132, "grad_norm": 5.049183273659607, "learning_rate": 4.047311342542096e-06, "loss": 0.049869537353515625, "step": 89395 }, { "epoch": 0.7730153651935565, "grad_norm": 1.0732173230548883, "learning_rate": 4.04712042501902e-06, "loss": 0.06610260009765626, "step": 89400 }, { "epoch": 0.7730585987150997, "grad_norm": 50.00787290048067, "learning_rate": 4.046929502666717e-06, "loss": 0.26668720245361327, "step": 89405 }, { "epoch": 0.773101832236643, "grad_norm": 1.9059837488786677, "learning_rate": 4.046738575486069e-06, "loss": 0.1215484619140625, "step": 89410 }, { "epoch": 0.7731450657581863, "grad_norm": 0.9592551318426336, "learning_rate": 4.046547643477952e-06, "loss": 0.04926166534423828, "step": 89415 }, { "epoch": 0.7731882992797295, "grad_norm": 0.07034366008447182, "learning_rate": 4.0463567066432514e-06, "loss": 0.19003219604492189, "step": 89420 }, { "epoch": 0.7732315328012728, "grad_norm": 0.9914744251720683, "learning_rate": 4.0461657649828464e-06, "loss": 0.17730712890625, "step": 89425 }, { "epoch": 0.7732747663228161, "grad_norm": 0.1788276186187289, "learning_rate": 4.045974818497616e-06, "loss": 0.04573822021484375, "step": 89430 }, { "epoch": 0.7733179998443593, "grad_norm": 0.22065654001032337, "learning_rate": 4.045783867188444e-06, "loss": 0.12324447631835937, "step": 89435 }, { "epoch": 0.7733612333659026, "grad_norm": 0.3986677273278298, "learning_rate": 4.045592911056209e-06, "loss": 0.08403396606445312, "step": 89440 }, { "epoch": 0.7734044668874458, "grad_norm": 12.819917437816471, "learning_rate": 4.04540195010179e-06, "loss": 0.110888671875, "step": 89445 }, { "epoch": 0.7734477004089891, "grad_norm": 57.90710094836365, "learning_rate": 4.045210984326071e-06, "loss": 0.177593994140625, "step": 89450 }, { "epoch": 0.7734909339305324, "grad_norm": 1.8094769456863857, "learning_rate": 4.045020013729931e-06, "loss": 0.2259613037109375, "step": 89455 }, { "epoch": 0.7735341674520756, "grad_norm": 17.631899411240443, "learning_rate": 4.044829038314252e-06, "loss": 0.608270263671875, "step": 89460 }, { "epoch": 0.7735774009736189, "grad_norm": 34.29401372540524, "learning_rate": 4.0446380580799125e-06, "loss": 0.224029541015625, "step": 89465 }, { "epoch": 0.7736206344951622, "grad_norm": 8.7575377470859, "learning_rate": 4.044447073027795e-06, "loss": 0.20949478149414064, "step": 89470 }, { "epoch": 0.7736638680167054, "grad_norm": 4.904438915745895, "learning_rate": 4.044256083158781e-06, "loss": 0.05249481201171875, "step": 89475 }, { "epoch": 0.7737071015382487, "grad_norm": 12.042833304103944, "learning_rate": 4.044065088473748e-06, "loss": 0.16230621337890624, "step": 89480 }, { "epoch": 0.773750335059792, "grad_norm": 16.255008796972522, "learning_rate": 4.043874088973581e-06, "loss": 0.08937263488769531, "step": 89485 }, { "epoch": 0.7737935685813352, "grad_norm": 61.947658081953854, "learning_rate": 4.0436830846591585e-06, "loss": 0.42072525024414065, "step": 89490 }, { "epoch": 0.7738368021028785, "grad_norm": 0.3526761329389765, "learning_rate": 4.043492075531361e-06, "loss": 0.1810699462890625, "step": 89495 }, { "epoch": 0.7738800356244218, "grad_norm": 15.904479057744673, "learning_rate": 4.043301061591071e-06, "loss": 0.4623558044433594, "step": 89500 }, { "epoch": 0.773923269145965, "grad_norm": 13.124323200729336, "learning_rate": 4.043110042839168e-06, "loss": 0.03603630065917969, "step": 89505 }, { "epoch": 0.7739665026675083, "grad_norm": 8.703048764200126, "learning_rate": 4.042919019276535e-06, "loss": 0.11440696716308593, "step": 89510 }, { "epoch": 0.7740097361890516, "grad_norm": 19.111821401092367, "learning_rate": 4.04272799090405e-06, "loss": 0.13040847778320314, "step": 89515 }, { "epoch": 0.7740529697105948, "grad_norm": 29.004470672975717, "learning_rate": 4.042536957722597e-06, "loss": 0.2509735107421875, "step": 89520 }, { "epoch": 0.7740962032321381, "grad_norm": 7.553139049680001, "learning_rate": 4.042345919733055e-06, "loss": 0.08824615478515625, "step": 89525 }, { "epoch": 0.7741394367536814, "grad_norm": 2.4437557164924755, "learning_rate": 4.042154876936305e-06, "loss": 0.12661209106445312, "step": 89530 }, { "epoch": 0.7741826702752246, "grad_norm": 4.276364237365816, "learning_rate": 4.04196382933323e-06, "loss": 0.03892822265625, "step": 89535 }, { "epoch": 0.7742259037967678, "grad_norm": 5.4918430806111145, "learning_rate": 4.04177277692471e-06, "loss": 0.1720458984375, "step": 89540 }, { "epoch": 0.7742691373183112, "grad_norm": 4.835717992591145, "learning_rate": 4.041581719711625e-06, "loss": 0.06235809326171875, "step": 89545 }, { "epoch": 0.7743123708398544, "grad_norm": 4.0282338698285125, "learning_rate": 4.041390657694858e-06, "loss": 0.1719646453857422, "step": 89550 }, { "epoch": 0.7743556043613976, "grad_norm": 96.42024325950992, "learning_rate": 4.041199590875288e-06, "loss": 0.43415679931640627, "step": 89555 }, { "epoch": 0.774398837882941, "grad_norm": 0.10232055651861642, "learning_rate": 4.041008519253798e-06, "loss": 0.19375152587890626, "step": 89560 }, { "epoch": 0.7744420714044842, "grad_norm": 4.851153515654956, "learning_rate": 4.04081744283127e-06, "loss": 0.09882049560546875, "step": 89565 }, { "epoch": 0.7744853049260274, "grad_norm": 0.16291090682381085, "learning_rate": 4.040626361608583e-06, "loss": 0.17324295043945312, "step": 89570 }, { "epoch": 0.7745285384475707, "grad_norm": 0.4907796137647234, "learning_rate": 4.040435275586619e-06, "loss": 0.1894287109375, "step": 89575 }, { "epoch": 0.774571771969114, "grad_norm": 9.289085696703063, "learning_rate": 4.04024418476626e-06, "loss": 0.049176025390625, "step": 89580 }, { "epoch": 0.7746150054906572, "grad_norm": 2.0800674268757633, "learning_rate": 4.040053089148386e-06, "loss": 0.056539154052734374, "step": 89585 }, { "epoch": 0.7746582390122005, "grad_norm": 13.750677540375806, "learning_rate": 4.039861988733879e-06, "loss": 0.18097763061523436, "step": 89590 }, { "epoch": 0.7747014725337438, "grad_norm": 0.9649233787048592, "learning_rate": 4.039670883523621e-06, "loss": 0.034931182861328125, "step": 89595 }, { "epoch": 0.774744706055287, "grad_norm": 0.9198999505236554, "learning_rate": 4.039479773518493e-06, "loss": 0.04921417236328125, "step": 89600 }, { "epoch": 0.7747879395768303, "grad_norm": 6.97544149934753, "learning_rate": 4.0392886587193755e-06, "loss": 0.07959709167480469, "step": 89605 }, { "epoch": 0.7748311730983736, "grad_norm": 2.305595130850205, "learning_rate": 4.039097539127151e-06, "loss": 0.1913055419921875, "step": 89610 }, { "epoch": 0.7748744066199168, "grad_norm": 2.3473777199725427, "learning_rate": 4.0389064147427e-06, "loss": 0.19421844482421874, "step": 89615 }, { "epoch": 0.77491764014146, "grad_norm": 2.9914299152138124, "learning_rate": 4.038715285566905e-06, "loss": 0.170550537109375, "step": 89620 }, { "epoch": 0.7749608736630034, "grad_norm": 2.1873509907854904, "learning_rate": 4.038524151600647e-06, "loss": 0.0960723876953125, "step": 89625 }, { "epoch": 0.7750041071845466, "grad_norm": 2.312638484389046, "learning_rate": 4.038333012844806e-06, "loss": 0.10865478515625, "step": 89630 }, { "epoch": 0.7750473407060898, "grad_norm": 6.635951405318223, "learning_rate": 4.038141869300267e-06, "loss": 0.08370513916015625, "step": 89635 }, { "epoch": 0.7750905742276332, "grad_norm": 1.9313294348320111, "learning_rate": 4.037950720967907e-06, "loss": 0.04669189453125, "step": 89640 }, { "epoch": 0.7751338077491764, "grad_norm": 18.235526194562073, "learning_rate": 4.037759567848611e-06, "loss": 0.122064208984375, "step": 89645 }, { "epoch": 0.7751770412707196, "grad_norm": 3.299857530724204, "learning_rate": 4.03756840994326e-06, "loss": 0.077972412109375, "step": 89650 }, { "epoch": 0.775220274792263, "grad_norm": 11.704269740523763, "learning_rate": 4.037377247252735e-06, "loss": 0.1395599365234375, "step": 89655 }, { "epoch": 0.7752635083138062, "grad_norm": 7.57796360347271, "learning_rate": 4.037186079777918e-06, "loss": 0.21177520751953124, "step": 89660 }, { "epoch": 0.7753067418353494, "grad_norm": 13.52189694703643, "learning_rate": 4.03699490751969e-06, "loss": 0.049313926696777345, "step": 89665 }, { "epoch": 0.7753499753568928, "grad_norm": 9.82033379367376, "learning_rate": 4.036803730478933e-06, "loss": 0.07964324951171875, "step": 89670 }, { "epoch": 0.775393208878436, "grad_norm": 10.085374055127339, "learning_rate": 4.036612548656529e-06, "loss": 0.227655029296875, "step": 89675 }, { "epoch": 0.7754364423999792, "grad_norm": 32.267359455210084, "learning_rate": 4.03642136205336e-06, "loss": 0.1788543701171875, "step": 89680 }, { "epoch": 0.7754796759215226, "grad_norm": 0.07945590093689187, "learning_rate": 4.036230170670307e-06, "loss": 0.07098312377929687, "step": 89685 }, { "epoch": 0.7755229094430658, "grad_norm": 12.32444982143851, "learning_rate": 4.036038974508252e-06, "loss": 0.22163467407226561, "step": 89690 }, { "epoch": 0.775566142964609, "grad_norm": 6.285243569618019, "learning_rate": 4.035847773568078e-06, "loss": 0.110919189453125, "step": 89695 }, { "epoch": 0.7756093764861524, "grad_norm": 2.941902027410412, "learning_rate": 4.035656567850663e-06, "loss": 0.39492340087890626, "step": 89700 }, { "epoch": 0.7756526100076956, "grad_norm": 4.197026233368696, "learning_rate": 4.035465357356892e-06, "loss": 0.39849853515625, "step": 89705 }, { "epoch": 0.7756958435292388, "grad_norm": 13.64384085742075, "learning_rate": 4.035274142087648e-06, "loss": 0.14866294860839843, "step": 89710 }, { "epoch": 0.775739077050782, "grad_norm": 2.0580097193644296, "learning_rate": 4.0350829220438105e-06, "loss": 0.0340576171875, "step": 89715 }, { "epoch": 0.7757823105723254, "grad_norm": 2.7422936416280534, "learning_rate": 4.034891697226261e-06, "loss": 0.03804168701171875, "step": 89720 }, { "epoch": 0.7758255440938686, "grad_norm": 4.362491069774828, "learning_rate": 4.0347004676358835e-06, "loss": 0.068426513671875, "step": 89725 }, { "epoch": 0.7758687776154118, "grad_norm": 7.1040259299902955, "learning_rate": 4.034509233273559e-06, "loss": 0.09713668823242187, "step": 89730 }, { "epoch": 0.7759120111369552, "grad_norm": 3.2997981862389336, "learning_rate": 4.034317994140167e-06, "loss": 0.11321563720703125, "step": 89735 }, { "epoch": 0.7759552446584984, "grad_norm": 28.058777246111557, "learning_rate": 4.034126750236595e-06, "loss": 0.19422454833984376, "step": 89740 }, { "epoch": 0.7759984781800416, "grad_norm": 0.3053930527329936, "learning_rate": 4.03393550156372e-06, "loss": 0.07965545654296875, "step": 89745 }, { "epoch": 0.776041711701585, "grad_norm": 2.8735712733948144, "learning_rate": 4.033744248122426e-06, "loss": 0.12122917175292969, "step": 89750 }, { "epoch": 0.7760849452231282, "grad_norm": 0.02257175252122341, "learning_rate": 4.033552989913596e-06, "loss": 0.06265754699707031, "step": 89755 }, { "epoch": 0.7761281787446714, "grad_norm": 3.5423127239405807, "learning_rate": 4.033361726938109e-06, "loss": 0.0860198974609375, "step": 89760 }, { "epoch": 0.7761714122662148, "grad_norm": 6.692538819608655, "learning_rate": 4.033170459196851e-06, "loss": 0.1729351043701172, "step": 89765 }, { "epoch": 0.776214645787758, "grad_norm": 11.382077701264517, "learning_rate": 4.032979186690701e-06, "loss": 0.34414825439453123, "step": 89770 }, { "epoch": 0.7762578793093012, "grad_norm": 3.5557628684244884, "learning_rate": 4.032787909420542e-06, "loss": 0.05856170654296875, "step": 89775 }, { "epoch": 0.7763011128308446, "grad_norm": 0.2405116188152741, "learning_rate": 4.032596627387257e-06, "loss": 0.21330490112304687, "step": 89780 }, { "epoch": 0.7763443463523878, "grad_norm": 7.284200102751113, "learning_rate": 4.032405340591727e-06, "loss": 0.24761962890625, "step": 89785 }, { "epoch": 0.776387579873931, "grad_norm": 1.3671109185909336, "learning_rate": 4.032214049034836e-06, "loss": 0.16925430297851562, "step": 89790 }, { "epoch": 0.7764308133954743, "grad_norm": 0.4023500103900879, "learning_rate": 4.032022752717464e-06, "loss": 0.18482666015625, "step": 89795 }, { "epoch": 0.7764740469170176, "grad_norm": 21.973328519102697, "learning_rate": 4.031831451640494e-06, "loss": 0.15189208984375, "step": 89800 }, { "epoch": 0.7765172804385608, "grad_norm": 2.791547976772107, "learning_rate": 4.03164014580481e-06, "loss": 0.16727294921875, "step": 89805 }, { "epoch": 0.7765605139601041, "grad_norm": 25.46131504215371, "learning_rate": 4.031448835211292e-06, "loss": 0.41467819213867185, "step": 89810 }, { "epoch": 0.7766037474816474, "grad_norm": 7.088199447851076, "learning_rate": 4.031257519860822e-06, "loss": 0.13566131591796876, "step": 89815 }, { "epoch": 0.7766469810031906, "grad_norm": 20.840027042493478, "learning_rate": 4.031066199754285e-06, "loss": 0.0689584732055664, "step": 89820 }, { "epoch": 0.7766902145247339, "grad_norm": 1.1632814183746572, "learning_rate": 4.030874874892561e-06, "loss": 0.169329833984375, "step": 89825 }, { "epoch": 0.7767334480462772, "grad_norm": 13.199341169274588, "learning_rate": 4.0306835452765335e-06, "loss": 0.196685791015625, "step": 89830 }, { "epoch": 0.7767766815678204, "grad_norm": 0.22671731893051597, "learning_rate": 4.030492210907084e-06, "loss": 0.12790946960449218, "step": 89835 }, { "epoch": 0.7768199150893637, "grad_norm": 4.998529725916609, "learning_rate": 4.030300871785097e-06, "loss": 0.06474609375, "step": 89840 }, { "epoch": 0.776863148610907, "grad_norm": 4.474254990784737, "learning_rate": 4.030109527911451e-06, "loss": 0.0341064453125, "step": 89845 }, { "epoch": 0.7769063821324502, "grad_norm": 26.806842279468892, "learning_rate": 4.029918179287033e-06, "loss": 0.21387939453125, "step": 89850 }, { "epoch": 0.7769496156539935, "grad_norm": 7.76278941017395, "learning_rate": 4.029726825912723e-06, "loss": 0.44197540283203124, "step": 89855 }, { "epoch": 0.7769928491755368, "grad_norm": 11.157973196753714, "learning_rate": 4.029535467789403e-06, "loss": 0.0550537109375, "step": 89860 }, { "epoch": 0.77703608269708, "grad_norm": 1.7006342499092182, "learning_rate": 4.029344104917957e-06, "loss": 0.32506504058837893, "step": 89865 }, { "epoch": 0.7770793162186233, "grad_norm": 4.247491638748256, "learning_rate": 4.029152737299267e-06, "loss": 0.14842643737792968, "step": 89870 }, { "epoch": 0.7771225497401666, "grad_norm": 3.656990124004697, "learning_rate": 4.0289613649342146e-06, "loss": 0.046844482421875, "step": 89875 }, { "epoch": 0.7771657832617098, "grad_norm": 21.876841843604367, "learning_rate": 4.028769987823684e-06, "loss": 0.1397735595703125, "step": 89880 }, { "epoch": 0.777209016783253, "grad_norm": 13.77299010892285, "learning_rate": 4.028578605968558e-06, "loss": 0.08543472290039063, "step": 89885 }, { "epoch": 0.7772522503047963, "grad_norm": 2.093395650934871, "learning_rate": 4.028387219369717e-06, "loss": 0.15078125, "step": 89890 }, { "epoch": 0.7772954838263396, "grad_norm": 24.91867538243898, "learning_rate": 4.028195828028046e-06, "loss": 0.10738677978515625, "step": 89895 }, { "epoch": 0.7773387173478828, "grad_norm": 0.7185243699765683, "learning_rate": 4.0280044319444255e-06, "loss": 0.1058441162109375, "step": 89900 }, { "epoch": 0.7773819508694261, "grad_norm": 4.73836026428108, "learning_rate": 4.027813031119741e-06, "loss": 0.07109527587890625, "step": 89905 }, { "epoch": 0.7774251843909694, "grad_norm": 7.727510712701479, "learning_rate": 4.027621625554872e-06, "loss": 0.13565216064453126, "step": 89910 }, { "epoch": 0.7774684179125126, "grad_norm": 7.157646891828516, "learning_rate": 4.027430215250704e-06, "loss": 0.15050201416015624, "step": 89915 }, { "epoch": 0.7775116514340559, "grad_norm": 0.706939378555173, "learning_rate": 4.027238800208119e-06, "loss": 0.29024581909179686, "step": 89920 }, { "epoch": 0.7775548849555992, "grad_norm": 4.4462797979109245, "learning_rate": 4.027047380427997e-06, "loss": 0.060396575927734376, "step": 89925 }, { "epoch": 0.7775981184771424, "grad_norm": 14.369123911315382, "learning_rate": 4.0268559559112244e-06, "loss": 0.30904083251953124, "step": 89930 }, { "epoch": 0.7776413519986857, "grad_norm": 42.17829879165709, "learning_rate": 4.026664526658684e-06, "loss": 0.22992095947265626, "step": 89935 }, { "epoch": 0.777684585520229, "grad_norm": 5.194226389897368, "learning_rate": 4.026473092671257e-06, "loss": 0.1900604248046875, "step": 89940 }, { "epoch": 0.7777278190417722, "grad_norm": 15.806033458781421, "learning_rate": 4.0262816539498265e-06, "loss": 0.1393503189086914, "step": 89945 }, { "epoch": 0.7777710525633155, "grad_norm": 3.9833056613033273, "learning_rate": 4.026090210495276e-06, "loss": 0.025587749481201173, "step": 89950 }, { "epoch": 0.7778142860848588, "grad_norm": 1.7819548307853634, "learning_rate": 4.025898762308488e-06, "loss": 0.29073944091796877, "step": 89955 }, { "epoch": 0.777857519606402, "grad_norm": 5.597811232444296, "learning_rate": 4.025707309390345e-06, "loss": 0.3187591552734375, "step": 89960 }, { "epoch": 0.7779007531279453, "grad_norm": 0.30766945170501164, "learning_rate": 4.025515851741731e-06, "loss": 0.20105972290039062, "step": 89965 }, { "epoch": 0.7779439866494885, "grad_norm": 6.862021510349754, "learning_rate": 4.025324389363529e-06, "loss": 0.08834381103515625, "step": 89970 }, { "epoch": 0.7779872201710318, "grad_norm": 17.15412021451746, "learning_rate": 4.025132922256621e-06, "loss": 0.062299346923828124, "step": 89975 }, { "epoch": 0.7780304536925751, "grad_norm": 9.976677265222449, "learning_rate": 4.024941450421891e-06, "loss": 0.15247230529785155, "step": 89980 }, { "epoch": 0.7780736872141183, "grad_norm": 3.083746309475198, "learning_rate": 4.024749973860221e-06, "loss": 0.683502197265625, "step": 89985 }, { "epoch": 0.7781169207356616, "grad_norm": 92.18765492258115, "learning_rate": 4.0245584925724945e-06, "loss": 0.32356605529785154, "step": 89990 }, { "epoch": 0.7781601542572049, "grad_norm": 3.3323777732896214, "learning_rate": 4.024367006559596e-06, "loss": 0.35447998046875, "step": 89995 }, { "epoch": 0.7782033877787481, "grad_norm": 6.791090799470336, "learning_rate": 4.024175515822406e-06, "loss": 0.0665863037109375, "step": 90000 }, { "epoch": 0.7782466213002914, "grad_norm": 18.214309443078943, "learning_rate": 4.023984020361809e-06, "loss": 0.1319305419921875, "step": 90005 }, { "epoch": 0.7782898548218347, "grad_norm": 2.7549205634906877, "learning_rate": 4.023792520178689e-06, "loss": 0.0948883056640625, "step": 90010 }, { "epoch": 0.7783330883433779, "grad_norm": 2.295870137452126, "learning_rate": 4.023601015273928e-06, "loss": 0.29939117431640627, "step": 90015 }, { "epoch": 0.7783763218649212, "grad_norm": 34.35031085057971, "learning_rate": 4.02340950564841e-06, "loss": 0.18319549560546874, "step": 90020 }, { "epoch": 0.7784195553864645, "grad_norm": 0.5121101092036668, "learning_rate": 4.023217991303017e-06, "loss": 0.04513473510742187, "step": 90025 }, { "epoch": 0.7784627889080077, "grad_norm": 8.335028962266316, "learning_rate": 4.0230264722386335e-06, "loss": 0.48308792114257815, "step": 90030 }, { "epoch": 0.778506022429551, "grad_norm": 3.5489281464741773, "learning_rate": 4.0228349484561425e-06, "loss": 0.111114501953125, "step": 90035 }, { "epoch": 0.7785492559510943, "grad_norm": 21.151777401214336, "learning_rate": 4.022643419956427e-06, "loss": 0.18666038513183594, "step": 90040 }, { "epoch": 0.7785924894726375, "grad_norm": 19.215406908059055, "learning_rate": 4.0224518867403705e-06, "loss": 0.22699451446533203, "step": 90045 }, { "epoch": 0.7786357229941808, "grad_norm": 1.088137371267477, "learning_rate": 4.022260348808855e-06, "loss": 0.16427459716796874, "step": 90050 }, { "epoch": 0.778678956515724, "grad_norm": 13.073893916513454, "learning_rate": 4.022068806162767e-06, "loss": 0.07429275512695313, "step": 90055 }, { "epoch": 0.7787221900372673, "grad_norm": 0.17692081722454125, "learning_rate": 4.021877258802987e-06, "loss": 0.054144668579101565, "step": 90060 }, { "epoch": 0.7787654235588105, "grad_norm": 34.724573146505705, "learning_rate": 4.021685706730399e-06, "loss": 0.1622823715209961, "step": 90065 }, { "epoch": 0.7788086570803538, "grad_norm": 7.204481943179964, "learning_rate": 4.021494149945887e-06, "loss": 0.11497650146484376, "step": 90070 }, { "epoch": 0.7788518906018971, "grad_norm": 6.096701042576602, "learning_rate": 4.021302588450335e-06, "loss": 0.12570343017578126, "step": 90075 }, { "epoch": 0.7788951241234403, "grad_norm": 5.463103008363254, "learning_rate": 4.0211110222446255e-06, "loss": 0.21851348876953125, "step": 90080 }, { "epoch": 0.7789383576449836, "grad_norm": 13.319701051108597, "learning_rate": 4.020919451329642e-06, "loss": 0.16288166046142577, "step": 90085 }, { "epoch": 0.7789815911665269, "grad_norm": 1.0259252884253114, "learning_rate": 4.020727875706268e-06, "loss": 0.06859130859375, "step": 90090 }, { "epoch": 0.7790248246880701, "grad_norm": 2.8457736804887794, "learning_rate": 4.020536295375387e-06, "loss": 0.0759796142578125, "step": 90095 }, { "epoch": 0.7790680582096134, "grad_norm": 0.3312269114259954, "learning_rate": 4.020344710337883e-06, "loss": 0.13067855834960937, "step": 90100 }, { "epoch": 0.7791112917311567, "grad_norm": 9.273648482690277, "learning_rate": 4.020153120594639e-06, "loss": 0.12382659912109376, "step": 90105 }, { "epoch": 0.7791545252526999, "grad_norm": 0.11765493974312419, "learning_rate": 4.019961526146541e-06, "loss": 0.13220500946044922, "step": 90110 }, { "epoch": 0.7791977587742432, "grad_norm": 0.133338982270571, "learning_rate": 4.019769926994468e-06, "loss": 0.04839324951171875, "step": 90115 }, { "epoch": 0.7792409922957865, "grad_norm": 0.35093347364566724, "learning_rate": 4.019578323139307e-06, "loss": 0.46089839935302734, "step": 90120 }, { "epoch": 0.7792842258173297, "grad_norm": 12.97088089583464, "learning_rate": 4.019386714581941e-06, "loss": 0.4031988143920898, "step": 90125 }, { "epoch": 0.779327459338873, "grad_norm": 2.4083374592925173, "learning_rate": 4.0191951013232535e-06, "loss": 0.022934913635253906, "step": 90130 }, { "epoch": 0.7793706928604163, "grad_norm": 1.604668291303044, "learning_rate": 4.019003483364129e-06, "loss": 0.17473983764648438, "step": 90135 }, { "epoch": 0.7794139263819595, "grad_norm": 14.650173001921031, "learning_rate": 4.0188118607054495e-06, "loss": 0.4770782470703125, "step": 90140 }, { "epoch": 0.7794571599035027, "grad_norm": 0.45268229977296004, "learning_rate": 4.018620233348101e-06, "loss": 0.17351760864257812, "step": 90145 }, { "epoch": 0.7795003934250461, "grad_norm": 11.196676479589724, "learning_rate": 4.018428601292964e-06, "loss": 0.32128753662109377, "step": 90150 }, { "epoch": 0.7795436269465893, "grad_norm": 1.7135352679331046, "learning_rate": 4.018236964540925e-06, "loss": 0.08582344055175781, "step": 90155 }, { "epoch": 0.7795868604681325, "grad_norm": 0.7654436879181044, "learning_rate": 4.018045323092868e-06, "loss": 0.09039039611816406, "step": 90160 }, { "epoch": 0.7796300939896759, "grad_norm": 1.2406265603914022, "learning_rate": 4.017853676949675e-06, "loss": 0.128631591796875, "step": 90165 }, { "epoch": 0.7796733275112191, "grad_norm": 16.769806868927457, "learning_rate": 4.017662026112232e-06, "loss": 0.34011306762695315, "step": 90170 }, { "epoch": 0.7797165610327623, "grad_norm": 18.377624733325064, "learning_rate": 4.01747037058142e-06, "loss": 0.3064472198486328, "step": 90175 }, { "epoch": 0.7797597945543057, "grad_norm": 16.148536750777613, "learning_rate": 4.017278710358125e-06, "loss": 0.049483108520507815, "step": 90180 }, { "epoch": 0.7798030280758489, "grad_norm": 0.68161383780767, "learning_rate": 4.0170870454432315e-06, "loss": 0.05170440673828125, "step": 90185 }, { "epoch": 0.7798462615973921, "grad_norm": 6.232425003524604, "learning_rate": 4.016895375837622e-06, "loss": 0.016605377197265625, "step": 90190 }, { "epoch": 0.7798894951189355, "grad_norm": 2.0570683265541065, "learning_rate": 4.0167037015421805e-06, "loss": 0.0232177734375, "step": 90195 }, { "epoch": 0.7799327286404787, "grad_norm": 29.40483430724805, "learning_rate": 4.016512022557792e-06, "loss": 0.47507190704345703, "step": 90200 }, { "epoch": 0.7799759621620219, "grad_norm": 1.5847390205094523, "learning_rate": 4.016320338885338e-06, "loss": 0.08984527587890626, "step": 90205 }, { "epoch": 0.7800191956835653, "grad_norm": 11.681918752061142, "learning_rate": 4.016128650525706e-06, "loss": 0.09548187255859375, "step": 90210 }, { "epoch": 0.7800624292051085, "grad_norm": 0.2924143487716702, "learning_rate": 4.015936957479779e-06, "loss": 0.10879478454589844, "step": 90215 }, { "epoch": 0.7801056627266517, "grad_norm": 0.13975346169440767, "learning_rate": 4.0157452597484395e-06, "loss": 0.1961080551147461, "step": 90220 }, { "epoch": 0.780148896248195, "grad_norm": 2.326218073783739, "learning_rate": 4.015553557332573e-06, "loss": 0.27303409576416016, "step": 90225 }, { "epoch": 0.7801921297697383, "grad_norm": 0.08765032793757695, "learning_rate": 4.015361850233063e-06, "loss": 0.13812179565429689, "step": 90230 }, { "epoch": 0.7802353632912815, "grad_norm": 47.33580823357272, "learning_rate": 4.0151701384507935e-06, "loss": 0.3012275695800781, "step": 90235 }, { "epoch": 0.7802785968128247, "grad_norm": 24.328456901708826, "learning_rate": 4.0149784219866495e-06, "loss": 0.1028106689453125, "step": 90240 }, { "epoch": 0.7803218303343681, "grad_norm": 2.505713561396147, "learning_rate": 4.014786700841514e-06, "loss": 0.0820953369140625, "step": 90245 }, { "epoch": 0.7803650638559113, "grad_norm": 10.297071611181204, "learning_rate": 4.014594975016273e-06, "loss": 0.06337814331054688, "step": 90250 }, { "epoch": 0.7804082973774545, "grad_norm": 19.364774249412243, "learning_rate": 4.01440324451181e-06, "loss": 0.29486846923828125, "step": 90255 }, { "epoch": 0.7804515308989979, "grad_norm": 12.492618434770613, "learning_rate": 4.014211509329008e-06, "loss": 0.07815093994140625, "step": 90260 }, { "epoch": 0.7804947644205411, "grad_norm": 6.817816026852265, "learning_rate": 4.0140197694687515e-06, "loss": 0.08948688507080078, "step": 90265 }, { "epoch": 0.7805379979420843, "grad_norm": 1.4847910081700482, "learning_rate": 4.013828024931926e-06, "loss": 0.19560394287109376, "step": 90270 }, { "epoch": 0.7805812314636277, "grad_norm": 27.788017805715906, "learning_rate": 4.013636275719415e-06, "loss": 0.22892494201660157, "step": 90275 }, { "epoch": 0.7806244649851709, "grad_norm": 3.907529772629744, "learning_rate": 4.0134445218321044e-06, "loss": 0.0693511962890625, "step": 90280 }, { "epoch": 0.7806676985067141, "grad_norm": 20.255159441188532, "learning_rate": 4.013252763270876e-06, "loss": 0.11596641540527344, "step": 90285 }, { "epoch": 0.7807109320282575, "grad_norm": 6.092160950232582, "learning_rate": 4.013061000036615e-06, "loss": 0.04783935546875, "step": 90290 }, { "epoch": 0.7807541655498007, "grad_norm": 12.287937222929434, "learning_rate": 4.012869232130207e-06, "loss": 0.10988407135009766, "step": 90295 }, { "epoch": 0.7807973990713439, "grad_norm": 0.5743819339359952, "learning_rate": 4.012677459552536e-06, "loss": 0.32213478088378905, "step": 90300 }, { "epoch": 0.7808406325928873, "grad_norm": 1.5072399598438924, "learning_rate": 4.012485682304484e-06, "loss": 0.15868988037109374, "step": 90305 }, { "epoch": 0.7808838661144305, "grad_norm": 7.053062232065506, "learning_rate": 4.01229390038694e-06, "loss": 0.223828125, "step": 90310 }, { "epoch": 0.7809270996359737, "grad_norm": 21.87224859407309, "learning_rate": 4.0121021138007846e-06, "loss": 0.409710693359375, "step": 90315 }, { "epoch": 0.780970333157517, "grad_norm": 70.51956012646667, "learning_rate": 4.011910322546904e-06, "loss": 0.4106964111328125, "step": 90320 }, { "epoch": 0.7810135666790603, "grad_norm": 16.350932956870473, "learning_rate": 4.011718526626182e-06, "loss": 0.10355682373046875, "step": 90325 }, { "epoch": 0.7810568002006035, "grad_norm": 2.083282150274714, "learning_rate": 4.011526726039505e-06, "loss": 0.10692825317382812, "step": 90330 }, { "epoch": 0.7811000337221468, "grad_norm": 2.2567912048373673, "learning_rate": 4.011334920787755e-06, "loss": 0.111090087890625, "step": 90335 }, { "epoch": 0.7811432672436901, "grad_norm": 4.913758596859692, "learning_rate": 4.0111431108718175e-06, "loss": 0.05275382995605469, "step": 90340 }, { "epoch": 0.7811865007652333, "grad_norm": 0.965191434353829, "learning_rate": 4.010951296292578e-06, "loss": 0.09029388427734375, "step": 90345 }, { "epoch": 0.7812297342867766, "grad_norm": 40.25189110108268, "learning_rate": 4.0107594770509194e-06, "loss": 0.29041366577148436, "step": 90350 }, { "epoch": 0.7812729678083199, "grad_norm": 13.837943012123713, "learning_rate": 4.010567653147728e-06, "loss": 0.12945556640625, "step": 90355 }, { "epoch": 0.7813162013298631, "grad_norm": 1.0516026909944007, "learning_rate": 4.010375824583889e-06, "loss": 0.12261123657226562, "step": 90360 }, { "epoch": 0.7813594348514064, "grad_norm": 1.06638852061575, "learning_rate": 4.010183991360284e-06, "loss": 0.1159027099609375, "step": 90365 }, { "epoch": 0.7814026683729497, "grad_norm": 7.212933339160301, "learning_rate": 4.009992153477801e-06, "loss": 0.18177642822265624, "step": 90370 }, { "epoch": 0.7814459018944929, "grad_norm": 0.1493173629362914, "learning_rate": 4.009800310937323e-06, "loss": 0.09041519165039062, "step": 90375 }, { "epoch": 0.7814891354160362, "grad_norm": 1.1546775169461316, "learning_rate": 4.0096084637397355e-06, "loss": 0.05145339965820313, "step": 90380 }, { "epoch": 0.7815323689375795, "grad_norm": 26.19554997661166, "learning_rate": 4.009416611885921e-06, "loss": 0.1072967529296875, "step": 90385 }, { "epoch": 0.7815756024591227, "grad_norm": 15.254936539727511, "learning_rate": 4.009224755376769e-06, "loss": 0.14724884033203126, "step": 90390 }, { "epoch": 0.781618835980666, "grad_norm": 3.859444768217611, "learning_rate": 4.009032894213161e-06, "loss": 0.08732528686523437, "step": 90395 }, { "epoch": 0.7816620695022092, "grad_norm": 3.7135879314216242, "learning_rate": 4.008841028395983e-06, "loss": 0.06053466796875, "step": 90400 }, { "epoch": 0.7817053030237525, "grad_norm": 11.290083326352125, "learning_rate": 4.0086491579261166e-06, "loss": 0.2322052001953125, "step": 90405 }, { "epoch": 0.7817485365452957, "grad_norm": 45.43840868867651, "learning_rate": 4.008457282804451e-06, "loss": 0.16092681884765625, "step": 90410 }, { "epoch": 0.781791770066839, "grad_norm": 15.92862307257568, "learning_rate": 4.008265403031871e-06, "loss": 0.11818695068359375, "step": 90415 }, { "epoch": 0.7818350035883823, "grad_norm": 3.8049661071536796, "learning_rate": 4.008073518609258e-06, "loss": 0.032706069946289065, "step": 90420 }, { "epoch": 0.7818782371099255, "grad_norm": 0.08070086494302207, "learning_rate": 4.0078816295374995e-06, "loss": 0.09896697998046874, "step": 90425 }, { "epoch": 0.7819214706314688, "grad_norm": 8.39448170247019, "learning_rate": 4.00768973581748e-06, "loss": 0.07362213134765624, "step": 90430 }, { "epoch": 0.7819647041530121, "grad_norm": 10.789224348701088, "learning_rate": 4.007497837450085e-06, "loss": 0.05089111328125, "step": 90435 }, { "epoch": 0.7820079376745553, "grad_norm": 1.207256989830795, "learning_rate": 4.0073059344361985e-06, "loss": 0.16277999877929689, "step": 90440 }, { "epoch": 0.7820511711960986, "grad_norm": 32.924562399923275, "learning_rate": 4.007114026776707e-06, "loss": 0.17047042846679689, "step": 90445 }, { "epoch": 0.7820944047176419, "grad_norm": 3.5385070170087083, "learning_rate": 4.006922114472494e-06, "loss": 0.06864242553710938, "step": 90450 }, { "epoch": 0.7821376382391851, "grad_norm": 1.5368426425112691, "learning_rate": 4.006730197524445e-06, "loss": 0.05593185424804688, "step": 90455 }, { "epoch": 0.7821808717607284, "grad_norm": 2.449877329360508, "learning_rate": 4.006538275933444e-06, "loss": 0.0353424072265625, "step": 90460 }, { "epoch": 0.7822241052822717, "grad_norm": 6.553539815033921, "learning_rate": 4.00634634970038e-06, "loss": 0.16443862915039062, "step": 90465 }, { "epoch": 0.7822673388038149, "grad_norm": 20.035111171289, "learning_rate": 4.0061544188261336e-06, "loss": 0.08084602355957031, "step": 90470 }, { "epoch": 0.7823105723253582, "grad_norm": 26.38919890119286, "learning_rate": 4.005962483311594e-06, "loss": 0.17704925537109376, "step": 90475 }, { "epoch": 0.7823538058469015, "grad_norm": 0.6285107674774141, "learning_rate": 4.0057705431576415e-06, "loss": 0.1225830078125, "step": 90480 }, { "epoch": 0.7823970393684447, "grad_norm": 6.688996770196111, "learning_rate": 4.005578598365166e-06, "loss": 0.21350555419921874, "step": 90485 }, { "epoch": 0.782440272889988, "grad_norm": 45.115793121557935, "learning_rate": 4.00538664893505e-06, "loss": 0.3116889953613281, "step": 90490 }, { "epoch": 0.7824835064115312, "grad_norm": 1.574445946999853, "learning_rate": 4.005194694868181e-06, "loss": 0.2849029541015625, "step": 90495 }, { "epoch": 0.7825267399330745, "grad_norm": 0.2793539357071149, "learning_rate": 4.005002736165441e-06, "loss": 0.0600341796875, "step": 90500 }, { "epoch": 0.7825699734546178, "grad_norm": 2.589554164063927, "learning_rate": 4.004810772827719e-06, "loss": 0.06023101806640625, "step": 90505 }, { "epoch": 0.782613206976161, "grad_norm": 0.45298479838807826, "learning_rate": 4.004618804855898e-06, "loss": 0.027822113037109374, "step": 90510 }, { "epoch": 0.7826564404977043, "grad_norm": 11.108972143670908, "learning_rate": 4.0044268322508624e-06, "loss": 0.07160797119140624, "step": 90515 }, { "epoch": 0.7826996740192476, "grad_norm": 0.09882785990384974, "learning_rate": 4.0042348550135e-06, "loss": 0.11857681274414063, "step": 90520 }, { "epoch": 0.7827429075407908, "grad_norm": 32.01155925181465, "learning_rate": 4.004042873144695e-06, "loss": 0.4250762939453125, "step": 90525 }, { "epoch": 0.7827861410623341, "grad_norm": 3.007536323469683, "learning_rate": 4.003850886645334e-06, "loss": 0.23029327392578125, "step": 90530 }, { "epoch": 0.7828293745838774, "grad_norm": 5.538066555321034, "learning_rate": 4.0036588955163e-06, "loss": 0.024664306640625, "step": 90535 }, { "epoch": 0.7828726081054206, "grad_norm": 4.453638277756146, "learning_rate": 4.003466899758481e-06, "loss": 0.2783653259277344, "step": 90540 }, { "epoch": 0.7829158416269639, "grad_norm": 9.801615379084668, "learning_rate": 4.0032748993727605e-06, "loss": 0.13019752502441406, "step": 90545 }, { "epoch": 0.7829590751485072, "grad_norm": 5.462581120584908, "learning_rate": 4.003082894360024e-06, "loss": 0.1060150146484375, "step": 90550 }, { "epoch": 0.7830023086700504, "grad_norm": 0.9194815332422278, "learning_rate": 4.002890884721159e-06, "loss": 0.010027694702148437, "step": 90555 }, { "epoch": 0.7830455421915937, "grad_norm": 20.809328558551684, "learning_rate": 4.002698870457051e-06, "loss": 0.399591064453125, "step": 90560 }, { "epoch": 0.783088775713137, "grad_norm": 2.5379333643124684, "learning_rate": 4.0025068515685825e-06, "loss": 0.15049285888671876, "step": 90565 }, { "epoch": 0.7831320092346802, "grad_norm": 1.6340282217159818, "learning_rate": 4.0023148280566426e-06, "loss": 0.08217697143554688, "step": 90570 }, { "epoch": 0.7831752427562234, "grad_norm": 5.4174724824187, "learning_rate": 4.002122799922114e-06, "loss": 0.27319908142089844, "step": 90575 }, { "epoch": 0.7832184762777668, "grad_norm": 4.05178270747302, "learning_rate": 4.001930767165884e-06, "loss": 0.09560546875, "step": 90580 }, { "epoch": 0.78326170979931, "grad_norm": 1.2679486378844427, "learning_rate": 4.001738729788838e-06, "loss": 0.031322479248046875, "step": 90585 }, { "epoch": 0.7833049433208532, "grad_norm": 1.2726487725149414, "learning_rate": 4.001546687791862e-06, "loss": 0.5012344360351563, "step": 90590 }, { "epoch": 0.7833481768423965, "grad_norm": 2.2580456834348794, "learning_rate": 4.001354641175839e-06, "loss": 0.09144668579101563, "step": 90595 }, { "epoch": 0.7833914103639398, "grad_norm": 1.4959092228161637, "learning_rate": 4.00116258994166e-06, "loss": 0.09307441711425782, "step": 90600 }, { "epoch": 0.783434643885483, "grad_norm": 0.8190986623078921, "learning_rate": 4.000970534090205e-06, "loss": 0.27287979125976564, "step": 90605 }, { "epoch": 0.7834778774070263, "grad_norm": 2.1596730829431063, "learning_rate": 4.0007784736223626e-06, "loss": 0.049626922607421874, "step": 90610 }, { "epoch": 0.7835211109285696, "grad_norm": 16.96057737104628, "learning_rate": 4.00058640853902e-06, "loss": 0.5697128295898437, "step": 90615 }, { "epoch": 0.7835643444501128, "grad_norm": 4.699440591267996, "learning_rate": 4.00039433884106e-06, "loss": 0.3913902282714844, "step": 90620 }, { "epoch": 0.7836075779716561, "grad_norm": 26.88204890147355, "learning_rate": 4.00020226452937e-06, "loss": 0.20261363983154296, "step": 90625 }, { "epoch": 0.7836508114931994, "grad_norm": 6.245015907665309, "learning_rate": 4.000010185604835e-06, "loss": 0.2954986572265625, "step": 90630 }, { "epoch": 0.7836940450147426, "grad_norm": 28.688615995224804, "learning_rate": 3.999818102068342e-06, "loss": 0.1638580322265625, "step": 90635 }, { "epoch": 0.7837372785362859, "grad_norm": 36.126155271617804, "learning_rate": 3.9996260139207765e-06, "loss": 0.5602985382080078, "step": 90640 }, { "epoch": 0.7837805120578292, "grad_norm": 9.671662299592953, "learning_rate": 3.999433921163024e-06, "loss": 0.4033515930175781, "step": 90645 }, { "epoch": 0.7838237455793724, "grad_norm": 7.87087666346306, "learning_rate": 3.999241823795971e-06, "loss": 0.0513641357421875, "step": 90650 }, { "epoch": 0.7838669791009157, "grad_norm": 15.530631209621323, "learning_rate": 3.999049721820502e-06, "loss": 0.07274818420410156, "step": 90655 }, { "epoch": 0.783910212622459, "grad_norm": 2.0395737275294388, "learning_rate": 3.998857615237504e-06, "loss": 0.22672462463378906, "step": 90660 }, { "epoch": 0.7839534461440022, "grad_norm": 45.99705460185188, "learning_rate": 3.998665504047863e-06, "loss": 0.5990219116210938, "step": 90665 }, { "epoch": 0.7839966796655454, "grad_norm": 23.47768186724206, "learning_rate": 3.998473388252466e-06, "loss": 0.3524656295776367, "step": 90670 }, { "epoch": 0.7840399131870888, "grad_norm": 0.5335639053956388, "learning_rate": 3.998281267852197e-06, "loss": 0.26055755615234377, "step": 90675 }, { "epoch": 0.784083146708632, "grad_norm": 0.6556720736949628, "learning_rate": 3.998089142847942e-06, "loss": 0.64024658203125, "step": 90680 }, { "epoch": 0.7841263802301752, "grad_norm": 0.5227683501200041, "learning_rate": 3.99789701324059e-06, "loss": 0.09536361694335938, "step": 90685 }, { "epoch": 0.7841696137517186, "grad_norm": 3.2184920691652406, "learning_rate": 3.997704879031024e-06, "loss": 0.23834457397460937, "step": 90690 }, { "epoch": 0.7842128472732618, "grad_norm": 0.050780783981035076, "learning_rate": 3.997512740220132e-06, "loss": 0.045251083374023435, "step": 90695 }, { "epoch": 0.784256080794805, "grad_norm": 8.150133577571287, "learning_rate": 3.997320596808799e-06, "loss": 0.06240921020507813, "step": 90700 }, { "epoch": 0.7842993143163484, "grad_norm": 59.79861218043674, "learning_rate": 3.997128448797912e-06, "loss": 0.1651885986328125, "step": 90705 }, { "epoch": 0.7843425478378916, "grad_norm": 2.171272882211009, "learning_rate": 3.996936296188357e-06, "loss": 0.05201568603515625, "step": 90710 }, { "epoch": 0.7843857813594348, "grad_norm": 5.25031682543348, "learning_rate": 3.996744138981018e-06, "loss": 0.148529052734375, "step": 90715 }, { "epoch": 0.7844290148809782, "grad_norm": 15.836052151437904, "learning_rate": 3.996551977176784e-06, "loss": 0.1261322021484375, "step": 90720 }, { "epoch": 0.7844722484025214, "grad_norm": 153.94663348954637, "learning_rate": 3.996359810776541e-06, "loss": 0.16779632568359376, "step": 90725 }, { "epoch": 0.7845154819240646, "grad_norm": 0.5649021639548997, "learning_rate": 3.9961676397811744e-06, "loss": 0.2512969970703125, "step": 90730 }, { "epoch": 0.784558715445608, "grad_norm": 25.767366904346076, "learning_rate": 3.99597546419157e-06, "loss": 0.21050338745117186, "step": 90735 }, { "epoch": 0.7846019489671512, "grad_norm": 38.8794779881181, "learning_rate": 3.995783284008616e-06, "loss": 0.5002166748046875, "step": 90740 }, { "epoch": 0.7846451824886944, "grad_norm": 3.305913399285433, "learning_rate": 3.995591099233196e-06, "loss": 0.042439842224121095, "step": 90745 }, { "epoch": 0.7846884160102376, "grad_norm": 55.677654180337726, "learning_rate": 3.995398909866199e-06, "loss": 0.611529541015625, "step": 90750 }, { "epoch": 0.784731649531781, "grad_norm": 1.2472567047452658, "learning_rate": 3.99520671590851e-06, "loss": 0.1470874786376953, "step": 90755 }, { "epoch": 0.7847748830533242, "grad_norm": 1.0568924514381277, "learning_rate": 3.995014517361015e-06, "loss": 0.045467376708984375, "step": 90760 }, { "epoch": 0.7848181165748674, "grad_norm": 34.31718457394804, "learning_rate": 3.9948223142246015e-06, "loss": 0.30886077880859375, "step": 90765 }, { "epoch": 0.7848613500964108, "grad_norm": 5.805867508180952, "learning_rate": 3.994630106500156e-06, "loss": 0.33228759765625, "step": 90770 }, { "epoch": 0.784904583617954, "grad_norm": 9.294184022244371, "learning_rate": 3.994437894188562e-06, "loss": 0.11511802673339844, "step": 90775 }, { "epoch": 0.7849478171394972, "grad_norm": 22.576552089369383, "learning_rate": 3.99424567729071e-06, "loss": 0.2413818359375, "step": 90780 }, { "epoch": 0.7849910506610406, "grad_norm": 1.071565565584803, "learning_rate": 3.994053455807484e-06, "loss": 0.0942718505859375, "step": 90785 }, { "epoch": 0.7850342841825838, "grad_norm": 4.564435525110332, "learning_rate": 3.993861229739773e-06, "loss": 0.21370010375976561, "step": 90790 }, { "epoch": 0.785077517704127, "grad_norm": 1.0991401772657718, "learning_rate": 3.993668999088461e-06, "loss": 0.08862457275390626, "step": 90795 }, { "epoch": 0.7851207512256704, "grad_norm": 13.018373927345804, "learning_rate": 3.993476763854434e-06, "loss": 0.1986572265625, "step": 90800 }, { "epoch": 0.7851639847472136, "grad_norm": 19.312708031980147, "learning_rate": 3.993284524038581e-06, "loss": 0.20254440307617189, "step": 90805 }, { "epoch": 0.7852072182687568, "grad_norm": 5.570263613425525, "learning_rate": 3.993092279641788e-06, "loss": 0.1875223159790039, "step": 90810 }, { "epoch": 0.7852504517903002, "grad_norm": 0.8863609683684784, "learning_rate": 3.99290003066494e-06, "loss": 0.1059356689453125, "step": 90815 }, { "epoch": 0.7852936853118434, "grad_norm": 7.476213647691735, "learning_rate": 3.992707777108927e-06, "loss": 0.27857666015625, "step": 90820 }, { "epoch": 0.7853369188333866, "grad_norm": 8.588108090755325, "learning_rate": 3.992515518974632e-06, "loss": 0.26814727783203124, "step": 90825 }, { "epoch": 0.78538015235493, "grad_norm": 8.215113294319357, "learning_rate": 3.992323256262942e-06, "loss": 0.11776885986328126, "step": 90830 }, { "epoch": 0.7854233858764732, "grad_norm": 17.100362654025204, "learning_rate": 3.9921309889747465e-06, "loss": 0.09985198974609374, "step": 90835 }, { "epoch": 0.7854666193980164, "grad_norm": 4.911228453559641, "learning_rate": 3.99193871711093e-06, "loss": 0.035592842102050784, "step": 90840 }, { "epoch": 0.7855098529195597, "grad_norm": 17.949996741464453, "learning_rate": 3.991746440672381e-06, "loss": 0.4025543212890625, "step": 90845 }, { "epoch": 0.785553086441103, "grad_norm": 6.575891955637586, "learning_rate": 3.991554159659983e-06, "loss": 0.09403610229492188, "step": 90850 }, { "epoch": 0.7855963199626462, "grad_norm": 97.06017830443135, "learning_rate": 3.9913618740746244e-06, "loss": 0.2137542724609375, "step": 90855 }, { "epoch": 0.7856395534841895, "grad_norm": 17.423866078957087, "learning_rate": 3.991169583917195e-06, "loss": 0.22339954376220703, "step": 90860 }, { "epoch": 0.7856827870057328, "grad_norm": 7.503516665696581, "learning_rate": 3.990977289188577e-06, "loss": 0.04156494140625, "step": 90865 }, { "epoch": 0.785726020527276, "grad_norm": 47.25212327907897, "learning_rate": 3.99078498988966e-06, "loss": 0.195074462890625, "step": 90870 }, { "epoch": 0.7857692540488193, "grad_norm": 47.45053482054707, "learning_rate": 3.990592686021331e-06, "loss": 0.21599884033203126, "step": 90875 }, { "epoch": 0.7858124875703626, "grad_norm": 7.92079347256476, "learning_rate": 3.990400377584475e-06, "loss": 0.19674835205078126, "step": 90880 }, { "epoch": 0.7858557210919058, "grad_norm": 0.6570848574911309, "learning_rate": 3.99020806457998e-06, "loss": 0.1071807861328125, "step": 90885 }, { "epoch": 0.785898954613449, "grad_norm": 7.568620203870754, "learning_rate": 3.990015747008733e-06, "loss": 0.18911170959472656, "step": 90890 }, { "epoch": 0.7859421881349924, "grad_norm": 1.2671571337760792, "learning_rate": 3.98982342487162e-06, "loss": 0.08176355361938477, "step": 90895 }, { "epoch": 0.7859854216565356, "grad_norm": 8.07383417204101, "learning_rate": 3.989631098169531e-06, "loss": 0.11978759765625, "step": 90900 }, { "epoch": 0.7860286551780788, "grad_norm": 19.416637495645958, "learning_rate": 3.98943876690335e-06, "loss": 0.13754348754882811, "step": 90905 }, { "epoch": 0.7860718886996222, "grad_norm": 0.5084436574120145, "learning_rate": 3.989246431073964e-06, "loss": 0.0361358642578125, "step": 90910 }, { "epoch": 0.7861151222211654, "grad_norm": 0.5770090483611221, "learning_rate": 3.989054090682261e-06, "loss": 0.13163604736328124, "step": 90915 }, { "epoch": 0.7861583557427086, "grad_norm": 4.5825374588022605, "learning_rate": 3.988861745729129e-06, "loss": 0.3604278564453125, "step": 90920 }, { "epoch": 0.7862015892642519, "grad_norm": 26.526206906723793, "learning_rate": 3.988669396215453e-06, "loss": 0.316455078125, "step": 90925 }, { "epoch": 0.7862448227857952, "grad_norm": 4.590203780682916, "learning_rate": 3.988477042142121e-06, "loss": 0.1010162353515625, "step": 90930 }, { "epoch": 0.7862880563073384, "grad_norm": 0.4522753071018298, "learning_rate": 3.9882846835100214e-06, "loss": 0.134967041015625, "step": 90935 }, { "epoch": 0.7863312898288817, "grad_norm": 26.959129133660777, "learning_rate": 3.98809232032004e-06, "loss": 0.14442596435546876, "step": 90940 }, { "epoch": 0.786374523350425, "grad_norm": 5.199203965353083, "learning_rate": 3.987899952573062e-06, "loss": 0.07018566131591797, "step": 90945 }, { "epoch": 0.7864177568719682, "grad_norm": 56.0733950662856, "learning_rate": 3.987707580269979e-06, "loss": 0.5080303192138672, "step": 90950 }, { "epoch": 0.7864609903935115, "grad_norm": 1.732125938942808, "learning_rate": 3.987515203411675e-06, "loss": 0.10706253051757812, "step": 90955 }, { "epoch": 0.7865042239150548, "grad_norm": 0.641105874815421, "learning_rate": 3.9873228219990385e-06, "loss": 0.061519622802734375, "step": 90960 }, { "epoch": 0.786547457436598, "grad_norm": 3.3062344068048244, "learning_rate": 3.9871304360329566e-06, "loss": 0.12974853515625, "step": 90965 }, { "epoch": 0.7865906909581413, "grad_norm": 0.05676047951074147, "learning_rate": 3.986938045514315e-06, "loss": 0.07764263153076172, "step": 90970 }, { "epoch": 0.7866339244796846, "grad_norm": 2.3331901877327037, "learning_rate": 3.986745650444003e-06, "loss": 0.1883514404296875, "step": 90975 }, { "epoch": 0.7866771580012278, "grad_norm": 8.371870254083259, "learning_rate": 3.986553250822908e-06, "loss": 0.06501922607421876, "step": 90980 }, { "epoch": 0.7867203915227711, "grad_norm": 2.1904717562832605, "learning_rate": 3.9863608466519165e-06, "loss": 0.304058837890625, "step": 90985 }, { "epoch": 0.7867636250443144, "grad_norm": 25.84045603279721, "learning_rate": 3.986168437931915e-06, "loss": 0.4877952575683594, "step": 90990 }, { "epoch": 0.7868068585658576, "grad_norm": 0.3341920519558477, "learning_rate": 3.9859760246637925e-06, "loss": 0.1499267578125, "step": 90995 }, { "epoch": 0.7868500920874009, "grad_norm": 5.478373645616109, "learning_rate": 3.985783606848435e-06, "loss": 0.140142822265625, "step": 91000 }, { "epoch": 0.7868933256089442, "grad_norm": 0.11588975443240855, "learning_rate": 3.985591184486731e-06, "loss": 0.02693023681640625, "step": 91005 }, { "epoch": 0.7869365591304874, "grad_norm": 20.461214541548784, "learning_rate": 3.985398757579568e-06, "loss": 0.07443809509277344, "step": 91010 }, { "epoch": 0.7869797926520307, "grad_norm": 34.10727445366172, "learning_rate": 3.985206326127833e-06, "loss": 0.3679088592529297, "step": 91015 }, { "epoch": 0.7870230261735739, "grad_norm": 15.997035860691824, "learning_rate": 3.9850138901324134e-06, "loss": 0.10821304321289063, "step": 91020 }, { "epoch": 0.7870662596951172, "grad_norm": 16.20439687394996, "learning_rate": 3.984821449594197e-06, "loss": 0.1208953857421875, "step": 91025 }, { "epoch": 0.7871094932166605, "grad_norm": 1.8324625517874724, "learning_rate": 3.98462900451407e-06, "loss": 0.08988761901855469, "step": 91030 }, { "epoch": 0.7871527267382037, "grad_norm": 0.1950311218731155, "learning_rate": 3.9844365548929215e-06, "loss": 0.09451141357421874, "step": 91035 }, { "epoch": 0.787195960259747, "grad_norm": 3.1962239623052864, "learning_rate": 3.9842441007316385e-06, "loss": 0.2783172607421875, "step": 91040 }, { "epoch": 0.7872391937812903, "grad_norm": 4.937374140657688, "learning_rate": 3.98405164203111e-06, "loss": 0.2820220947265625, "step": 91045 }, { "epoch": 0.7872824273028335, "grad_norm": 1.4631224475213007, "learning_rate": 3.983859178792221e-06, "loss": 0.053985595703125, "step": 91050 }, { "epoch": 0.7873256608243768, "grad_norm": 5.318758006839767, "learning_rate": 3.9836667110158596e-06, "loss": 0.340869140625, "step": 91055 }, { "epoch": 0.7873688943459201, "grad_norm": 6.14149427654213, "learning_rate": 3.983474238702915e-06, "loss": 0.08966712951660157, "step": 91060 }, { "epoch": 0.7874121278674633, "grad_norm": 6.3862238334274375, "learning_rate": 3.983281761854276e-06, "loss": 0.14412384033203124, "step": 91065 }, { "epoch": 0.7874553613890066, "grad_norm": 43.57837729186897, "learning_rate": 3.983089280470826e-06, "loss": 0.39306640625, "step": 91070 }, { "epoch": 0.7874985949105499, "grad_norm": 33.51270726838796, "learning_rate": 3.982896794553456e-06, "loss": 0.2915199279785156, "step": 91075 }, { "epoch": 0.7875418284320931, "grad_norm": 19.354080817744112, "learning_rate": 3.982704304103052e-06, "loss": 0.090576171875, "step": 91080 }, { "epoch": 0.7875850619536364, "grad_norm": 1.0834540817826304, "learning_rate": 3.9825118091205035e-06, "loss": 0.05582275390625, "step": 91085 }, { "epoch": 0.7876282954751797, "grad_norm": 19.28997563136317, "learning_rate": 3.982319309606697e-06, "loss": 0.2516204833984375, "step": 91090 }, { "epoch": 0.7876715289967229, "grad_norm": 4.956290489433566, "learning_rate": 3.9821268055625215e-06, "loss": 0.057323455810546875, "step": 91095 }, { "epoch": 0.7877147625182661, "grad_norm": 3.73209389210799, "learning_rate": 3.981934296988863e-06, "loss": 0.41878280639648435, "step": 91100 }, { "epoch": 0.7877579960398094, "grad_norm": 3.2483289857260473, "learning_rate": 3.9817417838866105e-06, "loss": 0.39466552734375, "step": 91105 }, { "epoch": 0.7878012295613527, "grad_norm": 11.987110165395682, "learning_rate": 3.9815492662566515e-06, "loss": 0.1601862907409668, "step": 91110 }, { "epoch": 0.7878444630828959, "grad_norm": 14.372858576898546, "learning_rate": 3.981356744099875e-06, "loss": 0.07247581481933593, "step": 91115 }, { "epoch": 0.7878876966044392, "grad_norm": 0.4144956986256364, "learning_rate": 3.9811642174171665e-06, "loss": 0.42449951171875, "step": 91120 }, { "epoch": 0.7879309301259825, "grad_norm": 1.89030584233363, "learning_rate": 3.980971686209416e-06, "loss": 0.10518970489501953, "step": 91125 }, { "epoch": 0.7879741636475257, "grad_norm": 23.929176394843186, "learning_rate": 3.9807791504775115e-06, "loss": 0.2270751953125, "step": 91130 }, { "epoch": 0.788017397169069, "grad_norm": 3.592175451752823, "learning_rate": 3.9805866102223385e-06, "loss": 0.18453407287597656, "step": 91135 }, { "epoch": 0.7880606306906123, "grad_norm": 12.470642112715167, "learning_rate": 3.980394065444787e-06, "loss": 0.30116729736328124, "step": 91140 }, { "epoch": 0.7881038642121555, "grad_norm": 7.052243315295674, "learning_rate": 3.980201516145746e-06, "loss": 0.03837509155273437, "step": 91145 }, { "epoch": 0.7881470977336988, "grad_norm": 23.625308028857518, "learning_rate": 3.980008962326101e-06, "loss": 0.21044158935546875, "step": 91150 }, { "epoch": 0.7881903312552421, "grad_norm": 12.577102629398322, "learning_rate": 3.9798164039867424e-06, "loss": 0.08471832275390626, "step": 91155 }, { "epoch": 0.7882335647767853, "grad_norm": 0.9584411335551059, "learning_rate": 3.979623841128557e-06, "loss": 0.028966522216796874, "step": 91160 }, { "epoch": 0.7882767982983286, "grad_norm": 26.916420308194635, "learning_rate": 3.979431273752432e-06, "loss": 0.24544830322265626, "step": 91165 }, { "epoch": 0.7883200318198719, "grad_norm": 17.68146108114808, "learning_rate": 3.979238701859257e-06, "loss": 0.22431488037109376, "step": 91170 }, { "epoch": 0.7883632653414151, "grad_norm": 35.97159189911296, "learning_rate": 3.97904612544992e-06, "loss": 0.3497016906738281, "step": 91175 }, { "epoch": 0.7884064988629584, "grad_norm": 0.22719955228506938, "learning_rate": 3.978853544525308e-06, "loss": 0.39442901611328124, "step": 91180 }, { "epoch": 0.7884497323845017, "grad_norm": 9.34155198570733, "learning_rate": 3.978660959086311e-06, "loss": 0.09355010986328124, "step": 91185 }, { "epoch": 0.7884929659060449, "grad_norm": 5.699179066347239, "learning_rate": 3.978468369133815e-06, "loss": 0.059438133239746095, "step": 91190 }, { "epoch": 0.7885361994275881, "grad_norm": 0.645165833227507, "learning_rate": 3.97827577466871e-06, "loss": 0.010140419006347656, "step": 91195 }, { "epoch": 0.7885794329491315, "grad_norm": 31.207584276878595, "learning_rate": 3.978083175691883e-06, "loss": 0.489776611328125, "step": 91200 }, { "epoch": 0.7886226664706747, "grad_norm": 2.70948804795337, "learning_rate": 3.977890572204224e-06, "loss": 0.17509765625, "step": 91205 }, { "epoch": 0.7886658999922179, "grad_norm": 5.209584584591741, "learning_rate": 3.977697964206619e-06, "loss": 0.26005401611328127, "step": 91210 }, { "epoch": 0.7887091335137613, "grad_norm": 7.896374612731596, "learning_rate": 3.977505351699958e-06, "loss": 0.08586044311523437, "step": 91215 }, { "epoch": 0.7887523670353045, "grad_norm": 4.786199357391181, "learning_rate": 3.977312734685129e-06, "loss": 0.17370338439941407, "step": 91220 }, { "epoch": 0.7887956005568477, "grad_norm": 0.7398303969401675, "learning_rate": 3.977120113163019e-06, "loss": 0.04054107666015625, "step": 91225 }, { "epoch": 0.7888388340783911, "grad_norm": 8.415748410442516, "learning_rate": 3.976927487134517e-06, "loss": 0.474713134765625, "step": 91230 }, { "epoch": 0.7888820675999343, "grad_norm": 0.36645733233273886, "learning_rate": 3.976734856600513e-06, "loss": 0.028873443603515625, "step": 91235 }, { "epoch": 0.7889253011214775, "grad_norm": 8.142539194430245, "learning_rate": 3.976542221561894e-06, "loss": 0.10324325561523437, "step": 91240 }, { "epoch": 0.7889685346430209, "grad_norm": 3.492920138252785, "learning_rate": 3.976349582019548e-06, "loss": 0.09098739624023437, "step": 91245 }, { "epoch": 0.7890117681645641, "grad_norm": 3.5033132927955752, "learning_rate": 3.976156937974364e-06, "loss": 0.23230667114257814, "step": 91250 }, { "epoch": 0.7890550016861073, "grad_norm": 2.3253227545754447, "learning_rate": 3.975964289427231e-06, "loss": 0.11231689453125, "step": 91255 }, { "epoch": 0.7890982352076507, "grad_norm": 8.916734410435133, "learning_rate": 3.975771636379036e-06, "loss": 0.3384185791015625, "step": 91260 }, { "epoch": 0.7891414687291939, "grad_norm": 4.22299713448698, "learning_rate": 3.97557897883067e-06, "loss": 0.16018447875976563, "step": 91265 }, { "epoch": 0.7891847022507371, "grad_norm": 4.329326833254968, "learning_rate": 3.975386316783019e-06, "loss": 0.13240280151367187, "step": 91270 }, { "epoch": 0.7892279357722803, "grad_norm": 1.331920500931994, "learning_rate": 3.975193650236972e-06, "loss": 0.1160888671875, "step": 91275 }, { "epoch": 0.7892711692938237, "grad_norm": 29.307664835994004, "learning_rate": 3.975000979193419e-06, "loss": 0.22346649169921876, "step": 91280 }, { "epoch": 0.7893144028153669, "grad_norm": 3.7934197165969095, "learning_rate": 3.974808303653247e-06, "loss": 0.37958450317382814, "step": 91285 }, { "epoch": 0.7893576363369101, "grad_norm": 10.196221294727764, "learning_rate": 3.974615623617346e-06, "loss": 0.1200164794921875, "step": 91290 }, { "epoch": 0.7894008698584535, "grad_norm": 0.5136184034852781, "learning_rate": 3.974422939086603e-06, "loss": 0.26840362548828123, "step": 91295 }, { "epoch": 0.7894441033799967, "grad_norm": 7.387877845805678, "learning_rate": 3.974230250061908e-06, "loss": 0.27532958984375, "step": 91300 }, { "epoch": 0.7894873369015399, "grad_norm": 4.545234459928419, "learning_rate": 3.974037556544149e-06, "loss": 0.15801849365234374, "step": 91305 }, { "epoch": 0.7895305704230833, "grad_norm": 0.7571662607215093, "learning_rate": 3.973844858534215e-06, "loss": 0.07035751342773437, "step": 91310 }, { "epoch": 0.7895738039446265, "grad_norm": 12.165697976641079, "learning_rate": 3.973652156032994e-06, "loss": 0.07068252563476562, "step": 91315 }, { "epoch": 0.7896170374661697, "grad_norm": 5.414592621943014, "learning_rate": 3.973459449041376e-06, "loss": 0.05556640625, "step": 91320 }, { "epoch": 0.7896602709877131, "grad_norm": 49.951580055760616, "learning_rate": 3.973266737560248e-06, "loss": 0.31055450439453125, "step": 91325 }, { "epoch": 0.7897035045092563, "grad_norm": 0.35464474803516144, "learning_rate": 3.973074021590501e-06, "loss": 0.0471893310546875, "step": 91330 }, { "epoch": 0.7897467380307995, "grad_norm": 43.06516548825522, "learning_rate": 3.972881301133022e-06, "loss": 0.293780517578125, "step": 91335 }, { "epoch": 0.7897899715523429, "grad_norm": 2.5047755150813433, "learning_rate": 3.9726885761887005e-06, "loss": 0.036553955078125, "step": 91340 }, { "epoch": 0.7898332050738861, "grad_norm": 5.4427918462054565, "learning_rate": 3.972495846758426e-06, "loss": 0.124493408203125, "step": 91345 }, { "epoch": 0.7898764385954293, "grad_norm": 1.902391859473415, "learning_rate": 3.972303112843086e-06, "loss": 0.25262908935546874, "step": 91350 }, { "epoch": 0.7899196721169727, "grad_norm": 15.801058731369862, "learning_rate": 3.972110374443569e-06, "loss": 0.1827178955078125, "step": 91355 }, { "epoch": 0.7899629056385159, "grad_norm": 17.975866091831097, "learning_rate": 3.9719176315607665e-06, "loss": 0.177227783203125, "step": 91360 }, { "epoch": 0.7900061391600591, "grad_norm": 12.618618081008206, "learning_rate": 3.971724884195564e-06, "loss": 0.12458877563476563, "step": 91365 }, { "epoch": 0.7900493726816024, "grad_norm": 0.41277032605727576, "learning_rate": 3.971532132348854e-06, "loss": 0.1937164306640625, "step": 91370 }, { "epoch": 0.7900926062031457, "grad_norm": 0.9721220843725449, "learning_rate": 3.971339376021522e-06, "loss": 0.051250457763671875, "step": 91375 }, { "epoch": 0.7901358397246889, "grad_norm": 2.662187150411551, "learning_rate": 3.9711466152144605e-06, "loss": 0.0671630859375, "step": 91380 }, { "epoch": 0.7901790732462322, "grad_norm": 0.8558042592558373, "learning_rate": 3.970953849928555e-06, "loss": 0.06662559509277344, "step": 91385 }, { "epoch": 0.7902223067677755, "grad_norm": 3.5476311772600884, "learning_rate": 3.9707610801646975e-06, "loss": 0.0917327880859375, "step": 91390 }, { "epoch": 0.7902655402893187, "grad_norm": 6.453640485026824, "learning_rate": 3.9705683059237736e-06, "loss": 0.3445220947265625, "step": 91395 }, { "epoch": 0.790308773810862, "grad_norm": 6.729755761059664, "learning_rate": 3.970375527206677e-06, "loss": 0.11634807586669922, "step": 91400 }, { "epoch": 0.7903520073324053, "grad_norm": 0.30046426753298, "learning_rate": 3.970182744014292e-06, "loss": 0.07432708740234376, "step": 91405 }, { "epoch": 0.7903952408539485, "grad_norm": 26.161255068578512, "learning_rate": 3.969989956347512e-06, "loss": 0.535943603515625, "step": 91410 }, { "epoch": 0.7904384743754918, "grad_norm": 15.27295071145674, "learning_rate": 3.969797164207222e-06, "loss": 0.07388381958007813, "step": 91415 }, { "epoch": 0.7904817078970351, "grad_norm": 1.575032470263727, "learning_rate": 3.969604367594314e-06, "loss": 0.3687347412109375, "step": 91420 }, { "epoch": 0.7905249414185783, "grad_norm": 10.189104361709338, "learning_rate": 3.969411566509676e-06, "loss": 0.1290863037109375, "step": 91425 }, { "epoch": 0.7905681749401215, "grad_norm": 5.15781839761641, "learning_rate": 3.969218760954198e-06, "loss": 0.03765602111816406, "step": 91430 }, { "epoch": 0.7906114084616649, "grad_norm": 10.828065979500735, "learning_rate": 3.969025950928768e-06, "loss": 0.16377487182617187, "step": 91435 }, { "epoch": 0.7906546419832081, "grad_norm": 9.232460514344323, "learning_rate": 3.968833136434277e-06, "loss": 0.24477825164794922, "step": 91440 }, { "epoch": 0.7906978755047513, "grad_norm": 3.5981986480970063, "learning_rate": 3.968640317471614e-06, "loss": 0.257177734375, "step": 91445 }, { "epoch": 0.7907411090262946, "grad_norm": 3.4318646818560725, "learning_rate": 3.968447494041665e-06, "loss": 0.24447021484375, "step": 91450 }, { "epoch": 0.7907843425478379, "grad_norm": 11.881783690538475, "learning_rate": 3.968254666145322e-06, "loss": 0.05813159942626953, "step": 91455 }, { "epoch": 0.7908275760693811, "grad_norm": 1.1996591586763445, "learning_rate": 3.968061833783476e-06, "loss": 0.15767364501953124, "step": 91460 }, { "epoch": 0.7908708095909244, "grad_norm": 10.430799052562659, "learning_rate": 3.967868996957013e-06, "loss": 0.12139968872070313, "step": 91465 }, { "epoch": 0.7909140431124677, "grad_norm": 44.24948137837492, "learning_rate": 3.967676155666824e-06, "loss": 0.5793479919433594, "step": 91470 }, { "epoch": 0.7909572766340109, "grad_norm": 0.9614479259631442, "learning_rate": 3.967483309913799e-06, "loss": 0.01674652099609375, "step": 91475 }, { "epoch": 0.7910005101555542, "grad_norm": 3.3563849618159054, "learning_rate": 3.967290459698825e-06, "loss": 0.1881744384765625, "step": 91480 }, { "epoch": 0.7910437436770975, "grad_norm": 6.351080782828436, "learning_rate": 3.967097605022793e-06, "loss": 0.020298004150390625, "step": 91485 }, { "epoch": 0.7910869771986407, "grad_norm": 31.576782886873833, "learning_rate": 3.9669047458865934e-06, "loss": 0.1718017578125, "step": 91490 }, { "epoch": 0.791130210720184, "grad_norm": 6.044284109439682, "learning_rate": 3.9667118822911136e-06, "loss": 0.091192626953125, "step": 91495 }, { "epoch": 0.7911734442417273, "grad_norm": 4.510715738446663, "learning_rate": 3.966519014237244e-06, "loss": 0.08173294067382812, "step": 91500 }, { "epoch": 0.7912166777632705, "grad_norm": 9.645836413082494, "learning_rate": 3.9663261417258745e-06, "loss": 0.167645263671875, "step": 91505 }, { "epoch": 0.7912599112848138, "grad_norm": 36.19774885315131, "learning_rate": 3.966133264757894e-06, "loss": 0.16673583984375, "step": 91510 }, { "epoch": 0.7913031448063571, "grad_norm": 2.0128870040952234, "learning_rate": 3.965940383334193e-06, "loss": 0.029930496215820314, "step": 91515 }, { "epoch": 0.7913463783279003, "grad_norm": 8.720024523313631, "learning_rate": 3.965747497455659e-06, "loss": 0.107269287109375, "step": 91520 }, { "epoch": 0.7913896118494436, "grad_norm": 9.931960421758962, "learning_rate": 3.965554607123184e-06, "loss": 0.16300029754638673, "step": 91525 }, { "epoch": 0.7914328453709869, "grad_norm": 1.2852451833007512, "learning_rate": 3.9653617123376555e-06, "loss": 0.03531646728515625, "step": 91530 }, { "epoch": 0.7914760788925301, "grad_norm": 1.985082364791297, "learning_rate": 3.965168813099964e-06, "loss": 0.040679931640625, "step": 91535 }, { "epoch": 0.7915193124140734, "grad_norm": 28.42294578274519, "learning_rate": 3.964975909411001e-06, "loss": 0.18140754699707032, "step": 91540 }, { "epoch": 0.7915625459356166, "grad_norm": 11.7555259822462, "learning_rate": 3.964783001271653e-06, "loss": 0.14832763671875, "step": 91545 }, { "epoch": 0.7916057794571599, "grad_norm": 3.279745764183928, "learning_rate": 3.964590088682811e-06, "loss": 0.062404632568359375, "step": 91550 }, { "epoch": 0.7916490129787032, "grad_norm": 0.43926732034674587, "learning_rate": 3.964397171645365e-06, "loss": 0.1508758544921875, "step": 91555 }, { "epoch": 0.7916922465002464, "grad_norm": 16.34566795592865, "learning_rate": 3.9642042501602045e-06, "loss": 0.14966392517089844, "step": 91560 }, { "epoch": 0.7917354800217897, "grad_norm": 2.531247377751099, "learning_rate": 3.964011324228218e-06, "loss": 0.2173431396484375, "step": 91565 }, { "epoch": 0.791778713543333, "grad_norm": 16.262413130504356, "learning_rate": 3.9638183938502984e-06, "loss": 0.41796340942382815, "step": 91570 }, { "epoch": 0.7918219470648762, "grad_norm": 0.07668845568066333, "learning_rate": 3.9636254590273335e-06, "loss": 0.07015457153320312, "step": 91575 }, { "epoch": 0.7918651805864195, "grad_norm": 4.810082405512035, "learning_rate": 3.963432519760212e-06, "loss": 0.20470924377441407, "step": 91580 }, { "epoch": 0.7919084141079628, "grad_norm": 27.003125319042585, "learning_rate": 3.963239576049825e-06, "loss": 0.5227092742919922, "step": 91585 }, { "epoch": 0.791951647629506, "grad_norm": 5.910762474523012, "learning_rate": 3.963046627897062e-06, "loss": 0.04796905517578125, "step": 91590 }, { "epoch": 0.7919948811510493, "grad_norm": 1.65284433490429, "learning_rate": 3.962853675302813e-06, "loss": 0.1126678466796875, "step": 91595 }, { "epoch": 0.7920381146725926, "grad_norm": 0.6471330717152527, "learning_rate": 3.962660718267969e-06, "loss": 0.07863349914550781, "step": 91600 }, { "epoch": 0.7920813481941358, "grad_norm": 4.263734566089053, "learning_rate": 3.962467756793418e-06, "loss": 0.203857421875, "step": 91605 }, { "epoch": 0.7921245817156791, "grad_norm": 0.09012535414071107, "learning_rate": 3.9622747908800515e-06, "loss": 0.055757331848144534, "step": 91610 }, { "epoch": 0.7921678152372224, "grad_norm": 0.08153489891406154, "learning_rate": 3.962081820528758e-06, "loss": 0.2725028991699219, "step": 91615 }, { "epoch": 0.7922110487587656, "grad_norm": 21.353460141148407, "learning_rate": 3.961888845740428e-06, "loss": 0.1030548095703125, "step": 91620 }, { "epoch": 0.7922542822803088, "grad_norm": 7.49504599102257, "learning_rate": 3.961695866515952e-06, "loss": 0.2548797607421875, "step": 91625 }, { "epoch": 0.7922975158018521, "grad_norm": 23.65321205801235, "learning_rate": 3.961502882856219e-06, "loss": 0.1477386474609375, "step": 91630 }, { "epoch": 0.7923407493233954, "grad_norm": 16.861256081009913, "learning_rate": 3.9613098947621215e-06, "loss": 0.1966114044189453, "step": 91635 }, { "epoch": 0.7923839828449386, "grad_norm": 25.666470086723525, "learning_rate": 3.961116902234546e-06, "loss": 0.439202880859375, "step": 91640 }, { "epoch": 0.792427216366482, "grad_norm": 4.660077768635317, "learning_rate": 3.960923905274385e-06, "loss": 0.13492965698242188, "step": 91645 }, { "epoch": 0.7924704498880252, "grad_norm": 6.216152276113638, "learning_rate": 3.960730903882526e-06, "loss": 0.2209320068359375, "step": 91650 }, { "epoch": 0.7925136834095684, "grad_norm": 37.50441842635564, "learning_rate": 3.960537898059864e-06, "loss": 0.5791618347167968, "step": 91655 }, { "epoch": 0.7925569169311117, "grad_norm": 0.8558071064010078, "learning_rate": 3.960344887807284e-06, "loss": 0.08660354614257812, "step": 91660 }, { "epoch": 0.792600150452655, "grad_norm": 34.75558791810738, "learning_rate": 3.960151873125679e-06, "loss": 0.2762184143066406, "step": 91665 }, { "epoch": 0.7926433839741982, "grad_norm": 7.263016339307066, "learning_rate": 3.959958854015938e-06, "loss": 0.11201591491699218, "step": 91670 }, { "epoch": 0.7926866174957415, "grad_norm": 1.1121219520099226, "learning_rate": 3.959765830478952e-06, "loss": 0.112213134765625, "step": 91675 }, { "epoch": 0.7927298510172848, "grad_norm": 7.127657564920615, "learning_rate": 3.9595728025156105e-06, "loss": 0.32028350830078123, "step": 91680 }, { "epoch": 0.792773084538828, "grad_norm": 6.451571165231276, "learning_rate": 3.959379770126804e-06, "loss": 0.1359720230102539, "step": 91685 }, { "epoch": 0.7928163180603713, "grad_norm": 0.632365783599009, "learning_rate": 3.959186733313423e-06, "loss": 0.293511962890625, "step": 91690 }, { "epoch": 0.7928595515819146, "grad_norm": 14.7083311546929, "learning_rate": 3.958993692076358e-06, "loss": 0.2610599517822266, "step": 91695 }, { "epoch": 0.7929027851034578, "grad_norm": 3.0329547679519617, "learning_rate": 3.958800646416498e-06, "loss": 0.06611480712890624, "step": 91700 }, { "epoch": 0.792946018625001, "grad_norm": 4.133734033636611, "learning_rate": 3.958607596334734e-06, "loss": 0.08902816772460938, "step": 91705 }, { "epoch": 0.7929892521465444, "grad_norm": 12.947444026735376, "learning_rate": 3.958414541831957e-06, "loss": 0.222918701171875, "step": 91710 }, { "epoch": 0.7930324856680876, "grad_norm": 11.53967964023931, "learning_rate": 3.958221482909057e-06, "loss": 0.1233734130859375, "step": 91715 }, { "epoch": 0.7930757191896308, "grad_norm": 12.322378685439531, "learning_rate": 3.958028419566924e-06, "loss": 0.10623779296875, "step": 91720 }, { "epoch": 0.7931189527111742, "grad_norm": 0.315279857995572, "learning_rate": 3.957835351806449e-06, "loss": 0.12517852783203126, "step": 91725 }, { "epoch": 0.7931621862327174, "grad_norm": 4.964172661836047, "learning_rate": 3.957642279628521e-06, "loss": 0.316375732421875, "step": 91730 }, { "epoch": 0.7932054197542606, "grad_norm": 5.945264331835072, "learning_rate": 3.957449203034032e-06, "loss": 0.07961044311523438, "step": 91735 }, { "epoch": 0.793248653275804, "grad_norm": 17.144032362363042, "learning_rate": 3.957256122023871e-06, "loss": 0.165447998046875, "step": 91740 }, { "epoch": 0.7932918867973472, "grad_norm": 5.376519900059838, "learning_rate": 3.957063036598931e-06, "loss": 0.059635353088378903, "step": 91745 }, { "epoch": 0.7933351203188904, "grad_norm": 5.032148711869542, "learning_rate": 3.9568699467601005e-06, "loss": 0.084503173828125, "step": 91750 }, { "epoch": 0.7933783538404338, "grad_norm": 3.767143114606515, "learning_rate": 3.956676852508269e-06, "loss": 0.15135650634765624, "step": 91755 }, { "epoch": 0.793421587361977, "grad_norm": 7.635294140602445, "learning_rate": 3.956483753844329e-06, "loss": 0.11248779296875, "step": 91760 }, { "epoch": 0.7934648208835202, "grad_norm": 12.038453477835342, "learning_rate": 3.9562906507691714e-06, "loss": 0.08369789123535157, "step": 91765 }, { "epoch": 0.7935080544050636, "grad_norm": 6.89794085689632, "learning_rate": 3.956097543283685e-06, "loss": 0.05982131958007812, "step": 91770 }, { "epoch": 0.7935512879266068, "grad_norm": 4.113408722269233, "learning_rate": 3.955904431388761e-06, "loss": 0.045970916748046875, "step": 91775 }, { "epoch": 0.79359452144815, "grad_norm": 4.942107068738371, "learning_rate": 3.955711315085291e-06, "loss": 0.15320243835449218, "step": 91780 }, { "epoch": 0.7936377549696934, "grad_norm": 2.4192297264583633, "learning_rate": 3.955518194374164e-06, "loss": 0.286065673828125, "step": 91785 }, { "epoch": 0.7936809884912366, "grad_norm": 37.377024939046606, "learning_rate": 3.955325069256272e-06, "loss": 0.36750946044921873, "step": 91790 }, { "epoch": 0.7937242220127798, "grad_norm": 23.15582959594257, "learning_rate": 3.955131939732505e-06, "loss": 0.24381141662597655, "step": 91795 }, { "epoch": 0.793767455534323, "grad_norm": 12.389876868586741, "learning_rate": 3.954938805803755e-06, "loss": 0.07929763793945313, "step": 91800 }, { "epoch": 0.7938106890558664, "grad_norm": 1.4508810827976637, "learning_rate": 3.95474566747091e-06, "loss": 0.10367774963378906, "step": 91805 }, { "epoch": 0.7938539225774096, "grad_norm": 1.7595199816906013, "learning_rate": 3.954552524734863e-06, "loss": 0.2486663818359375, "step": 91810 }, { "epoch": 0.7938971560989528, "grad_norm": 5.043576982771422, "learning_rate": 3.954359377596504e-06, "loss": 0.03490505218505859, "step": 91815 }, { "epoch": 0.7939403896204962, "grad_norm": 0.8109945670979237, "learning_rate": 3.9541662260567235e-06, "loss": 0.15265045166015626, "step": 91820 }, { "epoch": 0.7939836231420394, "grad_norm": 3.5080848088576713, "learning_rate": 3.953973070116413e-06, "loss": 0.09666271209716797, "step": 91825 }, { "epoch": 0.7940268566635826, "grad_norm": 1.7401947607023165, "learning_rate": 3.953779909776463e-06, "loss": 0.04992218017578125, "step": 91830 }, { "epoch": 0.794070090185126, "grad_norm": 0.32914944262528534, "learning_rate": 3.953586745037765e-06, "loss": 0.06738739013671875, "step": 91835 }, { "epoch": 0.7941133237066692, "grad_norm": 3.1551206792793502, "learning_rate": 3.9533935759012085e-06, "loss": 0.06061477661132812, "step": 91840 }, { "epoch": 0.7941565572282124, "grad_norm": 1.1313387175605298, "learning_rate": 3.953200402367685e-06, "loss": 0.13895950317382813, "step": 91845 }, { "epoch": 0.7941997907497558, "grad_norm": 3.971824282479991, "learning_rate": 3.9530072244380855e-06, "loss": 0.32372360229492186, "step": 91850 }, { "epoch": 0.794243024271299, "grad_norm": 24.472783894475246, "learning_rate": 3.952814042113302e-06, "loss": 0.10040283203125, "step": 91855 }, { "epoch": 0.7942862577928422, "grad_norm": 0.8301797305419089, "learning_rate": 3.9526208553942236e-06, "loss": 0.2682370185852051, "step": 91860 }, { "epoch": 0.7943294913143856, "grad_norm": 6.423574244515549, "learning_rate": 3.952427664281741e-06, "loss": 0.032201385498046874, "step": 91865 }, { "epoch": 0.7943727248359288, "grad_norm": 6.554964514215882, "learning_rate": 3.952234468776747e-06, "loss": 0.0794952392578125, "step": 91870 }, { "epoch": 0.794415958357472, "grad_norm": 10.940407285190375, "learning_rate": 3.952041268880131e-06, "loss": 0.0885009765625, "step": 91875 }, { "epoch": 0.7944591918790153, "grad_norm": 1.283455570543051, "learning_rate": 3.9518480645927866e-06, "loss": 0.06786079406738281, "step": 91880 }, { "epoch": 0.7945024254005586, "grad_norm": 18.872282708230465, "learning_rate": 3.951654855915602e-06, "loss": 0.246142578125, "step": 91885 }, { "epoch": 0.7945456589221018, "grad_norm": 11.911498726337436, "learning_rate": 3.95146164284947e-06, "loss": 0.22452926635742188, "step": 91890 }, { "epoch": 0.7945888924436451, "grad_norm": 0.3034218469934945, "learning_rate": 3.95126842539528e-06, "loss": 0.12020339965820312, "step": 91895 }, { "epoch": 0.7946321259651884, "grad_norm": 3.044585585010917, "learning_rate": 3.951075203553923e-06, "loss": 0.4515777587890625, "step": 91900 }, { "epoch": 0.7946753594867316, "grad_norm": 1.1197196775384302, "learning_rate": 3.950881977326293e-06, "loss": 0.14112415313720703, "step": 91905 }, { "epoch": 0.7947185930082749, "grad_norm": 2.165591379280854, "learning_rate": 3.950688746713279e-06, "loss": 0.20786552429199218, "step": 91910 }, { "epoch": 0.7947618265298182, "grad_norm": 7.090345322075544, "learning_rate": 3.950495511715772e-06, "loss": 0.1498199462890625, "step": 91915 }, { "epoch": 0.7948050600513614, "grad_norm": 2.044184555330944, "learning_rate": 3.950302272334664e-06, "loss": 0.05496406555175781, "step": 91920 }, { "epoch": 0.7948482935729047, "grad_norm": 11.68663273441717, "learning_rate": 3.950109028570846e-06, "loss": 0.3538238525390625, "step": 91925 }, { "epoch": 0.794891527094448, "grad_norm": 13.05128583101453, "learning_rate": 3.949915780425209e-06, "loss": 0.04172534942626953, "step": 91930 }, { "epoch": 0.7949347606159912, "grad_norm": 17.940368344239577, "learning_rate": 3.949722527898645e-06, "loss": 0.061132621765136716, "step": 91935 }, { "epoch": 0.7949779941375344, "grad_norm": 26.267137811968777, "learning_rate": 3.949529270992044e-06, "loss": 0.1794647216796875, "step": 91940 }, { "epoch": 0.7950212276590778, "grad_norm": 24.634151188149296, "learning_rate": 3.949336009706297e-06, "loss": 0.19557228088378906, "step": 91945 }, { "epoch": 0.795064461180621, "grad_norm": 0.47195786863129396, "learning_rate": 3.9491427440422975e-06, "loss": 0.14387435913085939, "step": 91950 }, { "epoch": 0.7951076947021642, "grad_norm": 8.42704713514622, "learning_rate": 3.948949474000935e-06, "loss": 0.2842529296875, "step": 91955 }, { "epoch": 0.7951509282237076, "grad_norm": 8.61961547616732, "learning_rate": 3.948756199583101e-06, "loss": 0.0787811279296875, "step": 91960 }, { "epoch": 0.7951941617452508, "grad_norm": 1.4395801582192107, "learning_rate": 3.948562920789688e-06, "loss": 0.0744293212890625, "step": 91965 }, { "epoch": 0.795237395266794, "grad_norm": 3.1822455403646184, "learning_rate": 3.948369637621587e-06, "loss": 0.04863128662109375, "step": 91970 }, { "epoch": 0.7952806287883373, "grad_norm": 3.519630229205879, "learning_rate": 3.9481763500796886e-06, "loss": 0.1580352783203125, "step": 91975 }, { "epoch": 0.7953238623098806, "grad_norm": 1.7531322885944813, "learning_rate": 3.947983058164885e-06, "loss": 0.07666053771972656, "step": 91980 }, { "epoch": 0.7953670958314238, "grad_norm": 37.42820523534794, "learning_rate": 3.947789761878066e-06, "loss": 0.5689834594726563, "step": 91985 }, { "epoch": 0.7954103293529671, "grad_norm": 39.5406662924775, "learning_rate": 3.947596461220125e-06, "loss": 0.7550537109375, "step": 91990 }, { "epoch": 0.7954535628745104, "grad_norm": 24.836022053805536, "learning_rate": 3.947403156191953e-06, "loss": 0.25864219665527344, "step": 91995 }, { "epoch": 0.7954967963960536, "grad_norm": 17.04352538971263, "learning_rate": 3.947209846794441e-06, "loss": 0.12861404418945313, "step": 92000 }, { "epoch": 0.7955400299175969, "grad_norm": 18.8951076120696, "learning_rate": 3.947016533028482e-06, "loss": 0.29153594970703123, "step": 92005 }, { "epoch": 0.7955832634391402, "grad_norm": 0.5710015320576659, "learning_rate": 3.9468232148949655e-06, "loss": 0.05738525390625, "step": 92010 }, { "epoch": 0.7956264969606834, "grad_norm": 0.8650558893219394, "learning_rate": 3.9466298923947835e-06, "loss": 0.010145187377929688, "step": 92015 }, { "epoch": 0.7956697304822267, "grad_norm": 0.7967576353056932, "learning_rate": 3.946436565528829e-06, "loss": 0.15005836486816407, "step": 92020 }, { "epoch": 0.79571296400377, "grad_norm": 6.342254080345691, "learning_rate": 3.9462432342979925e-06, "loss": 0.20083217620849608, "step": 92025 }, { "epoch": 0.7957561975253132, "grad_norm": 3.923468673740947, "learning_rate": 3.9460498987031665e-06, "loss": 0.03329925537109375, "step": 92030 }, { "epoch": 0.7957994310468565, "grad_norm": 7.496010591659415, "learning_rate": 3.945856558745241e-06, "loss": 0.2939056396484375, "step": 92035 }, { "epoch": 0.7958426645683998, "grad_norm": 2.5491697033919722, "learning_rate": 3.945663214425109e-06, "loss": 0.0554351806640625, "step": 92040 }, { "epoch": 0.795885898089943, "grad_norm": 1.4423084093871463, "learning_rate": 3.945469865743661e-06, "loss": 0.1406890869140625, "step": 92045 }, { "epoch": 0.7959291316114863, "grad_norm": 47.43040219759182, "learning_rate": 3.94527651270179e-06, "loss": 0.1950042724609375, "step": 92050 }, { "epoch": 0.7959723651330295, "grad_norm": 30.586692505118396, "learning_rate": 3.945083155300388e-06, "loss": 0.45824127197265624, "step": 92055 }, { "epoch": 0.7960155986545728, "grad_norm": 7.775913038333262, "learning_rate": 3.944889793540345e-06, "loss": 0.44871826171875, "step": 92060 }, { "epoch": 0.7960588321761161, "grad_norm": 24.51594280023347, "learning_rate": 3.944696427422555e-06, "loss": 0.04997711181640625, "step": 92065 }, { "epoch": 0.7961020656976593, "grad_norm": 1.4399764640941761, "learning_rate": 3.944503056947907e-06, "loss": 0.033948516845703124, "step": 92070 }, { "epoch": 0.7961452992192026, "grad_norm": 1.8543949279237057, "learning_rate": 3.944309682117295e-06, "loss": 0.3985687255859375, "step": 92075 }, { "epoch": 0.7961885327407459, "grad_norm": 5.5473956488403, "learning_rate": 3.944116302931611e-06, "loss": 0.3039398193359375, "step": 92080 }, { "epoch": 0.7962317662622891, "grad_norm": 0.8693657690614326, "learning_rate": 3.943922919391745e-06, "loss": 0.04728145599365234, "step": 92085 }, { "epoch": 0.7962749997838324, "grad_norm": 30.541051527524075, "learning_rate": 3.9437295314985895e-06, "loss": 0.5672515869140625, "step": 92090 }, { "epoch": 0.7963182333053757, "grad_norm": 13.062838714646, "learning_rate": 3.943536139253039e-06, "loss": 0.09567489624023437, "step": 92095 }, { "epoch": 0.7963614668269189, "grad_norm": 0.25169210698401573, "learning_rate": 3.943342742655981e-06, "loss": 0.02458019256591797, "step": 92100 }, { "epoch": 0.7964047003484622, "grad_norm": 18.82371327375405, "learning_rate": 3.94314934170831e-06, "loss": 0.174176025390625, "step": 92105 }, { "epoch": 0.7964479338700055, "grad_norm": 0.1700514891090162, "learning_rate": 3.942955936410918e-06, "loss": 0.06942634582519532, "step": 92110 }, { "epoch": 0.7964911673915487, "grad_norm": 2.7465491757968166, "learning_rate": 3.942762526764697e-06, "loss": 0.05448455810546875, "step": 92115 }, { "epoch": 0.796534400913092, "grad_norm": 16.64438205391051, "learning_rate": 3.942569112770537e-06, "loss": 0.2946758270263672, "step": 92120 }, { "epoch": 0.7965776344346353, "grad_norm": 15.39654738296953, "learning_rate": 3.942375694429333e-06, "loss": 0.13652496337890624, "step": 92125 }, { "epoch": 0.7966208679561785, "grad_norm": 6.130436450424739, "learning_rate": 3.942182271741975e-06, "loss": 0.08203926086425781, "step": 92130 }, { "epoch": 0.7966641014777218, "grad_norm": 5.022092499586856, "learning_rate": 3.941988844709356e-06, "loss": 0.09210205078125, "step": 92135 }, { "epoch": 0.796707334999265, "grad_norm": 12.84425479599877, "learning_rate": 3.9417954133323676e-06, "loss": 0.15334625244140626, "step": 92140 }, { "epoch": 0.7967505685208083, "grad_norm": 1.5310726696603565, "learning_rate": 3.941601977611902e-06, "loss": 0.07553749084472657, "step": 92145 }, { "epoch": 0.7967938020423515, "grad_norm": 16.673427945982706, "learning_rate": 3.941408537548851e-06, "loss": 0.18210830688476562, "step": 92150 }, { "epoch": 0.7968370355638948, "grad_norm": 8.158875397536875, "learning_rate": 3.941215093144107e-06, "loss": 0.1012664794921875, "step": 92155 }, { "epoch": 0.7968802690854381, "grad_norm": 1.219051730373007, "learning_rate": 3.941021644398562e-06, "loss": 0.0462738037109375, "step": 92160 }, { "epoch": 0.7969235026069813, "grad_norm": 0.23665954420388458, "learning_rate": 3.940828191313108e-06, "loss": 0.07619895935058593, "step": 92165 }, { "epoch": 0.7969667361285246, "grad_norm": 7.427965147923779, "learning_rate": 3.940634733888638e-06, "loss": 0.157464599609375, "step": 92170 }, { "epoch": 0.7970099696500679, "grad_norm": 13.324048622453013, "learning_rate": 3.940441272126045e-06, "loss": 0.302557373046875, "step": 92175 }, { "epoch": 0.7970532031716111, "grad_norm": 1.3748705998659572, "learning_rate": 3.940247806026218e-06, "loss": 0.14338531494140624, "step": 92180 }, { "epoch": 0.7970964366931544, "grad_norm": 5.112514247645268, "learning_rate": 3.9400543355900514e-06, "loss": 0.20716552734375, "step": 92185 }, { "epoch": 0.7971396702146977, "grad_norm": 1.2349288084941377, "learning_rate": 3.939860860818438e-06, "loss": 0.34558868408203125, "step": 92190 }, { "epoch": 0.7971829037362409, "grad_norm": 25.625072812437786, "learning_rate": 3.93966738171227e-06, "loss": 0.07572021484375, "step": 92195 }, { "epoch": 0.7972261372577842, "grad_norm": 5.902177583681141, "learning_rate": 3.939473898272438e-06, "loss": 0.07974281311035156, "step": 92200 }, { "epoch": 0.7972693707793275, "grad_norm": 9.13450594310885, "learning_rate": 3.939280410499835e-06, "loss": 0.1359100341796875, "step": 92205 }, { "epoch": 0.7973126043008707, "grad_norm": 12.038466131093267, "learning_rate": 3.939086918395355e-06, "loss": 0.08989677429199219, "step": 92210 }, { "epoch": 0.797355837822414, "grad_norm": 4.064042132945264, "learning_rate": 3.938893421959888e-06, "loss": 0.0321075439453125, "step": 92215 }, { "epoch": 0.7973990713439573, "grad_norm": 1.7636973061850418, "learning_rate": 3.938699921194328e-06, "loss": 0.04251279830932617, "step": 92220 }, { "epoch": 0.7974423048655005, "grad_norm": 15.28357289525006, "learning_rate": 3.938506416099567e-06, "loss": 0.4042167663574219, "step": 92225 }, { "epoch": 0.7974855383870437, "grad_norm": 1.737987920966991, "learning_rate": 3.938312906676497e-06, "loss": 0.19705657958984374, "step": 92230 }, { "epoch": 0.7975287719085871, "grad_norm": 9.421652184774175, "learning_rate": 3.938119392926011e-06, "loss": 0.543115234375, "step": 92235 }, { "epoch": 0.7975720054301303, "grad_norm": 59.53229223359922, "learning_rate": 3.937925874849001e-06, "loss": 0.1609893798828125, "step": 92240 }, { "epoch": 0.7976152389516735, "grad_norm": 0.8498541414410632, "learning_rate": 3.937732352446361e-06, "loss": 0.1240509033203125, "step": 92245 }, { "epoch": 0.7976584724732169, "grad_norm": 7.54731782674105, "learning_rate": 3.93753882571898e-06, "loss": 0.09811439514160156, "step": 92250 }, { "epoch": 0.7977017059947601, "grad_norm": 1.9647907853895734, "learning_rate": 3.937345294667755e-06, "loss": 0.41124420166015624, "step": 92255 }, { "epoch": 0.7977449395163033, "grad_norm": 0.7779764536624654, "learning_rate": 3.937151759293575e-06, "loss": 0.074932861328125, "step": 92260 }, { "epoch": 0.7977881730378467, "grad_norm": 2.841955035384271, "learning_rate": 3.936958219597334e-06, "loss": 0.015451812744140625, "step": 92265 }, { "epoch": 0.7978314065593899, "grad_norm": 0.6585726145656724, "learning_rate": 3.9367646755799245e-06, "loss": 0.1155670166015625, "step": 92270 }, { "epoch": 0.7978746400809331, "grad_norm": 39.6316682704821, "learning_rate": 3.936571127242239e-06, "loss": 0.2736297607421875, "step": 92275 }, { "epoch": 0.7979178736024765, "grad_norm": 14.346871857217266, "learning_rate": 3.936377574585171e-06, "loss": 0.12747802734375, "step": 92280 }, { "epoch": 0.7979611071240197, "grad_norm": 4.119737317126879, "learning_rate": 3.936184017609613e-06, "loss": 0.1461883544921875, "step": 92285 }, { "epoch": 0.7980043406455629, "grad_norm": 31.352936409842115, "learning_rate": 3.935990456316455e-06, "loss": 0.068408203125, "step": 92290 }, { "epoch": 0.7980475741671063, "grad_norm": 0.3209628181752577, "learning_rate": 3.935796890706593e-06, "loss": 0.05291423797607422, "step": 92295 }, { "epoch": 0.7980908076886495, "grad_norm": 1.1642971053874591, "learning_rate": 3.935603320780918e-06, "loss": 0.37959136962890627, "step": 92300 }, { "epoch": 0.7981340412101927, "grad_norm": 1.7028876996945617, "learning_rate": 3.935409746540323e-06, "loss": 0.30817489624023436, "step": 92305 }, { "epoch": 0.798177274731736, "grad_norm": 1.439165308437613, "learning_rate": 3.935216167985701e-06, "loss": 0.08478965759277343, "step": 92310 }, { "epoch": 0.7982205082532793, "grad_norm": 8.710289990907825, "learning_rate": 3.935022585117945e-06, "loss": 0.2690277099609375, "step": 92315 }, { "epoch": 0.7982637417748225, "grad_norm": 12.064028148901597, "learning_rate": 3.934828997937947e-06, "loss": 0.15971832275390624, "step": 92320 }, { "epoch": 0.7983069752963657, "grad_norm": 0.05334734068703679, "learning_rate": 3.934635406446601e-06, "loss": 0.07519111633300782, "step": 92325 }, { "epoch": 0.7983502088179091, "grad_norm": 30.79284464606657, "learning_rate": 3.934441810644798e-06, "loss": 0.4175384521484375, "step": 92330 }, { "epoch": 0.7983934423394523, "grad_norm": 3.801779116537227, "learning_rate": 3.934248210533433e-06, "loss": 0.2319915771484375, "step": 92335 }, { "epoch": 0.7984366758609955, "grad_norm": 24.13800469736774, "learning_rate": 3.934054606113397e-06, "loss": 0.307916259765625, "step": 92340 }, { "epoch": 0.7984799093825389, "grad_norm": 4.506257468537123, "learning_rate": 3.933860997385584e-06, "loss": 0.06680755615234375, "step": 92345 }, { "epoch": 0.7985231429040821, "grad_norm": 0.3259549442746635, "learning_rate": 3.933667384350887e-06, "loss": 0.18569107055664064, "step": 92350 }, { "epoch": 0.7985663764256253, "grad_norm": 5.2124605664563814, "learning_rate": 3.933473767010198e-06, "loss": 0.04595947265625, "step": 92355 }, { "epoch": 0.7986096099471687, "grad_norm": 1.6002999591779297, "learning_rate": 3.93328014536441e-06, "loss": 0.11344451904296875, "step": 92360 }, { "epoch": 0.7986528434687119, "grad_norm": 46.01426342036743, "learning_rate": 3.933086519414418e-06, "loss": 0.2126678466796875, "step": 92365 }, { "epoch": 0.7986960769902551, "grad_norm": 25.361574096558282, "learning_rate": 3.9328928891611124e-06, "loss": 0.19990081787109376, "step": 92370 }, { "epoch": 0.7987393105117985, "grad_norm": 3.3250240829963027, "learning_rate": 3.932699254605388e-06, "loss": 0.20287094116210938, "step": 92375 }, { "epoch": 0.7987825440333417, "grad_norm": 11.58288187379728, "learning_rate": 3.932505615748135e-06, "loss": 0.079949951171875, "step": 92380 }, { "epoch": 0.7988257775548849, "grad_norm": 5.8722131317466735, "learning_rate": 3.932311972590251e-06, "loss": 0.2655181884765625, "step": 92385 }, { "epoch": 0.7988690110764283, "grad_norm": 0.7803866800265802, "learning_rate": 3.9321183251326245e-06, "loss": 0.0449066162109375, "step": 92390 }, { "epoch": 0.7989122445979715, "grad_norm": 39.292922849451735, "learning_rate": 3.9319246733761525e-06, "loss": 0.42749481201171874, "step": 92395 }, { "epoch": 0.7989554781195147, "grad_norm": 13.755140509987424, "learning_rate": 3.931731017321725e-06, "loss": 0.13624267578125, "step": 92400 }, { "epoch": 0.798998711641058, "grad_norm": 1.273970299604474, "learning_rate": 3.931537356970236e-06, "loss": 0.023383331298828126, "step": 92405 }, { "epoch": 0.7990419451626013, "grad_norm": 14.637090427867467, "learning_rate": 3.93134369232258e-06, "loss": 0.24021949768066406, "step": 92410 }, { "epoch": 0.7990851786841445, "grad_norm": 20.05656434451133, "learning_rate": 3.93115002337965e-06, "loss": 0.3741485595703125, "step": 92415 }, { "epoch": 0.7991284122056878, "grad_norm": 0.4467921421900951, "learning_rate": 3.9309563501423375e-06, "loss": 0.05616607666015625, "step": 92420 }, { "epoch": 0.7991716457272311, "grad_norm": 7.65103398587203, "learning_rate": 3.930762672611536e-06, "loss": 0.15691490173339845, "step": 92425 }, { "epoch": 0.7992148792487743, "grad_norm": 18.42180739764779, "learning_rate": 3.930568990788139e-06, "loss": 0.501165771484375, "step": 92430 }, { "epoch": 0.7992581127703176, "grad_norm": 29.405156055602273, "learning_rate": 3.930375304673041e-06, "loss": 0.22984352111816406, "step": 92435 }, { "epoch": 0.7993013462918609, "grad_norm": 0.8857785828366646, "learning_rate": 3.9301816142671335e-06, "loss": 0.09855842590332031, "step": 92440 }, { "epoch": 0.7993445798134041, "grad_norm": 0.17305186977375742, "learning_rate": 3.9299879195713106e-06, "loss": 0.09964351654052735, "step": 92445 }, { "epoch": 0.7993878133349474, "grad_norm": 4.595243731022256, "learning_rate": 3.929794220586467e-06, "loss": 0.1456390380859375, "step": 92450 }, { "epoch": 0.7994310468564907, "grad_norm": 5.942740649580259, "learning_rate": 3.929600517313493e-06, "loss": 0.178472900390625, "step": 92455 }, { "epoch": 0.7994742803780339, "grad_norm": 15.430187798581143, "learning_rate": 3.929406809753285e-06, "loss": 0.15454673767089844, "step": 92460 }, { "epoch": 0.7995175138995771, "grad_norm": 0.22783441132572155, "learning_rate": 3.929213097906734e-06, "loss": 0.14294204711914063, "step": 92465 }, { "epoch": 0.7995607474211205, "grad_norm": 6.0271175497669445, "learning_rate": 3.929019381774734e-06, "loss": 0.08320159912109375, "step": 92470 }, { "epoch": 0.7996039809426637, "grad_norm": 5.437678914011208, "learning_rate": 3.928825661358179e-06, "loss": 0.15954132080078126, "step": 92475 }, { "epoch": 0.799647214464207, "grad_norm": 0.9253612641925588, "learning_rate": 3.928631936657963e-06, "loss": 0.47479896545410155, "step": 92480 }, { "epoch": 0.7996904479857503, "grad_norm": 10.426944056722705, "learning_rate": 3.928438207674977e-06, "loss": 0.1568511962890625, "step": 92485 }, { "epoch": 0.7997336815072935, "grad_norm": 0.909210644618662, "learning_rate": 3.928244474410118e-06, "loss": 0.264678955078125, "step": 92490 }, { "epoch": 0.7997769150288367, "grad_norm": 3.4302851152836875, "learning_rate": 3.928050736864276e-06, "loss": 0.210693359375, "step": 92495 }, { "epoch": 0.79982014855038, "grad_norm": 60.50034220352452, "learning_rate": 3.927856995038347e-06, "loss": 0.26302375793457033, "step": 92500 }, { "epoch": 0.7998633820719233, "grad_norm": 3.7777724742941565, "learning_rate": 3.927663248933222e-06, "loss": 0.034039306640625, "step": 92505 }, { "epoch": 0.7999066155934665, "grad_norm": 23.33922724250579, "learning_rate": 3.927469498549797e-06, "loss": 0.14366607666015624, "step": 92510 }, { "epoch": 0.7999498491150098, "grad_norm": 0.07210953608005104, "learning_rate": 3.927275743888966e-06, "loss": 0.29071731567382814, "step": 92515 }, { "epoch": 0.7999930826365531, "grad_norm": 0.5822423155476799, "learning_rate": 3.9270819849516186e-06, "loss": 0.02685127258300781, "step": 92520 }, { "epoch": 0.8000363161580963, "grad_norm": 1.6916218608749354, "learning_rate": 3.9268882217386525e-06, "loss": 0.16524658203125, "step": 92525 }, { "epoch": 0.8000795496796396, "grad_norm": 22.142822138799215, "learning_rate": 3.926694454250959e-06, "loss": 0.4348182678222656, "step": 92530 }, { "epoch": 0.8001227832011829, "grad_norm": 36.600823601669276, "learning_rate": 3.926500682489433e-06, "loss": 0.6305038452148437, "step": 92535 }, { "epoch": 0.8001660167227261, "grad_norm": 3.078114642542992, "learning_rate": 3.926306906454969e-06, "loss": 0.079742431640625, "step": 92540 }, { "epoch": 0.8002092502442694, "grad_norm": 7.268501895306266, "learning_rate": 3.926113126148458e-06, "loss": 0.0688934326171875, "step": 92545 }, { "epoch": 0.8002524837658127, "grad_norm": 0.6426060544512229, "learning_rate": 3.925919341570794e-06, "loss": 0.040313720703125, "step": 92550 }, { "epoch": 0.8002957172873559, "grad_norm": 5.409806456682416, "learning_rate": 3.925725552722874e-06, "loss": 0.1094573974609375, "step": 92555 }, { "epoch": 0.8003389508088992, "grad_norm": 7.351447214148301, "learning_rate": 3.925531759605589e-06, "loss": 0.1980926513671875, "step": 92560 }, { "epoch": 0.8003821843304425, "grad_norm": 25.42163328835812, "learning_rate": 3.925337962219833e-06, "loss": 0.3294792175292969, "step": 92565 }, { "epoch": 0.8004254178519857, "grad_norm": 14.99158472966388, "learning_rate": 3.9251441605665e-06, "loss": 0.09705772399902343, "step": 92570 }, { "epoch": 0.800468651373529, "grad_norm": 1.372209049818296, "learning_rate": 3.924950354646483e-06, "loss": 0.0439849853515625, "step": 92575 }, { "epoch": 0.8005118848950722, "grad_norm": 0.8953540462281582, "learning_rate": 3.9247565444606775e-06, "loss": 0.11642074584960938, "step": 92580 }, { "epoch": 0.8005551184166155, "grad_norm": 6.757350826701074, "learning_rate": 3.924562730009977e-06, "loss": 0.18437156677246094, "step": 92585 }, { "epoch": 0.8005983519381588, "grad_norm": 33.70799256992889, "learning_rate": 3.924368911295275e-06, "loss": 0.52410888671875, "step": 92590 }, { "epoch": 0.800641585459702, "grad_norm": 1.2823332416116464, "learning_rate": 3.924175088317465e-06, "loss": 0.06194610595703125, "step": 92595 }, { "epoch": 0.8006848189812453, "grad_norm": 0.5635062924624193, "learning_rate": 3.9239812610774406e-06, "loss": 0.06687240600585938, "step": 92600 }, { "epoch": 0.8007280525027886, "grad_norm": 1.80541559381667, "learning_rate": 3.923787429576096e-06, "loss": 0.14883956909179688, "step": 92605 }, { "epoch": 0.8007712860243318, "grad_norm": 16.315970296215383, "learning_rate": 3.923593593814327e-06, "loss": 0.4135009765625, "step": 92610 }, { "epoch": 0.8008145195458751, "grad_norm": 0.6787537504924882, "learning_rate": 3.923399753793025e-06, "loss": 0.06412429809570312, "step": 92615 }, { "epoch": 0.8008577530674184, "grad_norm": 13.689856851874847, "learning_rate": 3.923205909513084e-06, "loss": 0.1080108642578125, "step": 92620 }, { "epoch": 0.8009009865889616, "grad_norm": 4.084245462450997, "learning_rate": 3.9230120609754005e-06, "loss": 0.563348388671875, "step": 92625 }, { "epoch": 0.8009442201105049, "grad_norm": 1.9960631017762054, "learning_rate": 3.922818208180866e-06, "loss": 0.12706737518310546, "step": 92630 }, { "epoch": 0.8009874536320482, "grad_norm": 15.426846102434077, "learning_rate": 3.922624351130376e-06, "loss": 0.4164558410644531, "step": 92635 }, { "epoch": 0.8010306871535914, "grad_norm": 0.5127395067227509, "learning_rate": 3.922430489824824e-06, "loss": 0.2246551513671875, "step": 92640 }, { "epoch": 0.8010739206751347, "grad_norm": 4.522033395134737, "learning_rate": 3.922236624265103e-06, "loss": 0.35846519470214844, "step": 92645 }, { "epoch": 0.801117154196678, "grad_norm": 1.321412291506661, "learning_rate": 3.9220427544521095e-06, "loss": 0.18853759765625, "step": 92650 }, { "epoch": 0.8011603877182212, "grad_norm": 5.791173170118194, "learning_rate": 3.921848880386736e-06, "loss": 0.221112060546875, "step": 92655 }, { "epoch": 0.8012036212397645, "grad_norm": 53.904198461436685, "learning_rate": 3.921655002069876e-06, "loss": 0.23097152709960939, "step": 92660 }, { "epoch": 0.8012468547613077, "grad_norm": 3.2602550902590273, "learning_rate": 3.921461119502426e-06, "loss": 0.18607120513916015, "step": 92665 }, { "epoch": 0.801290088282851, "grad_norm": 17.869585479487768, "learning_rate": 3.921267232685277e-06, "loss": 0.1027252197265625, "step": 92670 }, { "epoch": 0.8013333218043942, "grad_norm": 8.34773747301781, "learning_rate": 3.921073341619327e-06, "loss": 0.2110107421875, "step": 92675 }, { "epoch": 0.8013765553259375, "grad_norm": 4.487732916900986, "learning_rate": 3.9208794463054665e-06, "loss": 0.21533699035644532, "step": 92680 }, { "epoch": 0.8014197888474808, "grad_norm": 1.2614342346238956, "learning_rate": 3.920685546744593e-06, "loss": 0.20234947204589843, "step": 92685 }, { "epoch": 0.801463022369024, "grad_norm": 5.788851131836947, "learning_rate": 3.920491642937597e-06, "loss": 0.13173980712890626, "step": 92690 }, { "epoch": 0.8015062558905673, "grad_norm": 0.7226855643848199, "learning_rate": 3.9202977348853764e-06, "loss": 0.07695770263671875, "step": 92695 }, { "epoch": 0.8015494894121106, "grad_norm": 2.4628015612757816, "learning_rate": 3.920103822588824e-06, "loss": 0.0336212158203125, "step": 92700 }, { "epoch": 0.8015927229336538, "grad_norm": 35.125354116211064, "learning_rate": 3.919909906048834e-06, "loss": 0.445184326171875, "step": 92705 }, { "epoch": 0.8016359564551971, "grad_norm": 1.6227807922175927, "learning_rate": 3.919715985266299e-06, "loss": 0.1874847412109375, "step": 92710 }, { "epoch": 0.8016791899767404, "grad_norm": 1.9577214048566864, "learning_rate": 3.919522060242117e-06, "loss": 0.14758739471435547, "step": 92715 }, { "epoch": 0.8017224234982836, "grad_norm": 6.429515908187052, "learning_rate": 3.9193281309771805e-06, "loss": 0.10803375244140626, "step": 92720 }, { "epoch": 0.8017656570198269, "grad_norm": 0.47656764611757696, "learning_rate": 3.919134197472382e-06, "loss": 0.4406341552734375, "step": 92725 }, { "epoch": 0.8018088905413702, "grad_norm": 0.3647651882349916, "learning_rate": 3.918940259728621e-06, "loss": 0.08083877563476563, "step": 92730 }, { "epoch": 0.8018521240629134, "grad_norm": 3.702912713693849, "learning_rate": 3.918746317746787e-06, "loss": 0.11965103149414062, "step": 92735 }, { "epoch": 0.8018953575844567, "grad_norm": 7.213242240606571, "learning_rate": 3.918552371527775e-06, "loss": 0.24363765716552735, "step": 92740 }, { "epoch": 0.801938591106, "grad_norm": 4.895785217804324, "learning_rate": 3.918358421072482e-06, "loss": 0.0635406494140625, "step": 92745 }, { "epoch": 0.8019818246275432, "grad_norm": 22.167265027651695, "learning_rate": 3.9181644663818015e-06, "loss": 0.23180389404296875, "step": 92750 }, { "epoch": 0.8020250581490864, "grad_norm": 5.683842763535096, "learning_rate": 3.9179705074566264e-06, "loss": 0.3337574005126953, "step": 92755 }, { "epoch": 0.8020682916706298, "grad_norm": 0.8738657003139461, "learning_rate": 3.917776544297853e-06, "loss": 0.030842399597167967, "step": 92760 }, { "epoch": 0.802111525192173, "grad_norm": 3.88843653926837, "learning_rate": 3.917582576906375e-06, "loss": 0.16099853515625, "step": 92765 }, { "epoch": 0.8021547587137162, "grad_norm": 6.946679860468267, "learning_rate": 3.917388605283088e-06, "loss": 0.0734405517578125, "step": 92770 }, { "epoch": 0.8021979922352596, "grad_norm": 34.74940288524373, "learning_rate": 3.917194629428884e-06, "loss": 0.4301136016845703, "step": 92775 }, { "epoch": 0.8022412257568028, "grad_norm": 0.1315037054201113, "learning_rate": 3.917000649344662e-06, "loss": 0.18590593338012695, "step": 92780 }, { "epoch": 0.802284459278346, "grad_norm": 1.4163504165973009, "learning_rate": 3.9168066650313125e-06, "loss": 0.3841072082519531, "step": 92785 }, { "epoch": 0.8023276927998894, "grad_norm": 12.793210915102724, "learning_rate": 3.916612676489731e-06, "loss": 0.32764434814453125, "step": 92790 }, { "epoch": 0.8023709263214326, "grad_norm": 28.79345202113418, "learning_rate": 3.916418683720815e-06, "loss": 0.2957763671875, "step": 92795 }, { "epoch": 0.8024141598429758, "grad_norm": 2.680157793209006, "learning_rate": 3.916224686725456e-06, "loss": 0.07757110595703125, "step": 92800 }, { "epoch": 0.8024573933645192, "grad_norm": 2.7791853119848855, "learning_rate": 3.916030685504548e-06, "loss": 0.19139328002929687, "step": 92805 }, { "epoch": 0.8025006268860624, "grad_norm": 13.134814735097018, "learning_rate": 3.91583668005899e-06, "loss": 0.0387786865234375, "step": 92810 }, { "epoch": 0.8025438604076056, "grad_norm": 7.093799941396712, "learning_rate": 3.915642670389673e-06, "loss": 0.1705230712890625, "step": 92815 }, { "epoch": 0.802587093929149, "grad_norm": 8.057979900457296, "learning_rate": 3.915448656497493e-06, "loss": 0.041335105895996094, "step": 92820 }, { "epoch": 0.8026303274506922, "grad_norm": 29.9459623875706, "learning_rate": 3.915254638383345e-06, "loss": 0.0924224853515625, "step": 92825 }, { "epoch": 0.8026735609722354, "grad_norm": 6.600344770095196, "learning_rate": 3.915060616048123e-06, "loss": 0.14716072082519532, "step": 92830 }, { "epoch": 0.8027167944937788, "grad_norm": 2.4381892409202526, "learning_rate": 3.914866589492722e-06, "loss": 0.023974227905273437, "step": 92835 }, { "epoch": 0.802760028015322, "grad_norm": 10.36787087844707, "learning_rate": 3.914672558718037e-06, "loss": 0.11572265625, "step": 92840 }, { "epoch": 0.8028032615368652, "grad_norm": 18.568983762081935, "learning_rate": 3.914478523724964e-06, "loss": 0.2902618408203125, "step": 92845 }, { "epoch": 0.8028464950584084, "grad_norm": 73.82242988507319, "learning_rate": 3.914284484514397e-06, "loss": 0.158636474609375, "step": 92850 }, { "epoch": 0.8028897285799518, "grad_norm": 15.48611989951293, "learning_rate": 3.91409044108723e-06, "loss": 0.0916168212890625, "step": 92855 }, { "epoch": 0.802932962101495, "grad_norm": 10.130314768553765, "learning_rate": 3.9138963934443576e-06, "loss": 0.0784820556640625, "step": 92860 }, { "epoch": 0.8029761956230382, "grad_norm": 4.348495180295246, "learning_rate": 3.913702341586678e-06, "loss": 0.084429931640625, "step": 92865 }, { "epoch": 0.8030194291445816, "grad_norm": 0.4819830231097809, "learning_rate": 3.913508285515083e-06, "loss": 0.2215362548828125, "step": 92870 }, { "epoch": 0.8030626626661248, "grad_norm": 5.346595201857679, "learning_rate": 3.913314225230468e-06, "loss": 0.06258926391601563, "step": 92875 }, { "epoch": 0.803105896187668, "grad_norm": 21.0906583375151, "learning_rate": 3.913120160733729e-06, "loss": 0.0712249755859375, "step": 92880 }, { "epoch": 0.8031491297092114, "grad_norm": 6.851277178817505, "learning_rate": 3.9129260920257604e-06, "loss": 0.24749755859375, "step": 92885 }, { "epoch": 0.8031923632307546, "grad_norm": 15.47923235503727, "learning_rate": 3.912732019107457e-06, "loss": 0.3617225646972656, "step": 92890 }, { "epoch": 0.8032355967522978, "grad_norm": 7.033865309605283, "learning_rate": 3.912537941979715e-06, "loss": 0.2458881378173828, "step": 92895 }, { "epoch": 0.8032788302738412, "grad_norm": 5.9751787565918875, "learning_rate": 3.912343860643428e-06, "loss": 0.115472412109375, "step": 92900 }, { "epoch": 0.8033220637953844, "grad_norm": 5.928507434830138, "learning_rate": 3.9121497750994925e-06, "loss": 0.1904205322265625, "step": 92905 }, { "epoch": 0.8033652973169276, "grad_norm": 4.160149660705991, "learning_rate": 3.911955685348802e-06, "loss": 0.06718330383300782, "step": 92910 }, { "epoch": 0.803408530838471, "grad_norm": 7.307107645367684, "learning_rate": 3.911761591392253e-06, "loss": 0.1364654541015625, "step": 92915 }, { "epoch": 0.8034517643600142, "grad_norm": 5.6568974950677315, "learning_rate": 3.911567493230739e-06, "loss": 0.121600341796875, "step": 92920 }, { "epoch": 0.8034949978815574, "grad_norm": 0.7841384345480191, "learning_rate": 3.9113733908651575e-06, "loss": 0.0443389892578125, "step": 92925 }, { "epoch": 0.8035382314031007, "grad_norm": 0.4849895747383709, "learning_rate": 3.911179284296403e-06, "loss": 0.16480789184570313, "step": 92930 }, { "epoch": 0.803581464924644, "grad_norm": 12.263102971402436, "learning_rate": 3.910985173525369e-06, "loss": 0.4254547119140625, "step": 92935 }, { "epoch": 0.8036246984461872, "grad_norm": 1.834183742382833, "learning_rate": 3.910791058552952e-06, "loss": 0.1211273193359375, "step": 92940 }, { "epoch": 0.8036679319677305, "grad_norm": 6.659265811320751, "learning_rate": 3.910596939380046e-06, "loss": 0.1618194580078125, "step": 92945 }, { "epoch": 0.8037111654892738, "grad_norm": 5.1845743993271, "learning_rate": 3.910402816007548e-06, "loss": 0.10523529052734375, "step": 92950 }, { "epoch": 0.803754399010817, "grad_norm": 1.9465755781162704, "learning_rate": 3.9102086884363546e-06, "loss": 0.08956985473632813, "step": 92955 }, { "epoch": 0.8037976325323603, "grad_norm": 11.50332816862376, "learning_rate": 3.9100145566673575e-06, "loss": 0.17951202392578125, "step": 92960 }, { "epoch": 0.8038408660539036, "grad_norm": 2.7685899403237397, "learning_rate": 3.909820420701454e-06, "loss": 0.1736133575439453, "step": 92965 }, { "epoch": 0.8038840995754468, "grad_norm": 32.46819877396866, "learning_rate": 3.909626280539538e-06, "loss": 0.1958221435546875, "step": 92970 }, { "epoch": 0.80392733309699, "grad_norm": 72.58669241609543, "learning_rate": 3.9094321361825075e-06, "loss": 0.40289745330810545, "step": 92975 }, { "epoch": 0.8039705666185334, "grad_norm": 2.3594755507153655, "learning_rate": 3.909237987631256e-06, "loss": 0.086614990234375, "step": 92980 }, { "epoch": 0.8040138001400766, "grad_norm": 4.9462867645935304, "learning_rate": 3.909043834886679e-06, "loss": 0.11899185180664062, "step": 92985 }, { "epoch": 0.8040570336616198, "grad_norm": 3.4840662683701176, "learning_rate": 3.9088496779496724e-06, "loss": 0.0849538803100586, "step": 92990 }, { "epoch": 0.8041002671831632, "grad_norm": 0.7976939488115742, "learning_rate": 3.908655516821131e-06, "loss": 0.20948944091796876, "step": 92995 }, { "epoch": 0.8041435007047064, "grad_norm": 1.3270547374338206, "learning_rate": 3.908461351501951e-06, "loss": 0.12018470764160157, "step": 93000 }, { "epoch": 0.8041867342262496, "grad_norm": 8.188181044851857, "learning_rate": 3.908267181993027e-06, "loss": 0.07029190063476562, "step": 93005 }, { "epoch": 0.804229967747793, "grad_norm": 1.4222338893668303, "learning_rate": 3.9080730082952564e-06, "loss": 0.08461761474609375, "step": 93010 }, { "epoch": 0.8042732012693362, "grad_norm": 0.5609570816241793, "learning_rate": 3.907878830409532e-06, "loss": 0.17384490966796876, "step": 93015 }, { "epoch": 0.8043164347908794, "grad_norm": 9.172318802734543, "learning_rate": 3.907684648336752e-06, "loss": 0.0900665283203125, "step": 93020 }, { "epoch": 0.8043596683124227, "grad_norm": 15.025119897125803, "learning_rate": 3.907490462077809e-06, "loss": 0.150115966796875, "step": 93025 }, { "epoch": 0.804402901833966, "grad_norm": 38.590476356526224, "learning_rate": 3.9072962716336e-06, "loss": 0.11543350219726563, "step": 93030 }, { "epoch": 0.8044461353555092, "grad_norm": 1.8107973047197756, "learning_rate": 3.907102077005022e-06, "loss": 0.165899658203125, "step": 93035 }, { "epoch": 0.8044893688770525, "grad_norm": 17.663889775891285, "learning_rate": 3.906907878192969e-06, "loss": 0.08592376708984376, "step": 93040 }, { "epoch": 0.8045326023985958, "grad_norm": 0.6892254635994675, "learning_rate": 3.906713675198337e-06, "loss": 0.12420997619628907, "step": 93045 }, { "epoch": 0.804575835920139, "grad_norm": 12.101268795723719, "learning_rate": 3.906519468022022e-06, "loss": 0.45218048095703123, "step": 93050 }, { "epoch": 0.8046190694416823, "grad_norm": 16.426997915485305, "learning_rate": 3.906325256664919e-06, "loss": 0.219390869140625, "step": 93055 }, { "epoch": 0.8046623029632256, "grad_norm": 49.985006318939234, "learning_rate": 3.906131041127923e-06, "loss": 0.17869110107421876, "step": 93060 }, { "epoch": 0.8047055364847688, "grad_norm": 11.497065016488836, "learning_rate": 3.905936821411931e-06, "loss": 0.15754852294921876, "step": 93065 }, { "epoch": 0.8047487700063121, "grad_norm": 14.313935183101233, "learning_rate": 3.90574259751784e-06, "loss": 0.11242561340332032, "step": 93070 }, { "epoch": 0.8047920035278554, "grad_norm": 24.59239338528484, "learning_rate": 3.905548369446543e-06, "loss": 0.178216552734375, "step": 93075 }, { "epoch": 0.8048352370493986, "grad_norm": 20.189430848548156, "learning_rate": 3.9053541371989374e-06, "loss": 0.06888885498046875, "step": 93080 }, { "epoch": 0.8048784705709419, "grad_norm": 15.854818639137621, "learning_rate": 3.905159900775918e-06, "loss": 0.17513504028320312, "step": 93085 }, { "epoch": 0.8049217040924852, "grad_norm": 1.8664223790526573, "learning_rate": 3.904965660178381e-06, "loss": 0.09412841796875, "step": 93090 }, { "epoch": 0.8049649376140284, "grad_norm": 0.46994916573473977, "learning_rate": 3.9047714154072226e-06, "loss": 0.14540634155273438, "step": 93095 }, { "epoch": 0.8050081711355717, "grad_norm": 18.201502333316448, "learning_rate": 3.904577166463339e-06, "loss": 0.11110916137695312, "step": 93100 }, { "epoch": 0.8050514046571149, "grad_norm": 18.19138875819581, "learning_rate": 3.904382913347625e-06, "loss": 0.13332595825195312, "step": 93105 }, { "epoch": 0.8050946381786582, "grad_norm": 3.657558558770764, "learning_rate": 3.904188656060977e-06, "loss": 0.06890907287597656, "step": 93110 }, { "epoch": 0.8051378717002015, "grad_norm": 34.09556876415769, "learning_rate": 3.90399439460429e-06, "loss": 0.12557945251464844, "step": 93115 }, { "epoch": 0.8051811052217447, "grad_norm": 5.245789843544906, "learning_rate": 3.903800128978461e-06, "loss": 0.1195526123046875, "step": 93120 }, { "epoch": 0.805224338743288, "grad_norm": 44.80565123500588, "learning_rate": 3.903605859184387e-06, "loss": 0.14048004150390625, "step": 93125 }, { "epoch": 0.8052675722648313, "grad_norm": 15.352622717724978, "learning_rate": 3.903411585222962e-06, "loss": 0.07525787353515626, "step": 93130 }, { "epoch": 0.8053108057863745, "grad_norm": 6.003721627816234, "learning_rate": 3.903217307095082e-06, "loss": 0.11541824340820313, "step": 93135 }, { "epoch": 0.8053540393079178, "grad_norm": 4.216642446839631, "learning_rate": 3.903023024801644e-06, "loss": 0.07777481079101563, "step": 93140 }, { "epoch": 0.805397272829461, "grad_norm": 4.4834341757931035, "learning_rate": 3.902828738343543e-06, "loss": 0.11447906494140625, "step": 93145 }, { "epoch": 0.8054405063510043, "grad_norm": 8.614819760387377, "learning_rate": 3.902634447721676e-06, "loss": 0.22160720825195312, "step": 93150 }, { "epoch": 0.8054837398725476, "grad_norm": 33.957181942118865, "learning_rate": 3.902440152936939e-06, "loss": 0.1746673583984375, "step": 93155 }, { "epoch": 0.8055269733940909, "grad_norm": 3.493435037711347, "learning_rate": 3.902245853990228e-06, "loss": 0.11608047485351562, "step": 93160 }, { "epoch": 0.8055702069156341, "grad_norm": 0.37693850428006664, "learning_rate": 3.902051550882438e-06, "loss": 0.037200927734375, "step": 93165 }, { "epoch": 0.8056134404371774, "grad_norm": 0.3563771521949583, "learning_rate": 3.9018572436144655e-06, "loss": 0.0898895263671875, "step": 93170 }, { "epoch": 0.8056566739587206, "grad_norm": 27.96937082056711, "learning_rate": 3.901662932187209e-06, "loss": 0.2642059326171875, "step": 93175 }, { "epoch": 0.8056999074802639, "grad_norm": 0.0424821665256241, "learning_rate": 3.9014686166015614e-06, "loss": 0.07700481414794921, "step": 93180 }, { "epoch": 0.8057431410018071, "grad_norm": 11.666090780322907, "learning_rate": 3.901274296858421e-06, "loss": 0.204150390625, "step": 93185 }, { "epoch": 0.8057863745233504, "grad_norm": 5.012100778103978, "learning_rate": 3.9010799729586825e-06, "loss": 0.037646484375, "step": 93190 }, { "epoch": 0.8058296080448937, "grad_norm": 17.278451496925655, "learning_rate": 3.900885644903242e-06, "loss": 0.251873779296875, "step": 93195 }, { "epoch": 0.8058728415664369, "grad_norm": 0.973228268423112, "learning_rate": 3.900691312692999e-06, "loss": 0.30599250793457033, "step": 93200 }, { "epoch": 0.8059160750879802, "grad_norm": 6.487051384838878, "learning_rate": 3.900496976328844e-06, "loss": 0.17117462158203126, "step": 93205 }, { "epoch": 0.8059593086095235, "grad_norm": 16.62304706560556, "learning_rate": 3.900302635811679e-06, "loss": 0.0844329833984375, "step": 93210 }, { "epoch": 0.8060025421310667, "grad_norm": 20.700645116078235, "learning_rate": 3.900108291142397e-06, "loss": 0.4677764892578125, "step": 93215 }, { "epoch": 0.80604577565261, "grad_norm": 68.91262538488043, "learning_rate": 3.899913942321894e-06, "loss": 0.22470703125, "step": 93220 }, { "epoch": 0.8060890091741533, "grad_norm": 4.896651517944027, "learning_rate": 3.89971958935107e-06, "loss": 0.09070968627929688, "step": 93225 }, { "epoch": 0.8061322426956965, "grad_norm": 0.9715588712840201, "learning_rate": 3.899525232230817e-06, "loss": 0.06591644287109374, "step": 93230 }, { "epoch": 0.8061754762172398, "grad_norm": 3.0097794046780213, "learning_rate": 3.899330870962033e-06, "loss": 0.0855133056640625, "step": 93235 }, { "epoch": 0.8062187097387831, "grad_norm": 2.0759381219710997, "learning_rate": 3.899136505545615e-06, "loss": 0.1917083740234375, "step": 93240 }, { "epoch": 0.8062619432603263, "grad_norm": 31.404515183346692, "learning_rate": 3.898942135982459e-06, "loss": 0.15052108764648436, "step": 93245 }, { "epoch": 0.8063051767818696, "grad_norm": 0.2829210708920107, "learning_rate": 3.89874776227346e-06, "loss": 0.14804534912109374, "step": 93250 }, { "epoch": 0.8063484103034129, "grad_norm": 2.0048835176753674, "learning_rate": 3.898553384419517e-06, "loss": 0.03131256103515625, "step": 93255 }, { "epoch": 0.8063916438249561, "grad_norm": 0.26016927968616166, "learning_rate": 3.898359002421526e-06, "loss": 0.057303237915039065, "step": 93260 }, { "epoch": 0.8064348773464994, "grad_norm": 1.2711658144262832, "learning_rate": 3.89816461628038e-06, "loss": 0.07011260986328124, "step": 93265 }, { "epoch": 0.8064781108680427, "grad_norm": 8.212551889656334, "learning_rate": 3.89797022599698e-06, "loss": 0.18810653686523438, "step": 93270 }, { "epoch": 0.8065213443895859, "grad_norm": 6.643960064958697, "learning_rate": 3.897775831572221e-06, "loss": 0.062786865234375, "step": 93275 }, { "epoch": 0.8065645779111291, "grad_norm": 48.171001326407975, "learning_rate": 3.897581433006999e-06, "loss": 0.16525192260742189, "step": 93280 }, { "epoch": 0.8066078114326725, "grad_norm": 0.34135919010669435, "learning_rate": 3.89738703030221e-06, "loss": 0.14232330322265624, "step": 93285 }, { "epoch": 0.8066510449542157, "grad_norm": 2.2136577180326107, "learning_rate": 3.897192623458752e-06, "loss": 0.0343841552734375, "step": 93290 }, { "epoch": 0.8066942784757589, "grad_norm": 2.690720041694937, "learning_rate": 3.896998212477521e-06, "loss": 0.088702392578125, "step": 93295 }, { "epoch": 0.8067375119973023, "grad_norm": 4.817302869282315, "learning_rate": 3.896803797359413e-06, "loss": 0.04210357666015625, "step": 93300 }, { "epoch": 0.8067807455188455, "grad_norm": 2.478848391662359, "learning_rate": 3.896609378105325e-06, "loss": 0.21518783569335936, "step": 93305 }, { "epoch": 0.8068239790403887, "grad_norm": 5.05308709152808, "learning_rate": 3.896414954716154e-06, "loss": 0.2027923583984375, "step": 93310 }, { "epoch": 0.8068672125619321, "grad_norm": 30.592088716573414, "learning_rate": 3.896220527192796e-06, "loss": 0.22081375122070312, "step": 93315 }, { "epoch": 0.8069104460834753, "grad_norm": 16.60763852998459, "learning_rate": 3.896026095536149e-06, "loss": 0.2583015441894531, "step": 93320 }, { "epoch": 0.8069536796050185, "grad_norm": 23.083779354004953, "learning_rate": 3.895831659747108e-06, "loss": 0.3263336181640625, "step": 93325 }, { "epoch": 0.8069969131265619, "grad_norm": 15.478062010200604, "learning_rate": 3.895637219826571e-06, "loss": 0.1894378662109375, "step": 93330 }, { "epoch": 0.8070401466481051, "grad_norm": 31.210802382605138, "learning_rate": 3.895442775775435e-06, "loss": 0.4121490478515625, "step": 93335 }, { "epoch": 0.8070833801696483, "grad_norm": 47.47092550670514, "learning_rate": 3.895248327594594e-06, "loss": 0.1677001953125, "step": 93340 }, { "epoch": 0.8071266136911917, "grad_norm": 11.574487002670413, "learning_rate": 3.895053875284948e-06, "loss": 0.281011962890625, "step": 93345 }, { "epoch": 0.8071698472127349, "grad_norm": 0.7320712499364199, "learning_rate": 3.8948594188473935e-06, "loss": 0.10705909729003907, "step": 93350 }, { "epoch": 0.8072130807342781, "grad_norm": 14.410205571824557, "learning_rate": 3.8946649582828255e-06, "loss": 0.2079345703125, "step": 93355 }, { "epoch": 0.8072563142558213, "grad_norm": 28.73742991253295, "learning_rate": 3.894470493592142e-06, "loss": 0.2060821533203125, "step": 93360 }, { "epoch": 0.8072995477773647, "grad_norm": 21.456018418496182, "learning_rate": 3.894276024776238e-06, "loss": 0.36858596801757815, "step": 93365 }, { "epoch": 0.8073427812989079, "grad_norm": 1.8110821391270333, "learning_rate": 3.894081551836014e-06, "loss": 0.15630950927734374, "step": 93370 }, { "epoch": 0.8073860148204511, "grad_norm": 12.560553792022706, "learning_rate": 3.893887074772366e-06, "loss": 0.05021820068359375, "step": 93375 }, { "epoch": 0.8074292483419945, "grad_norm": 15.905913924484667, "learning_rate": 3.893692593586188e-06, "loss": 0.08767623901367187, "step": 93380 }, { "epoch": 0.8074724818635377, "grad_norm": 11.842681660085725, "learning_rate": 3.893498108278379e-06, "loss": 0.3031951904296875, "step": 93385 }, { "epoch": 0.8075157153850809, "grad_norm": 2.566694287750582, "learning_rate": 3.893303618849835e-06, "loss": 0.0243133544921875, "step": 93390 }, { "epoch": 0.8075589489066243, "grad_norm": 1.2410912523180408, "learning_rate": 3.893109125301453e-06, "loss": 0.5829540252685547, "step": 93395 }, { "epoch": 0.8076021824281675, "grad_norm": 0.529256969542748, "learning_rate": 3.892914627634133e-06, "loss": 0.02657318115234375, "step": 93400 }, { "epoch": 0.8076454159497107, "grad_norm": 2.303757532122008, "learning_rate": 3.892720125848769e-06, "loss": 0.0281219482421875, "step": 93405 }, { "epoch": 0.8076886494712541, "grad_norm": 8.577748315724474, "learning_rate": 3.892525619946257e-06, "loss": 0.04290618896484375, "step": 93410 }, { "epoch": 0.8077318829927973, "grad_norm": 12.024407024403429, "learning_rate": 3.892331109927497e-06, "loss": 0.132427978515625, "step": 93415 }, { "epoch": 0.8077751165143405, "grad_norm": 12.134906839504316, "learning_rate": 3.892136595793385e-06, "loss": 0.1050872802734375, "step": 93420 }, { "epoch": 0.8078183500358839, "grad_norm": 24.028162244430664, "learning_rate": 3.891942077544817e-06, "loss": 0.242425537109375, "step": 93425 }, { "epoch": 0.8078615835574271, "grad_norm": 9.456108633559916, "learning_rate": 3.891747555182692e-06, "loss": 0.06472091674804688, "step": 93430 }, { "epoch": 0.8079048170789703, "grad_norm": 7.500955108441148, "learning_rate": 3.891553028707906e-06, "loss": 0.180828857421875, "step": 93435 }, { "epoch": 0.8079480506005137, "grad_norm": 0.38433561553908363, "learning_rate": 3.891358498121355e-06, "loss": 0.055941009521484376, "step": 93440 }, { "epoch": 0.8079912841220569, "grad_norm": 5.721727621937552, "learning_rate": 3.891163963423938e-06, "loss": 0.05187530517578125, "step": 93445 }, { "epoch": 0.8080345176436001, "grad_norm": 0.11782998536125992, "learning_rate": 3.890969424616552e-06, "loss": 0.0331298828125, "step": 93450 }, { "epoch": 0.8080777511651434, "grad_norm": 16.961800312322428, "learning_rate": 3.890774881700093e-06, "loss": 0.23558006286621094, "step": 93455 }, { "epoch": 0.8081209846866867, "grad_norm": 2.0245982943309517, "learning_rate": 3.890580334675459e-06, "loss": 0.02208099365234375, "step": 93460 }, { "epoch": 0.8081642182082299, "grad_norm": 4.577207031260676, "learning_rate": 3.890385783543548e-06, "loss": 0.2767822265625, "step": 93465 }, { "epoch": 0.8082074517297732, "grad_norm": 3.7233705636310805, "learning_rate": 3.890191228305256e-06, "loss": 0.08669319152832031, "step": 93470 }, { "epoch": 0.8082506852513165, "grad_norm": 8.80448362658953, "learning_rate": 3.88999666896148e-06, "loss": 0.057137680053710935, "step": 93475 }, { "epoch": 0.8082939187728597, "grad_norm": 2.2160838321531364, "learning_rate": 3.889802105513119e-06, "loss": 0.2926490783691406, "step": 93480 }, { "epoch": 0.808337152294403, "grad_norm": 11.683577192156843, "learning_rate": 3.8896075379610695e-06, "loss": 0.721697998046875, "step": 93485 }, { "epoch": 0.8083803858159463, "grad_norm": 1.161938078486382, "learning_rate": 3.889412966306227e-06, "loss": 0.4952735900878906, "step": 93490 }, { "epoch": 0.8084236193374895, "grad_norm": 8.076876524538699, "learning_rate": 3.889218390549492e-06, "loss": 0.08031768798828125, "step": 93495 }, { "epoch": 0.8084668528590327, "grad_norm": 7.0708898960664195, "learning_rate": 3.88902381069176e-06, "loss": 0.2353302001953125, "step": 93500 }, { "epoch": 0.8085100863805761, "grad_norm": 0.7201092830597877, "learning_rate": 3.8888292267339285e-06, "loss": 0.16317138671875, "step": 93505 }, { "epoch": 0.8085533199021193, "grad_norm": 13.953028414535071, "learning_rate": 3.888634638676895e-06, "loss": 0.115899658203125, "step": 93510 }, { "epoch": 0.8085965534236625, "grad_norm": 2.5211395184545586, "learning_rate": 3.8884400465215585e-06, "loss": 0.228009033203125, "step": 93515 }, { "epoch": 0.8086397869452059, "grad_norm": 12.054083160940598, "learning_rate": 3.888245450268813e-06, "loss": 0.168463134765625, "step": 93520 }, { "epoch": 0.8086830204667491, "grad_norm": 3.6669909733631045, "learning_rate": 3.888050849919559e-06, "loss": 0.11962814331054687, "step": 93525 }, { "epoch": 0.8087262539882923, "grad_norm": 4.5477237656576195, "learning_rate": 3.8878562454746934e-06, "loss": 0.5094406127929687, "step": 93530 }, { "epoch": 0.8087694875098356, "grad_norm": 30.219138011514815, "learning_rate": 3.887661636935113e-06, "loss": 0.19092864990234376, "step": 93535 }, { "epoch": 0.8088127210313789, "grad_norm": 0.19607819739322557, "learning_rate": 3.887467024301714e-06, "loss": 0.12241363525390625, "step": 93540 }, { "epoch": 0.8088559545529221, "grad_norm": 35.48428474956718, "learning_rate": 3.8872724075753975e-06, "loss": 0.222015380859375, "step": 93545 }, { "epoch": 0.8088991880744654, "grad_norm": 26.204473696190572, "learning_rate": 3.887077786757059e-06, "loss": 0.2583320617675781, "step": 93550 }, { "epoch": 0.8089424215960087, "grad_norm": 1.1799766449146576, "learning_rate": 3.886883161847595e-06, "loss": 0.20364303588867189, "step": 93555 }, { "epoch": 0.8089856551175519, "grad_norm": 0.016832456451574163, "learning_rate": 3.8866885328479044e-06, "loss": 0.060635709762573244, "step": 93560 }, { "epoch": 0.8090288886390952, "grad_norm": 0.3730598661877132, "learning_rate": 3.886493899758885e-06, "loss": 0.4459541320800781, "step": 93565 }, { "epoch": 0.8090721221606385, "grad_norm": 1.449622593150568, "learning_rate": 3.8862992625814335e-06, "loss": 0.03241043090820313, "step": 93570 }, { "epoch": 0.8091153556821817, "grad_norm": 12.284406691118443, "learning_rate": 3.886104621316449e-06, "loss": 0.2079132080078125, "step": 93575 }, { "epoch": 0.809158589203725, "grad_norm": 3.257302707639821, "learning_rate": 3.885909975964828e-06, "loss": 0.18437118530273439, "step": 93580 }, { "epoch": 0.8092018227252683, "grad_norm": 16.339901296646936, "learning_rate": 3.8857153265274676e-06, "loss": 0.0803009033203125, "step": 93585 }, { "epoch": 0.8092450562468115, "grad_norm": 1.731924358093083, "learning_rate": 3.885520673005267e-06, "loss": 0.14964447021484376, "step": 93590 }, { "epoch": 0.8092882897683548, "grad_norm": 6.0749567157320445, "learning_rate": 3.885326015399124e-06, "loss": 0.123468017578125, "step": 93595 }, { "epoch": 0.8093315232898981, "grad_norm": 4.163846534336624, "learning_rate": 3.885131353709935e-06, "loss": 0.027765655517578126, "step": 93600 }, { "epoch": 0.8093747568114413, "grad_norm": 0.8803709542036211, "learning_rate": 3.8849366879385985e-06, "loss": 0.1566650390625, "step": 93605 }, { "epoch": 0.8094179903329846, "grad_norm": 3.9336732504851635, "learning_rate": 3.884742018086013e-06, "loss": 0.04686279296875, "step": 93610 }, { "epoch": 0.8094612238545279, "grad_norm": 35.486180199394745, "learning_rate": 3.884547344153074e-06, "loss": 0.204888916015625, "step": 93615 }, { "epoch": 0.8095044573760711, "grad_norm": 2.66888068343998, "learning_rate": 3.884352666140681e-06, "loss": 0.38133773803710935, "step": 93620 }, { "epoch": 0.8095476908976144, "grad_norm": 21.367725389185814, "learning_rate": 3.884157984049732e-06, "loss": 0.14497337341308594, "step": 93625 }, { "epoch": 0.8095909244191576, "grad_norm": 6.933009973814963, "learning_rate": 3.883963297881125e-06, "loss": 0.06207275390625, "step": 93630 }, { "epoch": 0.8096341579407009, "grad_norm": 16.280378044504676, "learning_rate": 3.883768607635758e-06, "loss": 0.052740478515625, "step": 93635 }, { "epoch": 0.8096773914622442, "grad_norm": 20.269309213937255, "learning_rate": 3.883573913314526e-06, "loss": 0.100860595703125, "step": 93640 }, { "epoch": 0.8097206249837874, "grad_norm": 6.562914674859246, "learning_rate": 3.883379214918331e-06, "loss": 0.07450103759765625, "step": 93645 }, { "epoch": 0.8097638585053307, "grad_norm": 2.7200062428688607, "learning_rate": 3.883184512448068e-06, "loss": 0.2732364654541016, "step": 93650 }, { "epoch": 0.809807092026874, "grad_norm": 0.7547294323902547, "learning_rate": 3.882989805904637e-06, "loss": 0.06562423706054688, "step": 93655 }, { "epoch": 0.8098503255484172, "grad_norm": 2.2519916850969612, "learning_rate": 3.882795095288935e-06, "loss": 0.28110504150390625, "step": 93660 }, { "epoch": 0.8098935590699605, "grad_norm": 1.3726655986935292, "learning_rate": 3.882600380601859e-06, "loss": 0.14806365966796875, "step": 93665 }, { "epoch": 0.8099367925915038, "grad_norm": 12.455918295389825, "learning_rate": 3.882405661844309e-06, "loss": 0.12697296142578124, "step": 93670 }, { "epoch": 0.809980026113047, "grad_norm": 29.82951960469572, "learning_rate": 3.882210939017182e-06, "loss": 0.203143310546875, "step": 93675 }, { "epoch": 0.8100232596345903, "grad_norm": 10.022571051098462, "learning_rate": 3.882016212121375e-06, "loss": 0.0531280517578125, "step": 93680 }, { "epoch": 0.8100664931561335, "grad_norm": 17.845137789290767, "learning_rate": 3.881821481157789e-06, "loss": 0.215899658203125, "step": 93685 }, { "epoch": 0.8101097266776768, "grad_norm": 14.096844636684242, "learning_rate": 3.881626746127319e-06, "loss": 0.11537017822265624, "step": 93690 }, { "epoch": 0.8101529601992201, "grad_norm": 1.6504507357543654, "learning_rate": 3.881432007030865e-06, "loss": 0.0470947265625, "step": 93695 }, { "epoch": 0.8101961937207633, "grad_norm": 1.0290505146979965, "learning_rate": 3.881237263869324e-06, "loss": 0.111956787109375, "step": 93700 }, { "epoch": 0.8102394272423066, "grad_norm": 3.17761019862778, "learning_rate": 3.8810425166435945e-06, "loss": 0.02568397521972656, "step": 93705 }, { "epoch": 0.8102826607638498, "grad_norm": 1.3405917546493356, "learning_rate": 3.8808477653545745e-06, "loss": 0.0591156005859375, "step": 93710 }, { "epoch": 0.8103258942853931, "grad_norm": 26.261161890007095, "learning_rate": 3.880653010003163e-06, "loss": 0.19029541015625, "step": 93715 }, { "epoch": 0.8103691278069364, "grad_norm": 0.3024739340501992, "learning_rate": 3.880458250590259e-06, "loss": 0.2144073486328125, "step": 93720 }, { "epoch": 0.8104123613284796, "grad_norm": 0.22207628995848336, "learning_rate": 3.880263487116758e-06, "loss": 0.07282829284667969, "step": 93725 }, { "epoch": 0.8104555948500229, "grad_norm": 6.051297383797549, "learning_rate": 3.880068719583558e-06, "loss": 0.04142608642578125, "step": 93730 }, { "epoch": 0.8104988283715662, "grad_norm": 1.7483445666879909, "learning_rate": 3.87987394799156e-06, "loss": 0.3048973083496094, "step": 93735 }, { "epoch": 0.8105420618931094, "grad_norm": 9.645805806175318, "learning_rate": 3.8796791723416615e-06, "loss": 0.06258544921875, "step": 93740 }, { "epoch": 0.8105852954146527, "grad_norm": 0.35190946418333, "learning_rate": 3.8794843926347606e-06, "loss": 0.101898193359375, "step": 93745 }, { "epoch": 0.810628528936196, "grad_norm": 0.8034243603160742, "learning_rate": 3.879289608871755e-06, "loss": 0.4437652587890625, "step": 93750 }, { "epoch": 0.8106717624577392, "grad_norm": 26.88993634117472, "learning_rate": 3.879094821053544e-06, "loss": 0.12059478759765625, "step": 93755 }, { "epoch": 0.8107149959792825, "grad_norm": 8.758918604026176, "learning_rate": 3.878900029181025e-06, "loss": 0.3297698974609375, "step": 93760 }, { "epoch": 0.8107582295008258, "grad_norm": 2.9104459079320035, "learning_rate": 3.8787052332550955e-06, "loss": 0.11863327026367188, "step": 93765 }, { "epoch": 0.810801463022369, "grad_norm": 1.122386523215488, "learning_rate": 3.878510433276657e-06, "loss": 0.12377166748046875, "step": 93770 }, { "epoch": 0.8108446965439123, "grad_norm": 14.73353797997312, "learning_rate": 3.878315629246605e-06, "loss": 0.108013916015625, "step": 93775 }, { "epoch": 0.8108879300654556, "grad_norm": 20.618432623969735, "learning_rate": 3.878120821165839e-06, "loss": 0.1802276611328125, "step": 93780 }, { "epoch": 0.8109311635869988, "grad_norm": 0.9432852320023812, "learning_rate": 3.877926009035258e-06, "loss": 0.07520179748535157, "step": 93785 }, { "epoch": 0.8109743971085421, "grad_norm": 2.793921034305909, "learning_rate": 3.877731192855758e-06, "loss": 0.1716644287109375, "step": 93790 }, { "epoch": 0.8110176306300854, "grad_norm": 0.6245271856402625, "learning_rate": 3.877536372628242e-06, "loss": 0.01890106201171875, "step": 93795 }, { "epoch": 0.8110608641516286, "grad_norm": 15.546105430372435, "learning_rate": 3.8773415483536046e-06, "loss": 0.12765045166015626, "step": 93800 }, { "epoch": 0.8111040976731718, "grad_norm": 0.1841215216439278, "learning_rate": 3.877146720032745e-06, "loss": 0.14998779296875, "step": 93805 }, { "epoch": 0.8111473311947152, "grad_norm": 5.47310548935682, "learning_rate": 3.876951887666562e-06, "loss": 0.08054046630859375, "step": 93810 }, { "epoch": 0.8111905647162584, "grad_norm": 0.03990807785573604, "learning_rate": 3.876757051255955e-06, "loss": 0.04614524841308594, "step": 93815 }, { "epoch": 0.8112337982378016, "grad_norm": 12.65646774553507, "learning_rate": 3.876562210801822e-06, "loss": 0.074468994140625, "step": 93820 }, { "epoch": 0.811277031759345, "grad_norm": 7.1642065234680015, "learning_rate": 3.876367366305061e-06, "loss": 0.03985099792480469, "step": 93825 }, { "epoch": 0.8113202652808882, "grad_norm": 1.301436065975622, "learning_rate": 3.876172517766572e-06, "loss": 0.11289291381835938, "step": 93830 }, { "epoch": 0.8113634988024314, "grad_norm": 8.670769615285504, "learning_rate": 3.875977665187253e-06, "loss": 0.08414764404296875, "step": 93835 }, { "epoch": 0.8114067323239748, "grad_norm": 1.8495029532072995, "learning_rate": 3.875782808568001e-06, "loss": 0.029541015625, "step": 93840 }, { "epoch": 0.811449965845518, "grad_norm": 3.459788329068792, "learning_rate": 3.875587947909716e-06, "loss": 0.0415740966796875, "step": 93845 }, { "epoch": 0.8114931993670612, "grad_norm": 5.701880114598224, "learning_rate": 3.875393083213299e-06, "loss": 0.3124504089355469, "step": 93850 }, { "epoch": 0.8115364328886046, "grad_norm": 1.4960285225992198, "learning_rate": 3.875198214479644e-06, "loss": 0.07677001953125, "step": 93855 }, { "epoch": 0.8115796664101478, "grad_norm": 1.024446969909071, "learning_rate": 3.875003341709654e-06, "loss": 0.11501836776733398, "step": 93860 }, { "epoch": 0.811622899931691, "grad_norm": 15.253310575155568, "learning_rate": 3.874808464904225e-06, "loss": 0.0884552001953125, "step": 93865 }, { "epoch": 0.8116661334532344, "grad_norm": 17.187984445054077, "learning_rate": 3.874613584064256e-06, "loss": 0.1818084716796875, "step": 93870 }, { "epoch": 0.8117093669747776, "grad_norm": 1.4064432917101033, "learning_rate": 3.8744186991906475e-06, "loss": 0.3840545654296875, "step": 93875 }, { "epoch": 0.8117526004963208, "grad_norm": 6.463410424476734, "learning_rate": 3.874223810284297e-06, "loss": 0.06781005859375, "step": 93880 }, { "epoch": 0.811795834017864, "grad_norm": 27.285129008103777, "learning_rate": 3.874028917346104e-06, "loss": 0.21669235229492187, "step": 93885 }, { "epoch": 0.8118390675394074, "grad_norm": 0.17598960232940222, "learning_rate": 3.873834020376966e-06, "loss": 0.06852607727050782, "step": 93890 }, { "epoch": 0.8118823010609506, "grad_norm": 0.8603717827610539, "learning_rate": 3.873639119377783e-06, "loss": 0.03620128631591797, "step": 93895 }, { "epoch": 0.8119255345824938, "grad_norm": 44.141125331865254, "learning_rate": 3.873444214349453e-06, "loss": 0.1391448974609375, "step": 93900 }, { "epoch": 0.8119687681040372, "grad_norm": 0.8871566343227499, "learning_rate": 3.873249305292876e-06, "loss": 0.025551795959472656, "step": 93905 }, { "epoch": 0.8120120016255804, "grad_norm": 30.809674537057393, "learning_rate": 3.873054392208951e-06, "loss": 0.224029541015625, "step": 93910 }, { "epoch": 0.8120552351471236, "grad_norm": 4.39652009697019, "learning_rate": 3.872859475098576e-06, "loss": 0.3383758544921875, "step": 93915 }, { "epoch": 0.812098468668667, "grad_norm": 5.649505892622098, "learning_rate": 3.87266455396265e-06, "loss": 0.03273162841796875, "step": 93920 }, { "epoch": 0.8121417021902102, "grad_norm": 1.6106154345414663, "learning_rate": 3.872469628802072e-06, "loss": 0.03134422302246094, "step": 93925 }, { "epoch": 0.8121849357117534, "grad_norm": 6.117267202894456, "learning_rate": 3.872274699617741e-06, "loss": 0.15774993896484374, "step": 93930 }, { "epoch": 0.8122281692332968, "grad_norm": 2.9542475052164807, "learning_rate": 3.872079766410556e-06, "loss": 0.13395233154296876, "step": 93935 }, { "epoch": 0.81227140275484, "grad_norm": 0.9208480627237198, "learning_rate": 3.871884829181418e-06, "loss": 0.23708343505859375, "step": 93940 }, { "epoch": 0.8123146362763832, "grad_norm": 5.395772075372919, "learning_rate": 3.871689887931222e-06, "loss": 0.39666900634765623, "step": 93945 }, { "epoch": 0.8123578697979266, "grad_norm": 1.252584561250449, "learning_rate": 3.87149494266087e-06, "loss": 0.11717386245727539, "step": 93950 }, { "epoch": 0.8124011033194698, "grad_norm": 2.4243898963637833, "learning_rate": 3.8712999933712604e-06, "loss": 0.2141864776611328, "step": 93955 }, { "epoch": 0.812444336841013, "grad_norm": 19.05664669262794, "learning_rate": 3.871105040063292e-06, "loss": 0.18477783203125, "step": 93960 }, { "epoch": 0.8124875703625564, "grad_norm": 18.387458592618568, "learning_rate": 3.870910082737864e-06, "loss": 0.3110393524169922, "step": 93965 }, { "epoch": 0.8125308038840996, "grad_norm": 0.46277676451301447, "learning_rate": 3.870715121395877e-06, "loss": 0.07308807373046874, "step": 93970 }, { "epoch": 0.8125740374056428, "grad_norm": 0.6592298523752431, "learning_rate": 3.870520156038228e-06, "loss": 0.067376708984375, "step": 93975 }, { "epoch": 0.812617270927186, "grad_norm": 0.8799447176407419, "learning_rate": 3.870325186665817e-06, "loss": 0.1262359619140625, "step": 93980 }, { "epoch": 0.8126605044487294, "grad_norm": 0.6838357381084214, "learning_rate": 3.870130213279543e-06, "loss": 0.04998493194580078, "step": 93985 }, { "epoch": 0.8127037379702726, "grad_norm": 1.6612806002445704, "learning_rate": 3.869935235880304e-06, "loss": 0.09072189331054688, "step": 93990 }, { "epoch": 0.8127469714918159, "grad_norm": 4.86738620866758, "learning_rate": 3.869740254469002e-06, "loss": 0.18068008422851561, "step": 93995 }, { "epoch": 0.8127902050133592, "grad_norm": 0.9662491610140252, "learning_rate": 3.869545269046535e-06, "loss": 0.10449790954589844, "step": 94000 }, { "epoch": 0.8128334385349024, "grad_norm": 0.4643604450481565, "learning_rate": 3.869350279613802e-06, "loss": 0.07886848449707032, "step": 94005 }, { "epoch": 0.8128766720564456, "grad_norm": 4.172410286581336, "learning_rate": 3.869155286171702e-06, "loss": 0.021105384826660155, "step": 94010 }, { "epoch": 0.812919905577989, "grad_norm": 12.375303903789856, "learning_rate": 3.868960288721134e-06, "loss": 0.6864295959472656, "step": 94015 }, { "epoch": 0.8129631390995322, "grad_norm": 19.318946938317893, "learning_rate": 3.868765287262999e-06, "loss": 0.078271484375, "step": 94020 }, { "epoch": 0.8130063726210754, "grad_norm": 17.95106726722064, "learning_rate": 3.868570281798195e-06, "loss": 0.07729949951171874, "step": 94025 }, { "epoch": 0.8130496061426188, "grad_norm": 13.856531988715693, "learning_rate": 3.8683752723276215e-06, "loss": 0.2038118362426758, "step": 94030 }, { "epoch": 0.813092839664162, "grad_norm": 5.86566093606606, "learning_rate": 3.868180258852178e-06, "loss": 0.48894081115722654, "step": 94035 }, { "epoch": 0.8131360731857052, "grad_norm": 15.787866508376089, "learning_rate": 3.8679852413727625e-06, "loss": 0.1642791748046875, "step": 94040 }, { "epoch": 0.8131793067072486, "grad_norm": 29.38761078462366, "learning_rate": 3.867790219890277e-06, "loss": 0.2343423843383789, "step": 94045 }, { "epoch": 0.8132225402287918, "grad_norm": 7.245486340714033, "learning_rate": 3.8675951944056205e-06, "loss": 0.223089599609375, "step": 94050 }, { "epoch": 0.813265773750335, "grad_norm": 8.684540466587608, "learning_rate": 3.867400164919691e-06, "loss": 0.1509246826171875, "step": 94055 }, { "epoch": 0.8133090072718783, "grad_norm": 21.532690807874207, "learning_rate": 3.867205131433389e-06, "loss": 0.09509506225585937, "step": 94060 }, { "epoch": 0.8133522407934216, "grad_norm": 12.199089770438095, "learning_rate": 3.867010093947613e-06, "loss": 0.0537353515625, "step": 94065 }, { "epoch": 0.8133954743149648, "grad_norm": 1.5695970445451424, "learning_rate": 3.866815052463262e-06, "loss": 0.0778900146484375, "step": 94070 }, { "epoch": 0.8134387078365081, "grad_norm": 2.4823743269125345, "learning_rate": 3.866620006981238e-06, "loss": 0.09178466796875, "step": 94075 }, { "epoch": 0.8134819413580514, "grad_norm": 5.13250174827728, "learning_rate": 3.8664249575024385e-06, "loss": 0.02501983642578125, "step": 94080 }, { "epoch": 0.8135251748795946, "grad_norm": 16.034557458394918, "learning_rate": 3.866229904027764e-06, "loss": 0.11288299560546874, "step": 94085 }, { "epoch": 0.8135684084011379, "grad_norm": 13.913777990053266, "learning_rate": 3.8660348465581146e-06, "loss": 0.0500218391418457, "step": 94090 }, { "epoch": 0.8136116419226812, "grad_norm": 6.348969536835029, "learning_rate": 3.865839785094387e-06, "loss": 0.08526077270507812, "step": 94095 }, { "epoch": 0.8136548754442244, "grad_norm": 2.171376618303781, "learning_rate": 3.865644719637484e-06, "loss": 0.25809783935546876, "step": 94100 }, { "epoch": 0.8136981089657677, "grad_norm": 6.582160129835488, "learning_rate": 3.8654496501883045e-06, "loss": 0.10898590087890625, "step": 94105 }, { "epoch": 0.813741342487311, "grad_norm": 10.285948338410243, "learning_rate": 3.865254576747746e-06, "loss": 0.16649169921875, "step": 94110 }, { "epoch": 0.8137845760088542, "grad_norm": 1.1845019708433804, "learning_rate": 3.865059499316711e-06, "loss": 0.069171142578125, "step": 94115 }, { "epoch": 0.8138278095303975, "grad_norm": 49.037631731528315, "learning_rate": 3.864864417896098e-06, "loss": 0.2736473083496094, "step": 94120 }, { "epoch": 0.8138710430519408, "grad_norm": 0.6722131039407073, "learning_rate": 3.8646693324868065e-06, "loss": 0.111663818359375, "step": 94125 }, { "epoch": 0.813914276573484, "grad_norm": 3.354375732077232, "learning_rate": 3.864474243089737e-06, "loss": 0.39515151977539065, "step": 94130 }, { "epoch": 0.8139575100950273, "grad_norm": 4.2943926864720146, "learning_rate": 3.8642791497057886e-06, "loss": 0.11646804809570313, "step": 94135 }, { "epoch": 0.8140007436165706, "grad_norm": 2.3815129545361646, "learning_rate": 3.864084052335861e-06, "loss": 0.032806396484375, "step": 94140 }, { "epoch": 0.8140439771381138, "grad_norm": 1.7189581608382847, "learning_rate": 3.8638889509808535e-06, "loss": 0.15565261840820313, "step": 94145 }, { "epoch": 0.8140872106596571, "grad_norm": 3.1137643514640954, "learning_rate": 3.863693845641667e-06, "loss": 0.4048095703125, "step": 94150 }, { "epoch": 0.8141304441812003, "grad_norm": 5.908833977561076, "learning_rate": 3.8634987363192e-06, "loss": 0.1987213134765625, "step": 94155 }, { "epoch": 0.8141736777027436, "grad_norm": 1.184578333772483, "learning_rate": 3.863303623014354e-06, "loss": 0.18705291748046876, "step": 94160 }, { "epoch": 0.8142169112242869, "grad_norm": 37.84836898728532, "learning_rate": 3.8631085057280275e-06, "loss": 0.461627197265625, "step": 94165 }, { "epoch": 0.8142601447458301, "grad_norm": 42.04997679262916, "learning_rate": 3.8629133844611215e-06, "loss": 0.17068557739257811, "step": 94170 }, { "epoch": 0.8143033782673734, "grad_norm": 0.5789721563690172, "learning_rate": 3.8627182592145345e-06, "loss": 0.07298240661621094, "step": 94175 }, { "epoch": 0.8143466117889167, "grad_norm": 13.027083636849762, "learning_rate": 3.862523129989169e-06, "loss": 0.184893798828125, "step": 94180 }, { "epoch": 0.8143898453104599, "grad_norm": 2.177045790931408, "learning_rate": 3.862327996785921e-06, "loss": 0.24341583251953125, "step": 94185 }, { "epoch": 0.8144330788320032, "grad_norm": 9.128366311019542, "learning_rate": 3.862132859605692e-06, "loss": 0.14388427734375, "step": 94190 }, { "epoch": 0.8144763123535464, "grad_norm": 11.540432292226903, "learning_rate": 3.861937718449383e-06, "loss": 0.2201751708984375, "step": 94195 }, { "epoch": 0.8145195458750897, "grad_norm": 2.1667688337576227, "learning_rate": 3.861742573317895e-06, "loss": 0.12853126525878905, "step": 94200 }, { "epoch": 0.814562779396633, "grad_norm": 7.446855416254287, "learning_rate": 3.8615474242121245e-06, "loss": 0.17338790893554687, "step": 94205 }, { "epoch": 0.8146060129181762, "grad_norm": 1.1490731422945424, "learning_rate": 3.861352271132974e-06, "loss": 0.299139404296875, "step": 94210 }, { "epoch": 0.8146492464397195, "grad_norm": 2.9280002366693068, "learning_rate": 3.861157114081344e-06, "loss": 0.100872802734375, "step": 94215 }, { "epoch": 0.8146924799612628, "grad_norm": 6.461190889432323, "learning_rate": 3.860961953058131e-06, "loss": 0.1771575927734375, "step": 94220 }, { "epoch": 0.814735713482806, "grad_norm": 20.75902321857254, "learning_rate": 3.86076678806424e-06, "loss": 0.191180419921875, "step": 94225 }, { "epoch": 0.8147789470043493, "grad_norm": 2.0872075815266795, "learning_rate": 3.860571619100568e-06, "loss": 0.03055877685546875, "step": 94230 }, { "epoch": 0.8148221805258925, "grad_norm": 1.1296933106275187, "learning_rate": 3.860376446168015e-06, "loss": 0.056072235107421875, "step": 94235 }, { "epoch": 0.8148654140474358, "grad_norm": 0.18664799754314668, "learning_rate": 3.8601812692674825e-06, "loss": 0.26851348876953124, "step": 94240 }, { "epoch": 0.8149086475689791, "grad_norm": 4.676443102591236, "learning_rate": 3.85998608839987e-06, "loss": 0.6602569580078125, "step": 94245 }, { "epoch": 0.8149518810905223, "grad_norm": 4.326493466505153, "learning_rate": 3.859790903566078e-06, "loss": 0.45040283203125, "step": 94250 }, { "epoch": 0.8149951146120656, "grad_norm": 0.7972817849501138, "learning_rate": 3.859595714767005e-06, "loss": 0.0523193359375, "step": 94255 }, { "epoch": 0.8150383481336089, "grad_norm": 0.5156507462742845, "learning_rate": 3.859400522003554e-06, "loss": 0.140032958984375, "step": 94260 }, { "epoch": 0.8150815816551521, "grad_norm": 20.823216437511213, "learning_rate": 3.859205325276622e-06, "loss": 0.1397258758544922, "step": 94265 }, { "epoch": 0.8151248151766954, "grad_norm": 12.893023305031388, "learning_rate": 3.859010124587112e-06, "loss": 0.135797119140625, "step": 94270 }, { "epoch": 0.8151680486982387, "grad_norm": 8.87559334985189, "learning_rate": 3.858814919935924e-06, "loss": 0.1519989013671875, "step": 94275 }, { "epoch": 0.8152112822197819, "grad_norm": 2.7515208397832, "learning_rate": 3.858619711323957e-06, "loss": 0.04403533935546875, "step": 94280 }, { "epoch": 0.8152545157413252, "grad_norm": 6.880652546409733, "learning_rate": 3.85842449875211e-06, "loss": 0.032830810546875, "step": 94285 }, { "epoch": 0.8152977492628685, "grad_norm": 36.9533448362575, "learning_rate": 3.858229282221287e-06, "loss": 0.40888214111328125, "step": 94290 }, { "epoch": 0.8153409827844117, "grad_norm": 5.991468777415245, "learning_rate": 3.858034061732386e-06, "loss": 0.2158935546875, "step": 94295 }, { "epoch": 0.815384216305955, "grad_norm": 2.539323658637793, "learning_rate": 3.857838837286307e-06, "loss": 0.0744110107421875, "step": 94300 }, { "epoch": 0.8154274498274983, "grad_norm": 0.03177924594321318, "learning_rate": 3.8576436088839525e-06, "loss": 0.01766548156738281, "step": 94305 }, { "epoch": 0.8154706833490415, "grad_norm": 0.49647750831462184, "learning_rate": 3.857448376526221e-06, "loss": 0.04771099090576172, "step": 94310 }, { "epoch": 0.8155139168705848, "grad_norm": 14.33393396314969, "learning_rate": 3.857253140214012e-06, "loss": 0.23469886779785157, "step": 94315 }, { "epoch": 0.8155571503921281, "grad_norm": 8.761871442413872, "learning_rate": 3.8570578999482286e-06, "loss": 0.07777099609375, "step": 94320 }, { "epoch": 0.8156003839136713, "grad_norm": 20.480934522049047, "learning_rate": 3.85686265572977e-06, "loss": 0.23277587890625, "step": 94325 }, { "epoch": 0.8156436174352145, "grad_norm": 67.95401182861714, "learning_rate": 3.8566674075595355e-06, "loss": 0.451055908203125, "step": 94330 }, { "epoch": 0.8156868509567579, "grad_norm": 0.5878447989468614, "learning_rate": 3.856472155438427e-06, "loss": 0.1385284423828125, "step": 94335 }, { "epoch": 0.8157300844783011, "grad_norm": 11.614987136036822, "learning_rate": 3.8562768993673455e-06, "loss": 0.12323570251464844, "step": 94340 }, { "epoch": 0.8157733179998443, "grad_norm": 8.7302622741701, "learning_rate": 3.8560816393471906e-06, "loss": 0.13268623352050782, "step": 94345 }, { "epoch": 0.8158165515213877, "grad_norm": 0.32696789160506556, "learning_rate": 3.855886375378862e-06, "loss": 0.28660430908203127, "step": 94350 }, { "epoch": 0.8158597850429309, "grad_norm": 0.2915573446617875, "learning_rate": 3.85569110746326e-06, "loss": 0.08351593017578125, "step": 94355 }, { "epoch": 0.8159030185644741, "grad_norm": 0.16278154056011865, "learning_rate": 3.8554958356012875e-06, "loss": 0.046849822998046874, "step": 94360 }, { "epoch": 0.8159462520860175, "grad_norm": 0.8386001325817809, "learning_rate": 3.855300559793844e-06, "loss": 0.17837181091308593, "step": 94365 }, { "epoch": 0.8159894856075607, "grad_norm": 8.407298718249054, "learning_rate": 3.85510528004183e-06, "loss": 0.1079345703125, "step": 94370 }, { "epoch": 0.8160327191291039, "grad_norm": 3.847801176585759, "learning_rate": 3.854909996346146e-06, "loss": 0.10640411376953125, "step": 94375 }, { "epoch": 0.8160759526506473, "grad_norm": 1.5468829455293478, "learning_rate": 3.854714708707692e-06, "loss": 0.06726875305175781, "step": 94380 }, { "epoch": 0.8161191861721905, "grad_norm": 30.484487852050826, "learning_rate": 3.854519417127369e-06, "loss": 0.2226898193359375, "step": 94385 }, { "epoch": 0.8161624196937337, "grad_norm": 8.43253074767105, "learning_rate": 3.854324121606079e-06, "loss": 0.37735748291015625, "step": 94390 }, { "epoch": 0.816205653215277, "grad_norm": 2.525919681308368, "learning_rate": 3.8541288221447205e-06, "loss": 0.09471435546875, "step": 94395 }, { "epoch": 0.8162488867368203, "grad_norm": 0.8908289393919061, "learning_rate": 3.853933518744196e-06, "loss": 0.06529464721679687, "step": 94400 }, { "epoch": 0.8162921202583635, "grad_norm": 4.754890883353468, "learning_rate": 3.853738211405407e-06, "loss": 0.189312744140625, "step": 94405 }, { "epoch": 0.8163353537799067, "grad_norm": 0.3227221375187238, "learning_rate": 3.85354290012925e-06, "loss": 0.0494171142578125, "step": 94410 }, { "epoch": 0.8163785873014501, "grad_norm": 24.118014687054153, "learning_rate": 3.85334758491663e-06, "loss": 0.23403072357177734, "step": 94415 }, { "epoch": 0.8164218208229933, "grad_norm": 5.125158411248579, "learning_rate": 3.853152265768447e-06, "loss": 0.22043209075927733, "step": 94420 }, { "epoch": 0.8164650543445365, "grad_norm": 15.693405279124908, "learning_rate": 3.8529569426856e-06, "loss": 0.11289749145507813, "step": 94425 }, { "epoch": 0.8165082878660799, "grad_norm": 2.6089944202284143, "learning_rate": 3.852761615668991e-06, "loss": 0.04691314697265625, "step": 94430 }, { "epoch": 0.8165515213876231, "grad_norm": 2.6485323189618777, "learning_rate": 3.852566284719522e-06, "loss": 0.203485107421875, "step": 94435 }, { "epoch": 0.8165947549091663, "grad_norm": 3.5636068020658525, "learning_rate": 3.852370949838092e-06, "loss": 0.17581787109375, "step": 94440 }, { "epoch": 0.8166379884307097, "grad_norm": 0.644330403810582, "learning_rate": 3.852175611025601e-06, "loss": 0.0243927001953125, "step": 94445 }, { "epoch": 0.8166812219522529, "grad_norm": 5.298988139194479, "learning_rate": 3.851980268282954e-06, "loss": 0.11943893432617188, "step": 94450 }, { "epoch": 0.8167244554737961, "grad_norm": 6.4146113817972115, "learning_rate": 3.851784921611048e-06, "loss": 0.0855621337890625, "step": 94455 }, { "epoch": 0.8167676889953395, "grad_norm": 9.048760027529594, "learning_rate": 3.851589571010784e-06, "loss": 0.20043621063232422, "step": 94460 }, { "epoch": 0.8168109225168827, "grad_norm": 1.3633051954290183, "learning_rate": 3.851394216483065e-06, "loss": 0.04610366821289062, "step": 94465 }, { "epoch": 0.8168541560384259, "grad_norm": 2.6756951131134263, "learning_rate": 3.851198858028791e-06, "loss": 0.020395660400390626, "step": 94470 }, { "epoch": 0.8168973895599693, "grad_norm": 5.844689616755688, "learning_rate": 3.851003495648864e-06, "loss": 0.135247802734375, "step": 94475 }, { "epoch": 0.8169406230815125, "grad_norm": 16.265614079790844, "learning_rate": 3.850808129344184e-06, "loss": 0.179345703125, "step": 94480 }, { "epoch": 0.8169838566030557, "grad_norm": 0.7572616839159512, "learning_rate": 3.850612759115652e-06, "loss": 0.10984230041503906, "step": 94485 }, { "epoch": 0.817027090124599, "grad_norm": 2.4624406518678317, "learning_rate": 3.850417384964168e-06, "loss": 0.1566925048828125, "step": 94490 }, { "epoch": 0.8170703236461423, "grad_norm": 5.949772448800551, "learning_rate": 3.850222006890635e-06, "loss": 0.09634552001953126, "step": 94495 }, { "epoch": 0.8171135571676855, "grad_norm": 6.380318481111533, "learning_rate": 3.850026624895953e-06, "loss": 0.06415824890136719, "step": 94500 }, { "epoch": 0.8171567906892288, "grad_norm": 0.36022434577227913, "learning_rate": 3.849831238981023e-06, "loss": 0.2568378448486328, "step": 94505 }, { "epoch": 0.8172000242107721, "grad_norm": 3.4509017965549336, "learning_rate": 3.849635849146748e-06, "loss": 0.0628082275390625, "step": 94510 }, { "epoch": 0.8172432577323153, "grad_norm": 0.2748804879482237, "learning_rate": 3.849440455394026e-06, "loss": 0.06894989013671875, "step": 94515 }, { "epoch": 0.8172864912538585, "grad_norm": 19.151265018031655, "learning_rate": 3.84924505772376e-06, "loss": 0.14533233642578125, "step": 94520 }, { "epoch": 0.8173297247754019, "grad_norm": 8.42660562310949, "learning_rate": 3.8490496561368505e-06, "loss": 0.39815940856933596, "step": 94525 }, { "epoch": 0.8173729582969451, "grad_norm": 3.5959210715080903, "learning_rate": 3.848854250634199e-06, "loss": 0.13021354675292968, "step": 94530 }, { "epoch": 0.8174161918184883, "grad_norm": 12.019168681815332, "learning_rate": 3.8486588412167085e-06, "loss": 0.4241649627685547, "step": 94535 }, { "epoch": 0.8174594253400317, "grad_norm": 4.6571004901322715, "learning_rate": 3.8484634278852766e-06, "loss": 0.19379501342773436, "step": 94540 }, { "epoch": 0.8175026588615749, "grad_norm": 32.159648646600594, "learning_rate": 3.848268010640807e-06, "loss": 0.11796340942382813, "step": 94545 }, { "epoch": 0.8175458923831181, "grad_norm": 13.117920936336313, "learning_rate": 3.8480725894842e-06, "loss": 0.14808349609375, "step": 94550 }, { "epoch": 0.8175891259046615, "grad_norm": 17.825806457401224, "learning_rate": 3.847877164416356e-06, "loss": 0.176788330078125, "step": 94555 }, { "epoch": 0.8176323594262047, "grad_norm": 8.720578491668668, "learning_rate": 3.847681735438179e-06, "loss": 0.129632568359375, "step": 94560 }, { "epoch": 0.8176755929477479, "grad_norm": 15.10372026082654, "learning_rate": 3.847486302550569e-06, "loss": 0.15294647216796875, "step": 94565 }, { "epoch": 0.8177188264692913, "grad_norm": 19.288818763966766, "learning_rate": 3.847290865754426e-06, "loss": 0.107257080078125, "step": 94570 }, { "epoch": 0.8177620599908345, "grad_norm": 7.532914475558679, "learning_rate": 3.847095425050653e-06, "loss": 0.08253765106201172, "step": 94575 }, { "epoch": 0.8178052935123777, "grad_norm": 29.316210330651447, "learning_rate": 3.846899980440151e-06, "loss": 0.5143798828125, "step": 94580 }, { "epoch": 0.817848527033921, "grad_norm": 0.764853919039327, "learning_rate": 3.846704531923821e-06, "loss": 0.31188812255859377, "step": 94585 }, { "epoch": 0.8178917605554643, "grad_norm": 0.6929573392261951, "learning_rate": 3.846509079502564e-06, "loss": 0.03174037933349609, "step": 94590 }, { "epoch": 0.8179349940770075, "grad_norm": 7.284778819234454, "learning_rate": 3.846313623177282e-06, "loss": 0.16825408935546876, "step": 94595 }, { "epoch": 0.8179782275985508, "grad_norm": 19.020954243023265, "learning_rate": 3.846118162948877e-06, "loss": 0.11466598510742188, "step": 94600 }, { "epoch": 0.8180214611200941, "grad_norm": 0.48720211202559666, "learning_rate": 3.845922698818249e-06, "loss": 0.11654529571533204, "step": 94605 }, { "epoch": 0.8180646946416373, "grad_norm": 34.964174726330654, "learning_rate": 3.8457272307863e-06, "loss": 0.11779937744140626, "step": 94610 }, { "epoch": 0.8181079281631806, "grad_norm": 0.12392992500088258, "learning_rate": 3.845531758853933e-06, "loss": 0.062066650390625, "step": 94615 }, { "epoch": 0.8181511616847239, "grad_norm": 12.120119998881615, "learning_rate": 3.845336283022047e-06, "loss": 0.13890609741210938, "step": 94620 }, { "epoch": 0.8181943952062671, "grad_norm": 5.890364233232756, "learning_rate": 3.845140803291546e-06, "loss": 0.1919769287109375, "step": 94625 }, { "epoch": 0.8182376287278104, "grad_norm": 1.026764900285431, "learning_rate": 3.84494531966333e-06, "loss": 0.06283111572265625, "step": 94630 }, { "epoch": 0.8182808622493537, "grad_norm": 3.181860592309467, "learning_rate": 3.8447498321382994e-06, "loss": 0.13341064453125, "step": 94635 }, { "epoch": 0.8183240957708969, "grad_norm": 3.854041882322157, "learning_rate": 3.8445543407173585e-06, "loss": 0.1704315185546875, "step": 94640 }, { "epoch": 0.8183673292924402, "grad_norm": 5.956205879528277, "learning_rate": 3.844358845401408e-06, "loss": 0.057622528076171874, "step": 94645 }, { "epoch": 0.8184105628139835, "grad_norm": 5.526619405298506, "learning_rate": 3.844163346191348e-06, "loss": 0.24660797119140626, "step": 94650 }, { "epoch": 0.8184537963355267, "grad_norm": 1.5761653786006258, "learning_rate": 3.8439678430880816e-06, "loss": 0.3546745300292969, "step": 94655 }, { "epoch": 0.81849702985707, "grad_norm": 2.2667228818617096, "learning_rate": 3.84377233609251e-06, "loss": 0.09561538696289062, "step": 94660 }, { "epoch": 0.8185402633786132, "grad_norm": 7.0418596319924385, "learning_rate": 3.843576825205536e-06, "loss": 0.3370628356933594, "step": 94665 }, { "epoch": 0.8185834969001565, "grad_norm": 4.438226244973461, "learning_rate": 3.843381310428059e-06, "loss": 0.21098175048828124, "step": 94670 }, { "epoch": 0.8186267304216998, "grad_norm": 1.9318882174496275, "learning_rate": 3.8431857917609826e-06, "loss": 0.1274688720703125, "step": 94675 }, { "epoch": 0.818669963943243, "grad_norm": 9.70714496066757, "learning_rate": 3.842990269205208e-06, "loss": 0.43773345947265624, "step": 94680 }, { "epoch": 0.8187131974647863, "grad_norm": 1.598111983471511, "learning_rate": 3.8427947427616356e-06, "loss": 0.08509178161621093, "step": 94685 }, { "epoch": 0.8187564309863296, "grad_norm": 16.029204606435066, "learning_rate": 3.842599212431168e-06, "loss": 0.1871429443359375, "step": 94690 }, { "epoch": 0.8187996645078728, "grad_norm": 15.695926628095961, "learning_rate": 3.842403678214709e-06, "loss": 0.10737457275390624, "step": 94695 }, { "epoch": 0.8188428980294161, "grad_norm": 3.657075979267019, "learning_rate": 3.842208140113157e-06, "loss": 0.131341552734375, "step": 94700 }, { "epoch": 0.8188861315509594, "grad_norm": 16.580789937478308, "learning_rate": 3.842012598127418e-06, "loss": 0.13037872314453125, "step": 94705 }, { "epoch": 0.8189293650725026, "grad_norm": 9.77270614080086, "learning_rate": 3.84181705225839e-06, "loss": 0.1568756103515625, "step": 94710 }, { "epoch": 0.8189725985940459, "grad_norm": 1.6179272556687347, "learning_rate": 3.841621502506975e-06, "loss": 0.14828453063964844, "step": 94715 }, { "epoch": 0.8190158321155891, "grad_norm": 4.835385342860099, "learning_rate": 3.841425948874077e-06, "loss": 0.03218994140625, "step": 94720 }, { "epoch": 0.8190590656371324, "grad_norm": 6.166234645128959, "learning_rate": 3.841230391360597e-06, "loss": 0.06194038391113281, "step": 94725 }, { "epoch": 0.8191022991586757, "grad_norm": 64.51974773525747, "learning_rate": 3.841034829967436e-06, "loss": 0.88756103515625, "step": 94730 }, { "epoch": 0.819145532680219, "grad_norm": 1.7358240425531333, "learning_rate": 3.840839264695498e-06, "loss": 0.10775146484375, "step": 94735 }, { "epoch": 0.8191887662017622, "grad_norm": 5.064194818339016, "learning_rate": 3.840643695545682e-06, "loss": 0.09622955322265625, "step": 94740 }, { "epoch": 0.8192319997233055, "grad_norm": 0.5473399084548717, "learning_rate": 3.840448122518893e-06, "loss": 0.035140609741210936, "step": 94745 }, { "epoch": 0.8192752332448487, "grad_norm": 1.0555991211271287, "learning_rate": 3.8402525456160306e-06, "loss": 0.4543243408203125, "step": 94750 }, { "epoch": 0.819318466766392, "grad_norm": 7.817084635574312, "learning_rate": 3.8400569648379985e-06, "loss": 0.10887603759765625, "step": 94755 }, { "epoch": 0.8193617002879352, "grad_norm": 16.27015116674615, "learning_rate": 3.839861380185697e-06, "loss": 0.15148468017578126, "step": 94760 }, { "epoch": 0.8194049338094785, "grad_norm": 0.9432668436416326, "learning_rate": 3.839665791660029e-06, "loss": 0.1097137451171875, "step": 94765 }, { "epoch": 0.8194481673310218, "grad_norm": 1.576123280758993, "learning_rate": 3.839470199261898e-06, "loss": 0.1216552734375, "step": 94770 }, { "epoch": 0.819491400852565, "grad_norm": 22.88570619644498, "learning_rate": 3.839274602992203e-06, "loss": 0.08867225646972657, "step": 94775 }, { "epoch": 0.8195346343741083, "grad_norm": 0.22144259703982666, "learning_rate": 3.839079002851848e-06, "loss": 0.03059864044189453, "step": 94780 }, { "epoch": 0.8195778678956516, "grad_norm": 4.019713820593294, "learning_rate": 3.838883398841735e-06, "loss": 0.09571533203125, "step": 94785 }, { "epoch": 0.8196211014171948, "grad_norm": 1.3534945004248589, "learning_rate": 3.838687790962767e-06, "loss": 0.24111557006835938, "step": 94790 }, { "epoch": 0.8196643349387381, "grad_norm": 0.27518058826203323, "learning_rate": 3.838492179215844e-06, "loss": 0.07002487182617187, "step": 94795 }, { "epoch": 0.8197075684602814, "grad_norm": 0.7157664972221993, "learning_rate": 3.838296563601869e-06, "loss": 0.12247123718261718, "step": 94800 }, { "epoch": 0.8197508019818246, "grad_norm": 16.879196899508738, "learning_rate": 3.838100944121744e-06, "loss": 0.11033096313476562, "step": 94805 }, { "epoch": 0.8197940355033679, "grad_norm": 16.53199128890788, "learning_rate": 3.837905320776371e-06, "loss": 0.16173095703125, "step": 94810 }, { "epoch": 0.8198372690249112, "grad_norm": 2.3288642484303628, "learning_rate": 3.837709693566654e-06, "loss": 0.05231456756591797, "step": 94815 }, { "epoch": 0.8198805025464544, "grad_norm": 14.871251274685457, "learning_rate": 3.837514062493494e-06, "loss": 0.07106361389160157, "step": 94820 }, { "epoch": 0.8199237360679977, "grad_norm": 3.38097274191181, "learning_rate": 3.837318427557793e-06, "loss": 0.0285308837890625, "step": 94825 }, { "epoch": 0.819966969589541, "grad_norm": 10.645483350147492, "learning_rate": 3.837122788760453e-06, "loss": 0.0488311767578125, "step": 94830 }, { "epoch": 0.8200102031110842, "grad_norm": 1.0464627859485143, "learning_rate": 3.836927146102376e-06, "loss": 0.031085968017578125, "step": 94835 }, { "epoch": 0.8200534366326274, "grad_norm": 12.951953337529824, "learning_rate": 3.836731499584466e-06, "loss": 0.23108081817626952, "step": 94840 }, { "epoch": 0.8200966701541708, "grad_norm": 5.3254376117069375, "learning_rate": 3.836535849207624e-06, "loss": 0.2549896240234375, "step": 94845 }, { "epoch": 0.820139903675714, "grad_norm": 18.4237812033251, "learning_rate": 3.836340194972754e-06, "loss": 0.1886322021484375, "step": 94850 }, { "epoch": 0.8201831371972572, "grad_norm": 0.8530162233963465, "learning_rate": 3.836144536880755e-06, "loss": 0.0230010986328125, "step": 94855 }, { "epoch": 0.8202263707188006, "grad_norm": 7.242904089249109, "learning_rate": 3.835948874932531e-06, "loss": 0.35846939086914065, "step": 94860 }, { "epoch": 0.8202696042403438, "grad_norm": 3.930323777338049, "learning_rate": 3.835753209128986e-06, "loss": 0.052448272705078125, "step": 94865 }, { "epoch": 0.820312837761887, "grad_norm": 24.20251093664334, "learning_rate": 3.83555753947102e-06, "loss": 0.0618682861328125, "step": 94870 }, { "epoch": 0.8203560712834304, "grad_norm": 29.12068922281167, "learning_rate": 3.835361865959537e-06, "loss": 0.622283935546875, "step": 94875 }, { "epoch": 0.8203993048049736, "grad_norm": 16.45615243066968, "learning_rate": 3.8351661885954395e-06, "loss": 0.06438064575195312, "step": 94880 }, { "epoch": 0.8204425383265168, "grad_norm": 17.026115711693407, "learning_rate": 3.834970507379628e-06, "loss": 0.1357391357421875, "step": 94885 }, { "epoch": 0.8204857718480602, "grad_norm": 4.351475600659797, "learning_rate": 3.834774822313007e-06, "loss": 0.28538665771484373, "step": 94890 }, { "epoch": 0.8205290053696034, "grad_norm": 0.09337288049197173, "learning_rate": 3.8345791333964784e-06, "loss": 0.1719745635986328, "step": 94895 }, { "epoch": 0.8205722388911466, "grad_norm": 18.514435715029325, "learning_rate": 3.834383440630945e-06, "loss": 0.49574737548828124, "step": 94900 }, { "epoch": 0.82061547241269, "grad_norm": 15.297753923128282, "learning_rate": 3.8341877440173075e-06, "loss": 0.05566558837890625, "step": 94905 }, { "epoch": 0.8206587059342332, "grad_norm": 8.146905548494681, "learning_rate": 3.833992043556471e-06, "loss": 0.09000282287597657, "step": 94910 }, { "epoch": 0.8207019394557764, "grad_norm": 18.56419698129264, "learning_rate": 3.833796339249336e-06, "loss": 0.32525787353515623, "step": 94915 }, { "epoch": 0.8207451729773197, "grad_norm": 2.157149718369498, "learning_rate": 3.833600631096807e-06, "loss": 0.0479766845703125, "step": 94920 }, { "epoch": 0.820788406498863, "grad_norm": 3.2270358829961494, "learning_rate": 3.833404919099785e-06, "loss": 0.2580322265625, "step": 94925 }, { "epoch": 0.8208316400204062, "grad_norm": 2.783659415968796, "learning_rate": 3.833209203259174e-06, "loss": 0.17872676849365235, "step": 94930 }, { "epoch": 0.8208748735419494, "grad_norm": 1.1038811295366395, "learning_rate": 3.833013483575875e-06, "loss": 0.190582275390625, "step": 94935 }, { "epoch": 0.8209181070634928, "grad_norm": 1.1078477712699084, "learning_rate": 3.832817760050791e-06, "loss": 0.10729827880859374, "step": 94940 }, { "epoch": 0.820961340585036, "grad_norm": 3.963350683227324, "learning_rate": 3.832622032684825e-06, "loss": 0.2170257568359375, "step": 94945 }, { "epoch": 0.8210045741065792, "grad_norm": 6.551876432986725, "learning_rate": 3.832426301478881e-06, "loss": 0.08255538940429688, "step": 94950 }, { "epoch": 0.8210478076281226, "grad_norm": 5.67649569981643, "learning_rate": 3.832230566433859e-06, "loss": 0.1855712890625, "step": 94955 }, { "epoch": 0.8210910411496658, "grad_norm": 3.6944579031448077, "learning_rate": 3.832034827550664e-06, "loss": 0.1682098388671875, "step": 94960 }, { "epoch": 0.821134274671209, "grad_norm": 0.027088940500607762, "learning_rate": 3.831839084830198e-06, "loss": 0.2252887725830078, "step": 94965 }, { "epoch": 0.8211775081927524, "grad_norm": 3.64626896722944, "learning_rate": 3.831643338273364e-06, "loss": 0.04377937316894531, "step": 94970 }, { "epoch": 0.8212207417142956, "grad_norm": 0.2683669023068381, "learning_rate": 3.831447587881063e-06, "loss": 0.0930206298828125, "step": 94975 }, { "epoch": 0.8212639752358388, "grad_norm": 13.836582238296984, "learning_rate": 3.8312518336542e-06, "loss": 0.16650543212890626, "step": 94980 }, { "epoch": 0.8213072087573822, "grad_norm": 26.37834726352445, "learning_rate": 3.831056075593677e-06, "loss": 0.19282493591308594, "step": 94985 }, { "epoch": 0.8213504422789254, "grad_norm": 4.38168381700854, "learning_rate": 3.8308603137003975e-06, "loss": 0.26653900146484377, "step": 94990 }, { "epoch": 0.8213936758004686, "grad_norm": 0.936745909861327, "learning_rate": 3.8306645479752626e-06, "loss": 0.1570098876953125, "step": 94995 }, { "epoch": 0.821436909322012, "grad_norm": 0.1947856219215376, "learning_rate": 3.830468778419176e-06, "loss": 0.27626609802246094, "step": 95000 }, { "epoch": 0.8214801428435552, "grad_norm": 49.366241485767915, "learning_rate": 3.830273005033042e-06, "loss": 0.32960824966430663, "step": 95005 }, { "epoch": 0.8215233763650984, "grad_norm": 9.689106894094804, "learning_rate": 3.830077227817762e-06, "loss": 0.0814727783203125, "step": 95010 }, { "epoch": 0.8215666098866417, "grad_norm": 5.536342063757986, "learning_rate": 3.829881446774238e-06, "loss": 0.6329315185546875, "step": 95015 }, { "epoch": 0.821609843408185, "grad_norm": 1.744165541378544, "learning_rate": 3.829685661903375e-06, "loss": 0.01712799072265625, "step": 95020 }, { "epoch": 0.8216530769297282, "grad_norm": 3.985895484826707, "learning_rate": 3.829489873206075e-06, "loss": 0.1742919921875, "step": 95025 }, { "epoch": 0.8216963104512714, "grad_norm": 0.48287630187139247, "learning_rate": 3.829294080683241e-06, "loss": 0.041112327575683595, "step": 95030 }, { "epoch": 0.8217395439728148, "grad_norm": 2.100533146292941, "learning_rate": 3.829098284335775e-06, "loss": 0.1320526123046875, "step": 95035 }, { "epoch": 0.821782777494358, "grad_norm": 16.884644153328942, "learning_rate": 3.828902484164583e-06, "loss": 0.2076751708984375, "step": 95040 }, { "epoch": 0.8218260110159012, "grad_norm": 6.738317694938837, "learning_rate": 3.8287066801705644e-06, "loss": 0.15875244140625, "step": 95045 }, { "epoch": 0.8218692445374446, "grad_norm": 4.049711496607912, "learning_rate": 3.828510872354624e-06, "loss": 0.08461837768554688, "step": 95050 }, { "epoch": 0.8219124780589878, "grad_norm": 0.22038734837068355, "learning_rate": 3.828315060717665e-06, "loss": 0.04700260162353516, "step": 95055 }, { "epoch": 0.821955711580531, "grad_norm": 25.915307877122487, "learning_rate": 3.828119245260591e-06, "loss": 0.08772163391113282, "step": 95060 }, { "epoch": 0.8219989451020744, "grad_norm": 10.937074888090965, "learning_rate": 3.827923425984302e-06, "loss": 0.09819164276123046, "step": 95065 }, { "epoch": 0.8220421786236176, "grad_norm": 2.0310357186244152, "learning_rate": 3.8277276028897045e-06, "loss": 0.07076263427734375, "step": 95070 }, { "epoch": 0.8220854121451608, "grad_norm": 4.792187290994239, "learning_rate": 3.827531775977701e-06, "loss": 0.0904388427734375, "step": 95075 }, { "epoch": 0.8221286456667042, "grad_norm": 6.327831893604041, "learning_rate": 3.827335945249194e-06, "loss": 0.193963623046875, "step": 95080 }, { "epoch": 0.8221718791882474, "grad_norm": 16.665259894943294, "learning_rate": 3.827140110705086e-06, "loss": 0.45212135314941404, "step": 95085 }, { "epoch": 0.8222151127097906, "grad_norm": 4.138399957643718, "learning_rate": 3.826944272346282e-06, "loss": 0.042352294921875, "step": 95090 }, { "epoch": 0.822258346231334, "grad_norm": 0.8256624560450322, "learning_rate": 3.826748430173682e-06, "loss": 0.10793952941894532, "step": 95095 }, { "epoch": 0.8223015797528772, "grad_norm": 1.3931428357766122, "learning_rate": 3.826552584188194e-06, "loss": 0.12424774169921875, "step": 95100 }, { "epoch": 0.8223448132744204, "grad_norm": 3.270321030619808, "learning_rate": 3.826356734390716e-06, "loss": 0.17683868408203124, "step": 95105 }, { "epoch": 0.8223880467959637, "grad_norm": 29.902435261374343, "learning_rate": 3.826160880782156e-06, "loss": 0.23847198486328125, "step": 95110 }, { "epoch": 0.822431280317507, "grad_norm": 18.82994756410598, "learning_rate": 3.825965023363413e-06, "loss": 0.221185302734375, "step": 95115 }, { "epoch": 0.8224745138390502, "grad_norm": 0.9861397017846055, "learning_rate": 3.8257691621353936e-06, "loss": 0.24033012390136718, "step": 95120 }, { "epoch": 0.8225177473605935, "grad_norm": 29.267116071974993, "learning_rate": 3.825573297099e-06, "loss": 0.25720748901367185, "step": 95125 }, { "epoch": 0.8225609808821368, "grad_norm": 6.617544283989875, "learning_rate": 3.825377428255134e-06, "loss": 0.04462223052978516, "step": 95130 }, { "epoch": 0.82260421440368, "grad_norm": 25.245359431487653, "learning_rate": 3.8251815556047006e-06, "loss": 0.13690948486328125, "step": 95135 }, { "epoch": 0.8226474479252233, "grad_norm": 4.378917667099138, "learning_rate": 3.8249856791486035e-06, "loss": 0.0695404052734375, "step": 95140 }, { "epoch": 0.8226906814467666, "grad_norm": 1.3430007373822406, "learning_rate": 3.8247897988877446e-06, "loss": 0.0328155517578125, "step": 95145 }, { "epoch": 0.8227339149683098, "grad_norm": 2.6295616028012425, "learning_rate": 3.824593914823028e-06, "loss": 0.049755859375, "step": 95150 }, { "epoch": 0.8227771484898531, "grad_norm": 1.2604643477884285, "learning_rate": 3.824398026955358e-06, "loss": 0.06432838439941406, "step": 95155 }, { "epoch": 0.8228203820113964, "grad_norm": 0.4407445463021618, "learning_rate": 3.824202135285636e-06, "loss": 0.03033294677734375, "step": 95160 }, { "epoch": 0.8228636155329396, "grad_norm": 1.8421841950072066, "learning_rate": 3.824006239814768e-06, "loss": 0.3302490234375, "step": 95165 }, { "epoch": 0.8229068490544829, "grad_norm": 1.1881473179038178, "learning_rate": 3.823810340543655e-06, "loss": 0.07079696655273438, "step": 95170 }, { "epoch": 0.8229500825760262, "grad_norm": 0.9122178655269442, "learning_rate": 3.823614437473201e-06, "loss": 0.18274993896484376, "step": 95175 }, { "epoch": 0.8229933160975694, "grad_norm": 4.501972397578914, "learning_rate": 3.823418530604311e-06, "loss": 0.0713104248046875, "step": 95180 }, { "epoch": 0.8230365496191127, "grad_norm": 33.326521603644906, "learning_rate": 3.823222619937888e-06, "loss": 0.282757568359375, "step": 95185 }, { "epoch": 0.8230797831406559, "grad_norm": 5.607604355428405, "learning_rate": 3.823026705474834e-06, "loss": 0.14115505218505858, "step": 95190 }, { "epoch": 0.8231230166621992, "grad_norm": 3.5056885557881032, "learning_rate": 3.822830787216054e-06, "loss": 0.069976806640625, "step": 95195 }, { "epoch": 0.8231662501837425, "grad_norm": 27.927456380466428, "learning_rate": 3.82263486516245e-06, "loss": 0.20120391845703126, "step": 95200 }, { "epoch": 0.8232094837052857, "grad_norm": 15.485491859729386, "learning_rate": 3.8224389393149285e-06, "loss": 0.26236667633056643, "step": 95205 }, { "epoch": 0.823252717226829, "grad_norm": 9.869575186064722, "learning_rate": 3.82224300967439e-06, "loss": 0.2013916015625, "step": 95210 }, { "epoch": 0.8232959507483723, "grad_norm": 2.64656485773859, "learning_rate": 3.82204707624174e-06, "loss": 0.20106964111328124, "step": 95215 }, { "epoch": 0.8233391842699155, "grad_norm": 4.8887266295345, "learning_rate": 3.821851139017882e-06, "loss": 0.2317596435546875, "step": 95220 }, { "epoch": 0.8233824177914588, "grad_norm": 43.74329695869185, "learning_rate": 3.821655198003718e-06, "loss": 0.3524620056152344, "step": 95225 }, { "epoch": 0.823425651313002, "grad_norm": 9.933656526783547, "learning_rate": 3.821459253200153e-06, "loss": 0.32635498046875, "step": 95230 }, { "epoch": 0.8234688848345453, "grad_norm": 12.023737549120167, "learning_rate": 3.821263304608091e-06, "loss": 0.1364105224609375, "step": 95235 }, { "epoch": 0.8235121183560886, "grad_norm": 16.67753651638166, "learning_rate": 3.8210673522284345e-06, "loss": 0.160968017578125, "step": 95240 }, { "epoch": 0.8235553518776318, "grad_norm": 15.653066581029938, "learning_rate": 3.820871396062089e-06, "loss": 0.114068603515625, "step": 95245 }, { "epoch": 0.8235985853991751, "grad_norm": 19.72408867520505, "learning_rate": 3.820675436109957e-06, "loss": 0.5147216796875, "step": 95250 }, { "epoch": 0.8236418189207184, "grad_norm": 12.971948147340356, "learning_rate": 3.820479472372941e-06, "loss": 0.09380722045898438, "step": 95255 }, { "epoch": 0.8236850524422616, "grad_norm": 13.22452483918622, "learning_rate": 3.820283504851947e-06, "loss": 0.132562255859375, "step": 95260 }, { "epoch": 0.8237282859638049, "grad_norm": 2.555080094782011, "learning_rate": 3.820087533547879e-06, "loss": 0.12931480407714843, "step": 95265 }, { "epoch": 0.8237715194853482, "grad_norm": 6.667231685521783, "learning_rate": 3.819891558461639e-06, "loss": 0.1395801544189453, "step": 95270 }, { "epoch": 0.8238147530068914, "grad_norm": 30.070034166694555, "learning_rate": 3.819695579594132e-06, "loss": 0.3595607280731201, "step": 95275 }, { "epoch": 0.8238579865284347, "grad_norm": 5.945003784075263, "learning_rate": 3.8194995969462606e-06, "loss": 0.17623481750488282, "step": 95280 }, { "epoch": 0.8239012200499779, "grad_norm": 0.088018683403436, "learning_rate": 3.81930361051893e-06, "loss": 0.089825439453125, "step": 95285 }, { "epoch": 0.8239444535715212, "grad_norm": 2.396106685511471, "learning_rate": 3.819107620313043e-06, "loss": 0.19098663330078125, "step": 95290 }, { "epoch": 0.8239876870930645, "grad_norm": 7.425529073102912, "learning_rate": 3.818911626329505e-06, "loss": 0.16041412353515624, "step": 95295 }, { "epoch": 0.8240309206146077, "grad_norm": 13.365983542666676, "learning_rate": 3.818715628569218e-06, "loss": 0.10790061950683594, "step": 95300 }, { "epoch": 0.824074154136151, "grad_norm": 15.862177380031287, "learning_rate": 3.818519627033087e-06, "loss": 0.10102767944335937, "step": 95305 }, { "epoch": 0.8241173876576943, "grad_norm": 10.06924190579867, "learning_rate": 3.818323621722015e-06, "loss": 0.1662872314453125, "step": 95310 }, { "epoch": 0.8241606211792375, "grad_norm": 1.2073895805598855, "learning_rate": 3.818127612636908e-06, "loss": 0.10269927978515625, "step": 95315 }, { "epoch": 0.8242038547007808, "grad_norm": 16.06222596695128, "learning_rate": 3.817931599778668e-06, "loss": 0.10072250366210937, "step": 95320 }, { "epoch": 0.8242470882223241, "grad_norm": 0.7777304299124038, "learning_rate": 3.817735583148201e-06, "loss": 0.32217044830322267, "step": 95325 }, { "epoch": 0.8242903217438673, "grad_norm": 17.89706251798702, "learning_rate": 3.817539562746409e-06, "loss": 0.107757568359375, "step": 95330 }, { "epoch": 0.8243335552654106, "grad_norm": 2.6892716644193833, "learning_rate": 3.817343538574197e-06, "loss": 0.05309562683105469, "step": 95335 }, { "epoch": 0.8243767887869539, "grad_norm": 41.9399128545525, "learning_rate": 3.817147510632468e-06, "loss": 0.38062591552734376, "step": 95340 }, { "epoch": 0.8244200223084971, "grad_norm": 13.050072904200134, "learning_rate": 3.816951478922128e-06, "loss": 0.18708343505859376, "step": 95345 }, { "epoch": 0.8244632558300404, "grad_norm": 1.3618900984452338, "learning_rate": 3.816755443444079e-06, "loss": 0.07184600830078125, "step": 95350 }, { "epoch": 0.8245064893515837, "grad_norm": 0.40678916066260523, "learning_rate": 3.816559404199226e-06, "loss": 0.07082881927490234, "step": 95355 }, { "epoch": 0.8245497228731269, "grad_norm": 7.670512824536827, "learning_rate": 3.816363361188474e-06, "loss": 0.404949951171875, "step": 95360 }, { "epoch": 0.8245929563946701, "grad_norm": 0.7265629444647184, "learning_rate": 3.816167314412726e-06, "loss": 0.09910659790039063, "step": 95365 }, { "epoch": 0.8246361899162135, "grad_norm": 29.401379030165597, "learning_rate": 3.815971263872885e-06, "loss": 0.18910980224609375, "step": 95370 }, { "epoch": 0.8246794234377567, "grad_norm": 3.782342690057675, "learning_rate": 3.815775209569859e-06, "loss": 0.0654510498046875, "step": 95375 }, { "epoch": 0.8247226569592999, "grad_norm": 0.5775031020557257, "learning_rate": 3.815579151504549e-06, "loss": 0.0401519775390625, "step": 95380 }, { "epoch": 0.8247658904808433, "grad_norm": 16.135319332555696, "learning_rate": 3.81538308967786e-06, "loss": 0.224932861328125, "step": 95385 }, { "epoch": 0.8248091240023865, "grad_norm": 5.6641832239052015, "learning_rate": 3.815187024090697e-06, "loss": 0.23427848815917968, "step": 95390 }, { "epoch": 0.8248523575239297, "grad_norm": 7.566455017217345, "learning_rate": 3.8149909547439625e-06, "loss": 0.3728193283081055, "step": 95395 }, { "epoch": 0.824895591045473, "grad_norm": 8.303496845262897, "learning_rate": 3.814794881638561e-06, "loss": 0.05112953186035156, "step": 95400 }, { "epoch": 0.8249388245670163, "grad_norm": 0.31572991245062526, "learning_rate": 3.8145988047753983e-06, "loss": 0.34985198974609377, "step": 95405 }, { "epoch": 0.8249820580885595, "grad_norm": 12.775776640704102, "learning_rate": 3.8144027241553784e-06, "loss": 0.13977127075195311, "step": 95410 }, { "epoch": 0.8250252916101029, "grad_norm": 6.569639210762601, "learning_rate": 3.8142066397794045e-06, "loss": 0.22540435791015626, "step": 95415 }, { "epoch": 0.8250685251316461, "grad_norm": 12.365693793760219, "learning_rate": 3.8140105516483814e-06, "loss": 0.09448604583740235, "step": 95420 }, { "epoch": 0.8251117586531893, "grad_norm": 0.7479171481174728, "learning_rate": 3.813814459763214e-06, "loss": 0.0891693115234375, "step": 95425 }, { "epoch": 0.8251549921747326, "grad_norm": 0.9654986796142895, "learning_rate": 3.813618364124805e-06, "loss": 0.03987464904785156, "step": 95430 }, { "epoch": 0.8251982256962759, "grad_norm": 3.0383597146135237, "learning_rate": 3.8134222647340617e-06, "loss": 0.10267982482910157, "step": 95435 }, { "epoch": 0.8252414592178191, "grad_norm": 70.8279424394032, "learning_rate": 3.813226161591887e-06, "loss": 0.17004928588867188, "step": 95440 }, { "epoch": 0.8252846927393624, "grad_norm": 2.419214631978474, "learning_rate": 3.813030054699183e-06, "loss": 0.211627197265625, "step": 95445 }, { "epoch": 0.8253279262609057, "grad_norm": 1.7501486238583108, "learning_rate": 3.8128339440568584e-06, "loss": 0.1104583740234375, "step": 95450 }, { "epoch": 0.8253711597824489, "grad_norm": 3.6384430354463233, "learning_rate": 3.8126378296658137e-06, "loss": 0.25132598876953127, "step": 95455 }, { "epoch": 0.8254143933039921, "grad_norm": 45.51646932951335, "learning_rate": 3.8124417115269565e-06, "loss": 0.40915069580078123, "step": 95460 }, { "epoch": 0.8254576268255355, "grad_norm": 74.32865354628903, "learning_rate": 3.812245589641189e-06, "loss": 0.267156982421875, "step": 95465 }, { "epoch": 0.8255008603470787, "grad_norm": 19.90979806664767, "learning_rate": 3.8120494640094177e-06, "loss": 0.09115662574768066, "step": 95470 }, { "epoch": 0.8255440938686219, "grad_norm": 5.536901234221899, "learning_rate": 3.811853334632545e-06, "loss": 0.05745086669921875, "step": 95475 }, { "epoch": 0.8255873273901653, "grad_norm": 0.4600219959017181, "learning_rate": 3.811657201511477e-06, "loss": 0.019066238403320314, "step": 95480 }, { "epoch": 0.8256305609117085, "grad_norm": 7.3245265322882, "learning_rate": 3.8114610646471167e-06, "loss": 0.04866485595703125, "step": 95485 }, { "epoch": 0.8256737944332517, "grad_norm": 15.879154163968273, "learning_rate": 3.8112649240403705e-06, "loss": 0.09278182983398438, "step": 95490 }, { "epoch": 0.8257170279547951, "grad_norm": 11.586893923729681, "learning_rate": 3.8110687796921417e-06, "loss": 0.10515365600585938, "step": 95495 }, { "epoch": 0.8257602614763383, "grad_norm": 9.937104292902202, "learning_rate": 3.8108726316033366e-06, "loss": 0.17253570556640624, "step": 95500 }, { "epoch": 0.8258034949978815, "grad_norm": 0.5443320800321443, "learning_rate": 3.8106764797748574e-06, "loss": 0.035245513916015624, "step": 95505 }, { "epoch": 0.8258467285194249, "grad_norm": 8.278484358713513, "learning_rate": 3.8104803242076094e-06, "loss": 0.14273681640625, "step": 95510 }, { "epoch": 0.8258899620409681, "grad_norm": 3.0343548351564165, "learning_rate": 3.8102841649024995e-06, "loss": 0.2198089599609375, "step": 95515 }, { "epoch": 0.8259331955625113, "grad_norm": 40.432543461310004, "learning_rate": 3.8100880018604298e-06, "loss": 0.1420623779296875, "step": 95520 }, { "epoch": 0.8259764290840547, "grad_norm": 4.890040735901828, "learning_rate": 3.8098918350823056e-06, "loss": 0.1552032470703125, "step": 95525 }, { "epoch": 0.8260196626055979, "grad_norm": 5.022043932371652, "learning_rate": 3.809695664569032e-06, "loss": 0.05247421264648437, "step": 95530 }, { "epoch": 0.8260628961271411, "grad_norm": 0.9064352890371885, "learning_rate": 3.8094994903215136e-06, "loss": 0.32057647705078124, "step": 95535 }, { "epoch": 0.8261061296486844, "grad_norm": 14.609305967946272, "learning_rate": 3.8093033123406545e-06, "loss": 0.3564933776855469, "step": 95540 }, { "epoch": 0.8261493631702277, "grad_norm": 2.862455258375446, "learning_rate": 3.8091071306273608e-06, "loss": 0.19741992950439452, "step": 95545 }, { "epoch": 0.8261925966917709, "grad_norm": 12.753111337851555, "learning_rate": 3.8089109451825367e-06, "loss": 0.16610946655273437, "step": 95550 }, { "epoch": 0.8262358302133141, "grad_norm": 2.326051482069865, "learning_rate": 3.8087147560070865e-06, "loss": 0.11141357421875, "step": 95555 }, { "epoch": 0.8262790637348575, "grad_norm": 5.266272342275659, "learning_rate": 3.8085185631019147e-06, "loss": 0.096319580078125, "step": 95560 }, { "epoch": 0.8263222972564007, "grad_norm": 3.070041918899321, "learning_rate": 3.808322366467927e-06, "loss": 0.4031105041503906, "step": 95565 }, { "epoch": 0.826365530777944, "grad_norm": 37.909268203008715, "learning_rate": 3.8081261661060285e-06, "loss": 0.8208938598632812, "step": 95570 }, { "epoch": 0.8264087642994873, "grad_norm": 12.786241911270762, "learning_rate": 3.8079299620171224e-06, "loss": 0.22810592651367187, "step": 95575 }, { "epoch": 0.8264519978210305, "grad_norm": 0.9720267362226994, "learning_rate": 3.807733754202116e-06, "loss": 0.09109811782836914, "step": 95580 }, { "epoch": 0.8264952313425737, "grad_norm": 8.726509149800952, "learning_rate": 3.807537542661912e-06, "loss": 0.19645252227783203, "step": 95585 }, { "epoch": 0.8265384648641171, "grad_norm": 0.4393879003971361, "learning_rate": 3.807341327397417e-06, "loss": 0.2332530975341797, "step": 95590 }, { "epoch": 0.8265816983856603, "grad_norm": 0.6627865830649676, "learning_rate": 3.807145108409534e-06, "loss": 0.25090560913085935, "step": 95595 }, { "epoch": 0.8266249319072035, "grad_norm": 36.43397763425057, "learning_rate": 3.80694888569917e-06, "loss": 0.11245651245117187, "step": 95600 }, { "epoch": 0.8266681654287469, "grad_norm": 0.442597057383638, "learning_rate": 3.8067526592672288e-06, "loss": 0.04542732238769531, "step": 95605 }, { "epoch": 0.8267113989502901, "grad_norm": 1.4873139669129212, "learning_rate": 3.8065564291146153e-06, "loss": 0.3209362030029297, "step": 95610 }, { "epoch": 0.8267546324718333, "grad_norm": 1.824430387425033, "learning_rate": 3.806360195242236e-06, "loss": 0.1264892578125, "step": 95615 }, { "epoch": 0.8267978659933767, "grad_norm": 2.8807321372520684, "learning_rate": 3.806163957650994e-06, "loss": 0.08635406494140625, "step": 95620 }, { "epoch": 0.8268410995149199, "grad_norm": 6.197424230441856, "learning_rate": 3.8059677163417937e-06, "loss": 0.14070510864257812, "step": 95625 }, { "epoch": 0.8268843330364631, "grad_norm": 20.26600686500017, "learning_rate": 3.805771471315543e-06, "loss": 0.17217330932617186, "step": 95630 }, { "epoch": 0.8269275665580064, "grad_norm": 2.648908376413645, "learning_rate": 3.8055752225731463e-06, "loss": 0.18927001953125, "step": 95635 }, { "epoch": 0.8269708000795497, "grad_norm": 6.6316320298092615, "learning_rate": 3.8053789701155063e-06, "loss": 0.28469047546386717, "step": 95640 }, { "epoch": 0.8270140336010929, "grad_norm": 31.42183909469889, "learning_rate": 3.8051827139435306e-06, "loss": 0.10569534301757813, "step": 95645 }, { "epoch": 0.8270572671226362, "grad_norm": 1.6443816237921027, "learning_rate": 3.8049864540581223e-06, "loss": 0.14394969940185548, "step": 95650 }, { "epoch": 0.8271005006441795, "grad_norm": 0.6668723080610456, "learning_rate": 3.804790190460188e-06, "loss": 0.18204498291015625, "step": 95655 }, { "epoch": 0.8271437341657227, "grad_norm": 3.214704430979094, "learning_rate": 3.804593923150633e-06, "loss": 0.34579925537109374, "step": 95660 }, { "epoch": 0.827186967687266, "grad_norm": 32.31574033766616, "learning_rate": 3.804397652130362e-06, "loss": 0.3575653076171875, "step": 95665 }, { "epoch": 0.8272302012088093, "grad_norm": 5.736805256808461, "learning_rate": 3.804201377400279e-06, "loss": 0.042804718017578125, "step": 95670 }, { "epoch": 0.8272734347303525, "grad_norm": 1.6406854559365114, "learning_rate": 3.8040050989612913e-06, "loss": 0.0773956298828125, "step": 95675 }, { "epoch": 0.8273166682518958, "grad_norm": 6.4414601022446, "learning_rate": 3.803808816814303e-06, "loss": 0.16007232666015625, "step": 95680 }, { "epoch": 0.8273599017734391, "grad_norm": 3.0597292498681727, "learning_rate": 3.8036125309602184e-06, "loss": 0.11777362823486329, "step": 95685 }, { "epoch": 0.8274031352949823, "grad_norm": 4.1348737004923395, "learning_rate": 3.803416241399945e-06, "loss": 0.1287841796875, "step": 95690 }, { "epoch": 0.8274463688165256, "grad_norm": 11.577110691165268, "learning_rate": 3.803219948134387e-06, "loss": 0.08471298217773438, "step": 95695 }, { "epoch": 0.8274896023380689, "grad_norm": 0.17370876396541798, "learning_rate": 3.803023651164449e-06, "loss": 0.060467529296875, "step": 95700 }, { "epoch": 0.8275328358596121, "grad_norm": 0.9387501797168367, "learning_rate": 3.802827350491037e-06, "loss": 0.09315719604492187, "step": 95705 }, { "epoch": 0.8275760693811554, "grad_norm": 23.319858728522167, "learning_rate": 3.8026310461150563e-06, "loss": 0.191485595703125, "step": 95710 }, { "epoch": 0.8276193029026986, "grad_norm": 8.17465945111339, "learning_rate": 3.8024347380374124e-06, "loss": 0.05931587219238281, "step": 95715 }, { "epoch": 0.8276625364242419, "grad_norm": 1.7609410893400452, "learning_rate": 3.8022384262590096e-06, "loss": 0.16466827392578126, "step": 95720 }, { "epoch": 0.8277057699457852, "grad_norm": 6.416945093887552, "learning_rate": 3.802042110780755e-06, "loss": 0.12315177917480469, "step": 95725 }, { "epoch": 0.8277490034673284, "grad_norm": 1.9389098729574497, "learning_rate": 3.801845791603552e-06, "loss": 0.13719635009765624, "step": 95730 }, { "epoch": 0.8277922369888717, "grad_norm": 1.8863295183192101, "learning_rate": 3.8016494687283073e-06, "loss": 0.10592269897460938, "step": 95735 }, { "epoch": 0.827835470510415, "grad_norm": 3.2461664579095406, "learning_rate": 3.801453142155927e-06, "loss": 0.28241729736328125, "step": 95740 }, { "epoch": 0.8278787040319582, "grad_norm": 2.491929639833242, "learning_rate": 3.8012568118873158e-06, "loss": 0.07102317810058593, "step": 95745 }, { "epoch": 0.8279219375535015, "grad_norm": 13.792403809394916, "learning_rate": 3.801060477923377e-06, "loss": 0.10716533660888672, "step": 95750 }, { "epoch": 0.8279651710750447, "grad_norm": 0.18501003608168265, "learning_rate": 3.8008641402650203e-06, "loss": 0.20724067687988282, "step": 95755 }, { "epoch": 0.828008404596588, "grad_norm": 38.78648884936868, "learning_rate": 3.8006677989131474e-06, "loss": 0.24662322998046876, "step": 95760 }, { "epoch": 0.8280516381181313, "grad_norm": 4.355212046455161, "learning_rate": 3.8004714538686657e-06, "loss": 0.44315185546875, "step": 95765 }, { "epoch": 0.8280948716396745, "grad_norm": 24.939219340700774, "learning_rate": 3.800275105132481e-06, "loss": 0.27096710205078123, "step": 95770 }, { "epoch": 0.8281381051612178, "grad_norm": 26.182711449550077, "learning_rate": 3.8000787527054982e-06, "loss": 0.5129981994628906, "step": 95775 }, { "epoch": 0.8281813386827611, "grad_norm": 8.19408986710705, "learning_rate": 3.799882396588622e-06, "loss": 0.19535560607910157, "step": 95780 }, { "epoch": 0.8282245722043043, "grad_norm": 14.979791567745332, "learning_rate": 3.7996860367827596e-06, "loss": 0.2756053924560547, "step": 95785 }, { "epoch": 0.8282678057258476, "grad_norm": 16.752970763802654, "learning_rate": 3.7994896732888153e-06, "loss": 0.452166748046875, "step": 95790 }, { "epoch": 0.8283110392473909, "grad_norm": 21.2067864983389, "learning_rate": 3.799293306107695e-06, "loss": 0.39158172607421876, "step": 95795 }, { "epoch": 0.8283542727689341, "grad_norm": 0.3583464929644535, "learning_rate": 3.799096935240306e-06, "loss": 0.09263153076171875, "step": 95800 }, { "epoch": 0.8283975062904774, "grad_norm": 2.7017841408985896, "learning_rate": 3.7989005606875515e-06, "loss": 0.1225341796875, "step": 95805 }, { "epoch": 0.8284407398120206, "grad_norm": 2.5838028935031967, "learning_rate": 3.798704182450339e-06, "loss": 0.14229736328125, "step": 95810 }, { "epoch": 0.8284839733335639, "grad_norm": 8.059198024805148, "learning_rate": 3.7985078005295723e-06, "loss": 0.07872695922851562, "step": 95815 }, { "epoch": 0.8285272068551072, "grad_norm": 2.341713371583922, "learning_rate": 3.7983114149261583e-06, "loss": 0.30028076171875, "step": 95820 }, { "epoch": 0.8285704403766504, "grad_norm": 5.676512651334515, "learning_rate": 3.7981150256410035e-06, "loss": 0.05584793090820313, "step": 95825 }, { "epoch": 0.8286136738981937, "grad_norm": 0.10875422780043757, "learning_rate": 3.7979186326750123e-06, "loss": 0.04439697265625, "step": 95830 }, { "epoch": 0.828656907419737, "grad_norm": 50.29880275958587, "learning_rate": 3.7977222360290908e-06, "loss": 0.21462631225585938, "step": 95835 }, { "epoch": 0.8287001409412802, "grad_norm": 1.1758638085705042, "learning_rate": 3.7975258357041456e-06, "loss": 0.15111923217773438, "step": 95840 }, { "epoch": 0.8287433744628235, "grad_norm": 46.959476267383245, "learning_rate": 3.7973294317010805e-06, "loss": 0.253387451171875, "step": 95845 }, { "epoch": 0.8287866079843668, "grad_norm": 7.480824118320203, "learning_rate": 3.7971330240208027e-06, "loss": 0.038469696044921876, "step": 95850 }, { "epoch": 0.82882984150591, "grad_norm": 20.331596238252583, "learning_rate": 3.7969366126642185e-06, "loss": 0.06359710693359374, "step": 95855 }, { "epoch": 0.8288730750274533, "grad_norm": 31.753623065472066, "learning_rate": 3.7967401976322318e-06, "loss": 0.2332489013671875, "step": 95860 }, { "epoch": 0.8289163085489966, "grad_norm": 0.6339120687453512, "learning_rate": 3.796543778925751e-06, "loss": 0.05875396728515625, "step": 95865 }, { "epoch": 0.8289595420705398, "grad_norm": 3.322713307562633, "learning_rate": 3.7963473565456805e-06, "loss": 0.08140411376953124, "step": 95870 }, { "epoch": 0.8290027755920831, "grad_norm": 14.012644133244423, "learning_rate": 3.796150930492926e-06, "loss": 0.13845329284667968, "step": 95875 }, { "epoch": 0.8290460091136264, "grad_norm": 3.4924342757576294, "learning_rate": 3.795954500768394e-06, "loss": 0.13249435424804687, "step": 95880 }, { "epoch": 0.8290892426351696, "grad_norm": 4.584249673636428, "learning_rate": 3.79575806737299e-06, "loss": 0.09507827758789063, "step": 95885 }, { "epoch": 0.8291324761567128, "grad_norm": 8.060292932614834, "learning_rate": 3.7955616303076204e-06, "loss": 0.1957775115966797, "step": 95890 }, { "epoch": 0.8291757096782562, "grad_norm": 33.05344634375383, "learning_rate": 3.79536518957319e-06, "loss": 0.1350006103515625, "step": 95895 }, { "epoch": 0.8292189431997994, "grad_norm": 1.8809482707415957, "learning_rate": 3.7951687451706065e-06, "loss": 0.077752685546875, "step": 95900 }, { "epoch": 0.8292621767213426, "grad_norm": 21.501829093483778, "learning_rate": 3.794972297100775e-06, "loss": 0.1554872512817383, "step": 95905 }, { "epoch": 0.829305410242886, "grad_norm": 4.720771183326066, "learning_rate": 3.7947758453646007e-06, "loss": 0.04782257080078125, "step": 95910 }, { "epoch": 0.8293486437644292, "grad_norm": 0.8740606239863339, "learning_rate": 3.7945793899629915e-06, "loss": 0.06777801513671874, "step": 95915 }, { "epoch": 0.8293918772859724, "grad_norm": 8.313401672236116, "learning_rate": 3.794382930896852e-06, "loss": 0.19737701416015624, "step": 95920 }, { "epoch": 0.8294351108075158, "grad_norm": 1.7458887724611722, "learning_rate": 3.7941864681670883e-06, "loss": 0.07686729431152343, "step": 95925 }, { "epoch": 0.829478344329059, "grad_norm": 24.942083585193274, "learning_rate": 3.793990001774606e-06, "loss": 0.2157928466796875, "step": 95930 }, { "epoch": 0.8295215778506022, "grad_norm": 36.11602373287351, "learning_rate": 3.793793531720314e-06, "loss": 0.14629974365234374, "step": 95935 }, { "epoch": 0.8295648113721455, "grad_norm": 0.6689101144684331, "learning_rate": 3.793597058005114e-06, "loss": 0.031353759765625, "step": 95940 }, { "epoch": 0.8296080448936888, "grad_norm": 4.055977329758264, "learning_rate": 3.7934005806299168e-06, "loss": 0.13619537353515626, "step": 95945 }, { "epoch": 0.829651278415232, "grad_norm": 0.12469999229765354, "learning_rate": 3.7932040995956256e-06, "loss": 0.015305709838867188, "step": 95950 }, { "epoch": 0.8296945119367753, "grad_norm": 0.6522590274819472, "learning_rate": 3.793007614903146e-06, "loss": 0.17346725463867188, "step": 95955 }, { "epoch": 0.8297377454583186, "grad_norm": 0.9829163793386321, "learning_rate": 3.792811126553386e-06, "loss": 0.2924949645996094, "step": 95960 }, { "epoch": 0.8297809789798618, "grad_norm": 54.910898218917254, "learning_rate": 3.792614634547252e-06, "loss": 0.24766845703125, "step": 95965 }, { "epoch": 0.829824212501405, "grad_norm": 7.065856713237965, "learning_rate": 3.792418138885649e-06, "loss": 0.1480224609375, "step": 95970 }, { "epoch": 0.8298674460229484, "grad_norm": 10.002986401380838, "learning_rate": 3.7922216395694824e-06, "loss": 0.12006149291992188, "step": 95975 }, { "epoch": 0.8299106795444916, "grad_norm": 20.98897998424471, "learning_rate": 3.792025136599661e-06, "loss": 0.20168075561523438, "step": 95980 }, { "epoch": 0.8299539130660348, "grad_norm": 4.737082001764916, "learning_rate": 3.7918286299770888e-06, "loss": 0.40008392333984377, "step": 95985 }, { "epoch": 0.8299971465875782, "grad_norm": 2.3533211079211176, "learning_rate": 3.7916321197026723e-06, "loss": 0.0811767578125, "step": 95990 }, { "epoch": 0.8300403801091214, "grad_norm": 18.85254342837717, "learning_rate": 3.79143560577732e-06, "loss": 0.21679420471191407, "step": 95995 }, { "epoch": 0.8300836136306646, "grad_norm": 6.71897600082717, "learning_rate": 3.791239088201936e-06, "loss": 0.25599365234375, "step": 96000 }, { "epoch": 0.830126847152208, "grad_norm": 17.394801376565066, "learning_rate": 3.791042566977427e-06, "loss": 0.344879150390625, "step": 96005 }, { "epoch": 0.8301700806737512, "grad_norm": 14.311765670246684, "learning_rate": 3.7908460421046998e-06, "loss": 0.189190673828125, "step": 96010 }, { "epoch": 0.8302133141952944, "grad_norm": 6.186646022678049, "learning_rate": 3.79064951358466e-06, "loss": 0.17374496459960936, "step": 96015 }, { "epoch": 0.8302565477168378, "grad_norm": 17.36800234570328, "learning_rate": 3.790452981418215e-06, "loss": 0.14225997924804687, "step": 96020 }, { "epoch": 0.830299781238381, "grad_norm": 1.769661375318304, "learning_rate": 3.790256445606271e-06, "loss": 0.13004608154296876, "step": 96025 }, { "epoch": 0.8303430147599242, "grad_norm": 4.408337558668798, "learning_rate": 3.790059906149734e-06, "loss": 0.434930419921875, "step": 96030 }, { "epoch": 0.8303862482814676, "grad_norm": 3.411722900059765, "learning_rate": 3.7898633630495096e-06, "loss": 0.04676074981689453, "step": 96035 }, { "epoch": 0.8304294818030108, "grad_norm": 1.0091848741682727, "learning_rate": 3.789666816306506e-06, "loss": 0.085382080078125, "step": 96040 }, { "epoch": 0.830472715324554, "grad_norm": 7.002205424621634, "learning_rate": 3.789470265921629e-06, "loss": 0.0523223876953125, "step": 96045 }, { "epoch": 0.8305159488460974, "grad_norm": 0.5506691564690136, "learning_rate": 3.7892737118957843e-06, "loss": 0.268585205078125, "step": 96050 }, { "epoch": 0.8305591823676406, "grad_norm": 9.225581548912709, "learning_rate": 3.7890771542298794e-06, "loss": 0.3017322540283203, "step": 96055 }, { "epoch": 0.8306024158891838, "grad_norm": 30.669031930977134, "learning_rate": 3.7888805929248204e-06, "loss": 0.252178955078125, "step": 96060 }, { "epoch": 0.830645649410727, "grad_norm": 5.146220681934366, "learning_rate": 3.788684027981514e-06, "loss": 0.12392082214355468, "step": 96065 }, { "epoch": 0.8306888829322704, "grad_norm": 9.093596128377772, "learning_rate": 3.7884874594008653e-06, "loss": 0.118011474609375, "step": 96070 }, { "epoch": 0.8307321164538136, "grad_norm": 10.89610757307477, "learning_rate": 3.7882908871837827e-06, "loss": 0.24433746337890624, "step": 96075 }, { "epoch": 0.8307753499753568, "grad_norm": 15.310925075879057, "learning_rate": 3.7880943113311734e-06, "loss": 0.119659423828125, "step": 96080 }, { "epoch": 0.8308185834969002, "grad_norm": 1.463517678195856, "learning_rate": 3.787897731843942e-06, "loss": 0.07136650085449218, "step": 96085 }, { "epoch": 0.8308618170184434, "grad_norm": 22.141553751869488, "learning_rate": 3.7877011487229952e-06, "loss": 0.4105194091796875, "step": 96090 }, { "epoch": 0.8309050505399866, "grad_norm": 1.731879702255007, "learning_rate": 3.7875045619692417e-06, "loss": 0.07343864440917969, "step": 96095 }, { "epoch": 0.83094828406153, "grad_norm": 0.6369167114372745, "learning_rate": 3.7873079715835853e-06, "loss": 0.06705818176269532, "step": 96100 }, { "epoch": 0.8309915175830732, "grad_norm": 31.11690484125819, "learning_rate": 3.7871113775669346e-06, "loss": 0.12911567687988282, "step": 96105 }, { "epoch": 0.8310347511046164, "grad_norm": 0.9265997974281043, "learning_rate": 3.786914779920196e-06, "loss": 0.10887680053710938, "step": 96110 }, { "epoch": 0.8310779846261598, "grad_norm": 2.492354978894703, "learning_rate": 3.7867181786442755e-06, "loss": 0.02281646728515625, "step": 96115 }, { "epoch": 0.831121218147703, "grad_norm": 0.7206825350690201, "learning_rate": 3.7865215737400805e-06, "loss": 0.2692840576171875, "step": 96120 }, { "epoch": 0.8311644516692462, "grad_norm": 3.5738850225252623, "learning_rate": 3.7863249652085187e-06, "loss": 0.08875503540039062, "step": 96125 }, { "epoch": 0.8312076851907896, "grad_norm": 0.08618169861087935, "learning_rate": 3.786128353050493e-06, "loss": 0.3274364471435547, "step": 96130 }, { "epoch": 0.8312509187123328, "grad_norm": 1.0207522693353497, "learning_rate": 3.7859317372669148e-06, "loss": 0.14720993041992186, "step": 96135 }, { "epoch": 0.831294152233876, "grad_norm": 0.39667421853266893, "learning_rate": 3.7857351178586884e-06, "loss": 0.1299774169921875, "step": 96140 }, { "epoch": 0.8313373857554193, "grad_norm": 0.7017283987336739, "learning_rate": 3.7855384948267216e-06, "loss": 0.0710601806640625, "step": 96145 }, { "epoch": 0.8313806192769626, "grad_norm": 0.8273723972302289, "learning_rate": 3.7853418681719196e-06, "loss": 0.03449935913085937, "step": 96150 }, { "epoch": 0.8314238527985058, "grad_norm": 64.49954643009299, "learning_rate": 3.7851452378951907e-06, "loss": 0.15400238037109376, "step": 96155 }, { "epoch": 0.8314670863200491, "grad_norm": 0.3744248176956144, "learning_rate": 3.7849486039974416e-06, "loss": 0.09820709228515626, "step": 96160 }, { "epoch": 0.8315103198415924, "grad_norm": 7.289430355657631, "learning_rate": 3.7847519664795792e-06, "loss": 0.04309158325195313, "step": 96165 }, { "epoch": 0.8315535533631356, "grad_norm": 26.350276314421883, "learning_rate": 3.7845553253425094e-06, "loss": 0.276690673828125, "step": 96170 }, { "epoch": 0.8315967868846789, "grad_norm": 0.7935960818674688, "learning_rate": 3.7843586805871405e-06, "loss": 0.10256195068359375, "step": 96175 }, { "epoch": 0.8316400204062222, "grad_norm": 1.5411795746191583, "learning_rate": 3.784162032214378e-06, "loss": 0.031039047241210937, "step": 96180 }, { "epoch": 0.8316832539277654, "grad_norm": 2.640493222636719, "learning_rate": 3.7839653802251294e-06, "loss": 0.42412490844726564, "step": 96185 }, { "epoch": 0.8317264874493087, "grad_norm": 4.907609243644374, "learning_rate": 3.7837687246203023e-06, "loss": 0.19598159790039063, "step": 96190 }, { "epoch": 0.831769720970852, "grad_norm": 0.6072035148938351, "learning_rate": 3.7835720654008027e-06, "loss": 0.0474945068359375, "step": 96195 }, { "epoch": 0.8318129544923952, "grad_norm": 18.14670239963452, "learning_rate": 3.7833754025675383e-06, "loss": 0.3537689208984375, "step": 96200 }, { "epoch": 0.8318561880139385, "grad_norm": 0.6611847761683082, "learning_rate": 3.783178736121416e-06, "loss": 0.1934814453125, "step": 96205 }, { "epoch": 0.8318994215354818, "grad_norm": 5.103645047313432, "learning_rate": 3.7829820660633415e-06, "loss": 0.16406688690185547, "step": 96210 }, { "epoch": 0.831942655057025, "grad_norm": 2.1113274987865682, "learning_rate": 3.7827853923942235e-06, "loss": 0.082568359375, "step": 96215 }, { "epoch": 0.8319858885785683, "grad_norm": 0.9042990787388023, "learning_rate": 3.782588715114969e-06, "loss": 0.23360671997070312, "step": 96220 }, { "epoch": 0.8320291221001116, "grad_norm": 24.287174303849984, "learning_rate": 3.782392034226483e-06, "loss": 0.37569198608398435, "step": 96225 }, { "epoch": 0.8320723556216548, "grad_norm": 0.34383237944385076, "learning_rate": 3.7821953497296756e-06, "loss": 0.15826797485351562, "step": 96230 }, { "epoch": 0.832115589143198, "grad_norm": 1.2489083351451153, "learning_rate": 3.781998661625452e-06, "loss": 0.12046318054199219, "step": 96235 }, { "epoch": 0.8321588226647413, "grad_norm": 3.341478909689922, "learning_rate": 3.781801969914719e-06, "loss": 0.1612762451171875, "step": 96240 }, { "epoch": 0.8322020561862846, "grad_norm": 14.937519219549857, "learning_rate": 3.7816052745983845e-06, "loss": 0.155120849609375, "step": 96245 }, { "epoch": 0.8322452897078279, "grad_norm": 0.8589340148697633, "learning_rate": 3.7814085756773564e-06, "loss": 0.112255859375, "step": 96250 }, { "epoch": 0.8322885232293711, "grad_norm": 6.508806580514579, "learning_rate": 3.7812118731525406e-06, "loss": 0.13814773559570312, "step": 96255 }, { "epoch": 0.8323317567509144, "grad_norm": 2.029215038979377, "learning_rate": 3.7810151670248447e-06, "loss": 0.1558765411376953, "step": 96260 }, { "epoch": 0.8323749902724576, "grad_norm": 3.9398065885242506, "learning_rate": 3.7808184572951755e-06, "loss": 0.05290431976318359, "step": 96265 }, { "epoch": 0.8324182237940009, "grad_norm": 0.4483069384782662, "learning_rate": 3.780621743964441e-06, "loss": 0.04987659454345703, "step": 96270 }, { "epoch": 0.8324614573155442, "grad_norm": 2.9891230326330933, "learning_rate": 3.7804250270335477e-06, "loss": 0.15023193359375, "step": 96275 }, { "epoch": 0.8325046908370874, "grad_norm": 1.188968733954632, "learning_rate": 3.7802283065034033e-06, "loss": 0.015967559814453126, "step": 96280 }, { "epoch": 0.8325479243586307, "grad_norm": 24.16069470690506, "learning_rate": 3.780031582374915e-06, "loss": 0.11925201416015625, "step": 96285 }, { "epoch": 0.832591157880174, "grad_norm": 4.711835995620194, "learning_rate": 3.7798348546489893e-06, "loss": 0.105902099609375, "step": 96290 }, { "epoch": 0.8326343914017172, "grad_norm": 1.92435376115014, "learning_rate": 3.779638123326535e-06, "loss": 0.05945587158203125, "step": 96295 }, { "epoch": 0.8326776249232605, "grad_norm": 0.09780378902001959, "learning_rate": 3.779441388408458e-06, "loss": 0.2457141876220703, "step": 96300 }, { "epoch": 0.8327208584448038, "grad_norm": 5.1398596994223, "learning_rate": 3.7792446498956664e-06, "loss": 0.0967864990234375, "step": 96305 }, { "epoch": 0.832764091966347, "grad_norm": 4.5001784894998025, "learning_rate": 3.7790479077890675e-06, "loss": 0.06324310302734375, "step": 96310 }, { "epoch": 0.8328073254878903, "grad_norm": 22.418877872584925, "learning_rate": 3.7788511620895687e-06, "loss": 0.309564208984375, "step": 96315 }, { "epoch": 0.8328505590094335, "grad_norm": 5.435888817360604, "learning_rate": 3.7786544127980767e-06, "loss": 0.2827056884765625, "step": 96320 }, { "epoch": 0.8328937925309768, "grad_norm": 7.114688857418344, "learning_rate": 3.778457659915499e-06, "loss": 0.090045166015625, "step": 96325 }, { "epoch": 0.8329370260525201, "grad_norm": 4.367512195471608, "learning_rate": 3.778260903442744e-06, "loss": 0.186822509765625, "step": 96330 }, { "epoch": 0.8329802595740633, "grad_norm": 9.934758785650052, "learning_rate": 3.778064143380719e-06, "loss": 0.08712005615234375, "step": 96335 }, { "epoch": 0.8330234930956066, "grad_norm": 13.324781390404791, "learning_rate": 3.7778673797303294e-06, "loss": 0.07202663421630859, "step": 96340 }, { "epoch": 0.8330667266171499, "grad_norm": 14.753412168628019, "learning_rate": 3.777670612492485e-06, "loss": 0.17009353637695312, "step": 96345 }, { "epoch": 0.8331099601386931, "grad_norm": 22.374371379638085, "learning_rate": 3.777473841668093e-06, "loss": 0.41983642578125, "step": 96350 }, { "epoch": 0.8331531936602364, "grad_norm": 16.052340803458517, "learning_rate": 3.777277067258059e-06, "loss": 0.16586036682128907, "step": 96355 }, { "epoch": 0.8331964271817797, "grad_norm": 0.07840267804080747, "learning_rate": 3.7770802892632923e-06, "loss": 0.11456642150878907, "step": 96360 }, { "epoch": 0.8332396607033229, "grad_norm": 5.25089767902959, "learning_rate": 3.776883507684701e-06, "loss": 0.0684967041015625, "step": 96365 }, { "epoch": 0.8332828942248662, "grad_norm": 5.291145383317298, "learning_rate": 3.7766867225231903e-06, "loss": 0.0930511474609375, "step": 96370 }, { "epoch": 0.8333261277464095, "grad_norm": 6.746620757103072, "learning_rate": 3.77648993377967e-06, "loss": 0.14512100219726562, "step": 96375 }, { "epoch": 0.8333693612679527, "grad_norm": 19.743790429272558, "learning_rate": 3.7762931414550457e-06, "loss": 0.1988882064819336, "step": 96380 }, { "epoch": 0.833412594789496, "grad_norm": 16.85726835350797, "learning_rate": 3.776096345550227e-06, "loss": 0.33489990234375, "step": 96385 }, { "epoch": 0.8334558283110393, "grad_norm": 1.2153221131355503, "learning_rate": 3.7758995460661206e-06, "loss": 0.062349700927734376, "step": 96390 }, { "epoch": 0.8334990618325825, "grad_norm": 15.505502818058133, "learning_rate": 3.775702743003634e-06, "loss": 0.12258453369140625, "step": 96395 }, { "epoch": 0.8335422953541258, "grad_norm": 12.502560451032704, "learning_rate": 3.775505936363674e-06, "loss": 0.22769527435302733, "step": 96400 }, { "epoch": 0.8335855288756691, "grad_norm": 34.6096234408278, "learning_rate": 3.77530912614715e-06, "loss": 0.1627593994140625, "step": 96405 }, { "epoch": 0.8336287623972123, "grad_norm": 0.6344200219918071, "learning_rate": 3.775112312354968e-06, "loss": 0.016579246520996092, "step": 96410 }, { "epoch": 0.8336719959187555, "grad_norm": 14.355816932664865, "learning_rate": 3.774915494988038e-06, "loss": 0.2018157958984375, "step": 96415 }, { "epoch": 0.8337152294402989, "grad_norm": 5.600967463024027, "learning_rate": 3.7747186740472647e-06, "loss": 0.03474712371826172, "step": 96420 }, { "epoch": 0.8337584629618421, "grad_norm": 11.46847112890929, "learning_rate": 3.774521849533558e-06, "loss": 0.21033134460449218, "step": 96425 }, { "epoch": 0.8338016964833853, "grad_norm": 10.566190432880962, "learning_rate": 3.774325021447825e-06, "loss": 0.11905059814453126, "step": 96430 }, { "epoch": 0.8338449300049287, "grad_norm": 23.61415062480093, "learning_rate": 3.7741281897909734e-06, "loss": 0.22647705078125, "step": 96435 }, { "epoch": 0.8338881635264719, "grad_norm": 1.1059354059087416, "learning_rate": 3.7739313545639105e-06, "loss": 0.0773956298828125, "step": 96440 }, { "epoch": 0.8339313970480151, "grad_norm": 16.192585150689094, "learning_rate": 3.7737345157675448e-06, "loss": 0.16335830688476563, "step": 96445 }, { "epoch": 0.8339746305695585, "grad_norm": 8.200912282919838, "learning_rate": 3.773537673402784e-06, "loss": 0.203143310546875, "step": 96450 }, { "epoch": 0.8340178640911017, "grad_norm": 7.3396712394385, "learning_rate": 3.7733408274705365e-06, "loss": 0.20358772277832032, "step": 96455 }, { "epoch": 0.8340610976126449, "grad_norm": 1.2139952658339215, "learning_rate": 3.773143977971709e-06, "loss": 0.2485626220703125, "step": 96460 }, { "epoch": 0.8341043311341882, "grad_norm": 48.587727626321914, "learning_rate": 3.7729471249072093e-06, "loss": 0.29858856201171874, "step": 96465 }, { "epoch": 0.8341475646557315, "grad_norm": 16.19739753553969, "learning_rate": 3.772750268277946e-06, "loss": 0.08494338989257813, "step": 96470 }, { "epoch": 0.8341907981772747, "grad_norm": 0.631336662015237, "learning_rate": 3.772553408084827e-06, "loss": 0.1323974609375, "step": 96475 }, { "epoch": 0.834234031698818, "grad_norm": 0.530388328853129, "learning_rate": 3.772356544328759e-06, "loss": 0.198504638671875, "step": 96480 }, { "epoch": 0.8342772652203613, "grad_norm": 2.974008775076336, "learning_rate": 3.772159677010652e-06, "loss": 0.19759864807128907, "step": 96485 }, { "epoch": 0.8343204987419045, "grad_norm": 0.5166576635026691, "learning_rate": 3.7719628061314125e-06, "loss": 0.1967041015625, "step": 96490 }, { "epoch": 0.8343637322634477, "grad_norm": 43.201955668351154, "learning_rate": 3.7717659316919482e-06, "loss": 0.28394775390625, "step": 96495 }, { "epoch": 0.8344069657849911, "grad_norm": 2.985207192733869, "learning_rate": 3.771569053693168e-06, "loss": 0.04622955322265625, "step": 96500 }, { "epoch": 0.8344501993065343, "grad_norm": 1.6788762392866465, "learning_rate": 3.77137217213598e-06, "loss": 0.1456085205078125, "step": 96505 }, { "epoch": 0.8344934328280775, "grad_norm": 12.321532921454354, "learning_rate": 3.7711752870212917e-06, "loss": 0.07692184448242187, "step": 96510 }, { "epoch": 0.8345366663496209, "grad_norm": 0.969878587423087, "learning_rate": 3.7709783983500103e-06, "loss": 0.02071533203125, "step": 96515 }, { "epoch": 0.8345798998711641, "grad_norm": 14.567172374411133, "learning_rate": 3.770781506123045e-06, "loss": 0.0535614013671875, "step": 96520 }, { "epoch": 0.8346231333927073, "grad_norm": 0.1329765071213829, "learning_rate": 3.7705846103413037e-06, "loss": 0.08035449981689453, "step": 96525 }, { "epoch": 0.8346663669142507, "grad_norm": 7.845059667022285, "learning_rate": 3.7703877110056937e-06, "loss": 0.085321044921875, "step": 96530 }, { "epoch": 0.8347096004357939, "grad_norm": 29.91435216428069, "learning_rate": 3.7701908081171244e-06, "loss": 0.2470458984375, "step": 96535 }, { "epoch": 0.8347528339573371, "grad_norm": 16.77354438369668, "learning_rate": 3.769993901676503e-06, "loss": 0.1400177001953125, "step": 96540 }, { "epoch": 0.8347960674788805, "grad_norm": 7.728812514058776, "learning_rate": 3.769796991684737e-06, "loss": 0.0447174072265625, "step": 96545 }, { "epoch": 0.8348393010004237, "grad_norm": 1.7781785126052738, "learning_rate": 3.769600078142736e-06, "loss": 0.0223052978515625, "step": 96550 }, { "epoch": 0.8348825345219669, "grad_norm": 5.960585109152105, "learning_rate": 3.769403161051407e-06, "loss": 0.04146499633789062, "step": 96555 }, { "epoch": 0.8349257680435103, "grad_norm": 11.055590106696174, "learning_rate": 3.769206240411659e-06, "loss": 0.3443931579589844, "step": 96560 }, { "epoch": 0.8349690015650535, "grad_norm": 8.88521319012635, "learning_rate": 3.7690093162244e-06, "loss": 0.0682952880859375, "step": 96565 }, { "epoch": 0.8350122350865967, "grad_norm": 32.703224991980186, "learning_rate": 3.7688123884905382e-06, "loss": 0.5269630432128907, "step": 96570 }, { "epoch": 0.8350554686081401, "grad_norm": 8.135248682032332, "learning_rate": 3.76861545721098e-06, "loss": 0.0822540283203125, "step": 96575 }, { "epoch": 0.8350987021296833, "grad_norm": 0.7302916969094099, "learning_rate": 3.7684185223866357e-06, "loss": 0.16744842529296874, "step": 96580 }, { "epoch": 0.8351419356512265, "grad_norm": 7.388387873455563, "learning_rate": 3.7682215840184146e-06, "loss": 0.28488502502441404, "step": 96585 }, { "epoch": 0.8351851691727697, "grad_norm": 6.875567052551374, "learning_rate": 3.768024642107223e-06, "loss": 0.09401626586914062, "step": 96590 }, { "epoch": 0.8352284026943131, "grad_norm": 3.8274212061702504, "learning_rate": 3.7678276966539686e-06, "loss": 0.088140869140625, "step": 96595 }, { "epoch": 0.8352716362158563, "grad_norm": 2.5627269821588055, "learning_rate": 3.7676307476595612e-06, "loss": 0.18732452392578125, "step": 96600 }, { "epoch": 0.8353148697373995, "grad_norm": 7.005477458467124, "learning_rate": 3.7674337951249086e-06, "loss": 0.05909423828125, "step": 96605 }, { "epoch": 0.8353581032589429, "grad_norm": 10.764476129696222, "learning_rate": 3.7672368390509186e-06, "loss": 0.112701416015625, "step": 96610 }, { "epoch": 0.8354013367804861, "grad_norm": 24.71465113670981, "learning_rate": 3.767039879438501e-06, "loss": 0.22211723327636718, "step": 96615 }, { "epoch": 0.8354445703020293, "grad_norm": 1.4371465833411572, "learning_rate": 3.7668429162885637e-06, "loss": 0.28282470703125, "step": 96620 }, { "epoch": 0.8354878038235727, "grad_norm": 40.30444372255531, "learning_rate": 3.7666459496020136e-06, "loss": 0.33711090087890627, "step": 96625 }, { "epoch": 0.8355310373451159, "grad_norm": 18.50524114322925, "learning_rate": 3.7664489793797604e-06, "loss": 0.3557456970214844, "step": 96630 }, { "epoch": 0.8355742708666591, "grad_norm": 26.848840720389916, "learning_rate": 3.766252005622712e-06, "loss": 0.5610252380371094, "step": 96635 }, { "epoch": 0.8356175043882025, "grad_norm": 1.8843714908949614, "learning_rate": 3.766055028331777e-06, "loss": 0.07780647277832031, "step": 96640 }, { "epoch": 0.8356607379097457, "grad_norm": 0.4253193632559754, "learning_rate": 3.765858047507864e-06, "loss": 0.08204193115234375, "step": 96645 }, { "epoch": 0.8357039714312889, "grad_norm": 3.4623397182733457, "learning_rate": 3.7656610631518824e-06, "loss": 0.17771835327148439, "step": 96650 }, { "epoch": 0.8357472049528323, "grad_norm": 11.244218600187821, "learning_rate": 3.7654640752647385e-06, "loss": 0.0807861328125, "step": 96655 }, { "epoch": 0.8357904384743755, "grad_norm": 0.3429392300101963, "learning_rate": 3.765267083847342e-06, "loss": 0.09940643310546875, "step": 96660 }, { "epoch": 0.8358336719959187, "grad_norm": 1.278174276021209, "learning_rate": 3.765070088900602e-06, "loss": 0.05287017822265625, "step": 96665 }, { "epoch": 0.835876905517462, "grad_norm": 7.925569702958283, "learning_rate": 3.764873090425426e-06, "loss": 0.10093116760253906, "step": 96670 }, { "epoch": 0.8359201390390053, "grad_norm": 4.625623310637544, "learning_rate": 3.764676088422722e-06, "loss": 0.15621490478515626, "step": 96675 }, { "epoch": 0.8359633725605485, "grad_norm": 7.042751604245016, "learning_rate": 3.7644790828934002e-06, "loss": 0.07993202209472657, "step": 96680 }, { "epoch": 0.8360066060820918, "grad_norm": 0.22302958416475507, "learning_rate": 3.7642820738383693e-06, "loss": 0.1370086669921875, "step": 96685 }, { "epoch": 0.8360498396036351, "grad_norm": 9.074541879323192, "learning_rate": 3.7640850612585352e-06, "loss": 0.1230560302734375, "step": 96690 }, { "epoch": 0.8360930731251783, "grad_norm": 6.6606128150504205, "learning_rate": 3.7638880451548095e-06, "loss": 0.0641754150390625, "step": 96695 }, { "epoch": 0.8361363066467216, "grad_norm": 21.284862934503472, "learning_rate": 3.7636910255280996e-06, "loss": 0.16217422485351562, "step": 96700 }, { "epoch": 0.8361795401682649, "grad_norm": 33.475268943437, "learning_rate": 3.7634940023793137e-06, "loss": 0.1026336669921875, "step": 96705 }, { "epoch": 0.8362227736898081, "grad_norm": 2.1737876766824225, "learning_rate": 3.7632969757093615e-06, "loss": 0.1335845947265625, "step": 96710 }, { "epoch": 0.8362660072113514, "grad_norm": 24.355542876517028, "learning_rate": 3.7630999455191513e-06, "loss": 0.18673934936523437, "step": 96715 }, { "epoch": 0.8363092407328947, "grad_norm": 1.280131845275192, "learning_rate": 3.76290291180959e-06, "loss": 0.334613037109375, "step": 96720 }, { "epoch": 0.8363524742544379, "grad_norm": 0.49907998933529946, "learning_rate": 3.7627058745815892e-06, "loss": 0.15419769287109375, "step": 96725 }, { "epoch": 0.8363957077759812, "grad_norm": 5.1831349005657605, "learning_rate": 3.762508833836056e-06, "loss": 0.04071502685546875, "step": 96730 }, { "epoch": 0.8364389412975245, "grad_norm": 18.136601400776993, "learning_rate": 3.7623117895738994e-06, "loss": 0.2180694580078125, "step": 96735 }, { "epoch": 0.8364821748190677, "grad_norm": 0.7132071368767198, "learning_rate": 3.762114741796029e-06, "loss": 0.10902862548828125, "step": 96740 }, { "epoch": 0.836525408340611, "grad_norm": 4.506569378104345, "learning_rate": 3.7619176905033522e-06, "loss": 0.0679901123046875, "step": 96745 }, { "epoch": 0.8365686418621543, "grad_norm": 3.6250408374927483, "learning_rate": 3.7617206356967777e-06, "loss": 0.07197914123535157, "step": 96750 }, { "epoch": 0.8366118753836975, "grad_norm": 27.53057007052387, "learning_rate": 3.761523577377215e-06, "loss": 0.3568023681640625, "step": 96755 }, { "epoch": 0.8366551089052408, "grad_norm": 6.107564609230118, "learning_rate": 3.7613265155455735e-06, "loss": 0.092327880859375, "step": 96760 }, { "epoch": 0.836698342426784, "grad_norm": 5.145738195492549, "learning_rate": 3.7611294502027612e-06, "loss": 0.03828353881835937, "step": 96765 }, { "epoch": 0.8367415759483273, "grad_norm": 0.9828319600860292, "learning_rate": 3.7609323813496864e-06, "loss": 0.05058631896972656, "step": 96770 }, { "epoch": 0.8367848094698705, "grad_norm": 4.365291558109639, "learning_rate": 3.7607353089872597e-06, "loss": 0.3606842041015625, "step": 96775 }, { "epoch": 0.8368280429914138, "grad_norm": 10.123545131857174, "learning_rate": 3.7605382331163886e-06, "loss": 0.10597381591796876, "step": 96780 }, { "epoch": 0.8368712765129571, "grad_norm": 0.13551469191204682, "learning_rate": 3.7603411537379824e-06, "loss": 0.07076416015625, "step": 96785 }, { "epoch": 0.8369145100345003, "grad_norm": 15.566302481019386, "learning_rate": 3.76014407085295e-06, "loss": 0.04577789306640625, "step": 96790 }, { "epoch": 0.8369577435560436, "grad_norm": 1.9973017947921654, "learning_rate": 3.7599469844622004e-06, "loss": 0.10069503784179687, "step": 96795 }, { "epoch": 0.8370009770775869, "grad_norm": 21.376581519112662, "learning_rate": 3.759749894566642e-06, "loss": 0.1458404541015625, "step": 96800 }, { "epoch": 0.8370442105991301, "grad_norm": 4.374426852306407, "learning_rate": 3.759552801167185e-06, "loss": 0.058624267578125, "step": 96805 }, { "epoch": 0.8370874441206734, "grad_norm": 19.117575265571688, "learning_rate": 3.7593557042647365e-06, "loss": 0.109307861328125, "step": 96810 }, { "epoch": 0.8371306776422167, "grad_norm": 1.3016489570138965, "learning_rate": 3.759158603860207e-06, "loss": 0.0800018310546875, "step": 96815 }, { "epoch": 0.8371739111637599, "grad_norm": 6.700816185545996, "learning_rate": 3.7589614999545063e-06, "loss": 0.03684654235839844, "step": 96820 }, { "epoch": 0.8372171446853032, "grad_norm": 2.1771557367847096, "learning_rate": 3.758764392548541e-06, "loss": 0.327447509765625, "step": 96825 }, { "epoch": 0.8372603782068465, "grad_norm": 0.13171302557266631, "learning_rate": 3.758567281643222e-06, "loss": 0.08410873413085937, "step": 96830 }, { "epoch": 0.8373036117283897, "grad_norm": 4.350422309157536, "learning_rate": 3.7583701672394564e-06, "loss": 0.26356201171875, "step": 96835 }, { "epoch": 0.837346845249933, "grad_norm": 14.890589400057129, "learning_rate": 3.758173049338156e-06, "loss": 0.10239639282226562, "step": 96840 }, { "epoch": 0.8373900787714762, "grad_norm": 1.2077706241515698, "learning_rate": 3.7579759279402285e-06, "loss": 0.08441085815429687, "step": 96845 }, { "epoch": 0.8374333122930195, "grad_norm": 13.770829715156891, "learning_rate": 3.757778803046582e-06, "loss": 0.13451385498046875, "step": 96850 }, { "epoch": 0.8374765458145628, "grad_norm": 12.975838427820852, "learning_rate": 3.757581674658128e-06, "loss": 0.09188385009765625, "step": 96855 }, { "epoch": 0.837519779336106, "grad_norm": 4.059561000074339, "learning_rate": 3.7573845427757728e-06, "loss": 0.227777099609375, "step": 96860 }, { "epoch": 0.8375630128576493, "grad_norm": 9.270539976275375, "learning_rate": 3.7571874074004276e-06, "loss": 0.2239992141723633, "step": 96865 }, { "epoch": 0.8376062463791926, "grad_norm": 99.43027265055908, "learning_rate": 3.756990268533001e-06, "loss": 0.31853790283203126, "step": 96870 }, { "epoch": 0.8376494799007358, "grad_norm": 15.199229310516184, "learning_rate": 3.756793126174403e-06, "loss": 0.2715118408203125, "step": 96875 }, { "epoch": 0.8376927134222791, "grad_norm": 5.875690719434381, "learning_rate": 3.7565959803255406e-06, "loss": 0.1002288818359375, "step": 96880 }, { "epoch": 0.8377359469438224, "grad_norm": 4.409915989915002, "learning_rate": 3.756398830987325e-06, "loss": 0.048181915283203126, "step": 96885 }, { "epoch": 0.8377791804653656, "grad_norm": 34.57124348372097, "learning_rate": 3.7562016781606644e-06, "loss": 0.32738304138183594, "step": 96890 }, { "epoch": 0.8378224139869089, "grad_norm": 1.4212322222329745, "learning_rate": 3.7560045218464687e-06, "loss": 0.0506256103515625, "step": 96895 }, { "epoch": 0.8378656475084522, "grad_norm": 2.7199389692170324, "learning_rate": 3.7558073620456474e-06, "loss": 0.1325897216796875, "step": 96900 }, { "epoch": 0.8379088810299954, "grad_norm": 55.2226258316418, "learning_rate": 3.7556101987591093e-06, "loss": 0.36460418701171876, "step": 96905 }, { "epoch": 0.8379521145515387, "grad_norm": 1.7803735784297545, "learning_rate": 3.7554130319877633e-06, "loss": 0.18291015625, "step": 96910 }, { "epoch": 0.837995348073082, "grad_norm": 3.254725027011468, "learning_rate": 3.7552158617325195e-06, "loss": 0.04987030029296875, "step": 96915 }, { "epoch": 0.8380385815946252, "grad_norm": 0.7746524348943964, "learning_rate": 3.7550186879942864e-06, "loss": 0.1118011474609375, "step": 96920 }, { "epoch": 0.8380818151161685, "grad_norm": 6.510313953516108, "learning_rate": 3.7548215107739737e-06, "loss": 0.037506103515625, "step": 96925 }, { "epoch": 0.8381250486377118, "grad_norm": 0.8683714490482307, "learning_rate": 3.7546243300724916e-06, "loss": 0.07079620361328125, "step": 96930 }, { "epoch": 0.838168282159255, "grad_norm": 6.334286184724993, "learning_rate": 3.754427145890749e-06, "loss": 0.19136199951171876, "step": 96935 }, { "epoch": 0.8382115156807982, "grad_norm": 38.144929932127496, "learning_rate": 3.754229958229655e-06, "loss": 0.5348594665527344, "step": 96940 }, { "epoch": 0.8382547492023416, "grad_norm": 18.151002517945415, "learning_rate": 3.7540327670901183e-06, "loss": 0.13996124267578125, "step": 96945 }, { "epoch": 0.8382979827238848, "grad_norm": 0.3486914244051513, "learning_rate": 3.7538355724730487e-06, "loss": 0.16181869506835939, "step": 96950 }, { "epoch": 0.838341216245428, "grad_norm": 5.340127078596424, "learning_rate": 3.7536383743793573e-06, "loss": 0.08820343017578125, "step": 96955 }, { "epoch": 0.8383844497669714, "grad_norm": 16.400621281295233, "learning_rate": 3.7534411728099513e-06, "loss": 0.0894989013671875, "step": 96960 }, { "epoch": 0.8384276832885146, "grad_norm": 0.5315613274192642, "learning_rate": 3.7532439677657418e-06, "loss": 0.14397964477539063, "step": 96965 }, { "epoch": 0.8384709168100578, "grad_norm": 1.09540220745323, "learning_rate": 3.7530467592476383e-06, "loss": 0.06859512329101562, "step": 96970 }, { "epoch": 0.8385141503316011, "grad_norm": 20.786411815614304, "learning_rate": 3.7528495472565482e-06, "loss": 0.12938995361328126, "step": 96975 }, { "epoch": 0.8385573838531444, "grad_norm": 0.37799665352434175, "learning_rate": 3.7526523317933832e-06, "loss": 0.037133979797363284, "step": 96980 }, { "epoch": 0.8386006173746876, "grad_norm": 2.9734250495771812, "learning_rate": 3.7524551128590522e-06, "loss": 0.024297332763671874, "step": 96985 }, { "epoch": 0.838643850896231, "grad_norm": 10.748084111644232, "learning_rate": 3.7522578904544645e-06, "loss": 0.21159286499023439, "step": 96990 }, { "epoch": 0.8386870844177742, "grad_norm": 3.71231670953113, "learning_rate": 3.752060664580531e-06, "loss": 0.2312957763671875, "step": 96995 }, { "epoch": 0.8387303179393174, "grad_norm": 26.64742443631219, "learning_rate": 3.751863435238158e-06, "loss": 0.15435333251953126, "step": 97000 }, { "epoch": 0.8387735514608607, "grad_norm": 3.4988251930481695, "learning_rate": 3.7516662024282586e-06, "loss": 0.053905487060546875, "step": 97005 }, { "epoch": 0.838816784982404, "grad_norm": 13.686128856112061, "learning_rate": 3.7514689661517416e-06, "loss": 0.104058837890625, "step": 97010 }, { "epoch": 0.8388600185039472, "grad_norm": 2.2181608917154265, "learning_rate": 3.7512717264095156e-06, "loss": 0.192828369140625, "step": 97015 }, { "epoch": 0.8389032520254904, "grad_norm": 0.775623494634863, "learning_rate": 3.751074483202491e-06, "loss": 0.015455245971679688, "step": 97020 }, { "epoch": 0.8389464855470338, "grad_norm": 26.03131898087769, "learning_rate": 3.750877236531577e-06, "loss": 0.0942230224609375, "step": 97025 }, { "epoch": 0.838989719068577, "grad_norm": 5.79178646624585, "learning_rate": 3.7506799863976828e-06, "loss": 0.056406402587890626, "step": 97030 }, { "epoch": 0.8390329525901202, "grad_norm": 20.79554201121651, "learning_rate": 3.75048273280172e-06, "loss": 0.14336585998535156, "step": 97035 }, { "epoch": 0.8390761861116636, "grad_norm": 0.3605205760875404, "learning_rate": 3.7502854757445964e-06, "loss": 0.1055633544921875, "step": 97040 }, { "epoch": 0.8391194196332068, "grad_norm": 6.603695876065844, "learning_rate": 3.750088215227223e-06, "loss": 0.034351348876953125, "step": 97045 }, { "epoch": 0.83916265315475, "grad_norm": 8.060193317357013, "learning_rate": 3.749890951250509e-06, "loss": 0.06771926879882813, "step": 97050 }, { "epoch": 0.8392058866762934, "grad_norm": 1.052389464241339, "learning_rate": 3.749693683815364e-06, "loss": 0.11895103454589843, "step": 97055 }, { "epoch": 0.8392491201978366, "grad_norm": 4.51403741617157, "learning_rate": 3.7494964129226975e-06, "loss": 0.0807586669921875, "step": 97060 }, { "epoch": 0.8392923537193798, "grad_norm": 0.34338087408274437, "learning_rate": 3.749299138573421e-06, "loss": 0.2620212554931641, "step": 97065 }, { "epoch": 0.8393355872409232, "grad_norm": 9.01051944190351, "learning_rate": 3.7491018607684416e-06, "loss": 0.25645599365234373, "step": 97070 }, { "epoch": 0.8393788207624664, "grad_norm": 2.130163542846221, "learning_rate": 3.7489045795086717e-06, "loss": 0.178033447265625, "step": 97075 }, { "epoch": 0.8394220542840096, "grad_norm": 4.317652739915416, "learning_rate": 3.74870729479502e-06, "loss": 0.11461181640625, "step": 97080 }, { "epoch": 0.839465287805553, "grad_norm": 27.176094081326454, "learning_rate": 3.7485100066283954e-06, "loss": 0.1689910888671875, "step": 97085 }, { "epoch": 0.8395085213270962, "grad_norm": 9.625667120764971, "learning_rate": 3.7483127150097094e-06, "loss": 0.13816604614257813, "step": 97090 }, { "epoch": 0.8395517548486394, "grad_norm": 0.6082527990034695, "learning_rate": 3.748115419939872e-06, "loss": 0.15821094512939454, "step": 97095 }, { "epoch": 0.8395949883701828, "grad_norm": 3.758135212776918, "learning_rate": 3.747918121419791e-06, "loss": 0.17370758056640626, "step": 97100 }, { "epoch": 0.839638221891726, "grad_norm": 2.3748405651894373, "learning_rate": 3.747720819450379e-06, "loss": 0.060260009765625, "step": 97105 }, { "epoch": 0.8396814554132692, "grad_norm": 13.123558673258922, "learning_rate": 3.747523514032544e-06, "loss": 0.3103485107421875, "step": 97110 }, { "epoch": 0.8397246889348124, "grad_norm": 3.6524310099750292, "learning_rate": 3.7473262051671964e-06, "loss": 0.04724464416503906, "step": 97115 }, { "epoch": 0.8397679224563558, "grad_norm": 35.229027961253244, "learning_rate": 3.747128892855247e-06, "loss": 0.42387542724609373, "step": 97120 }, { "epoch": 0.839811155977899, "grad_norm": 41.3155841664801, "learning_rate": 3.746931577097605e-06, "loss": 0.15755157470703124, "step": 97125 }, { "epoch": 0.8398543894994422, "grad_norm": 0.06557302399955801, "learning_rate": 3.7467342578951805e-06, "loss": 0.06402626037597656, "step": 97130 }, { "epoch": 0.8398976230209856, "grad_norm": 6.668309459863062, "learning_rate": 3.746536935248883e-06, "loss": 0.2833160400390625, "step": 97135 }, { "epoch": 0.8399408565425288, "grad_norm": 1.7233241566228679, "learning_rate": 3.7463396091596242e-06, "loss": 0.01637420654296875, "step": 97140 }, { "epoch": 0.839984090064072, "grad_norm": 7.167561436350774, "learning_rate": 3.7461422796283123e-06, "loss": 0.190899658203125, "step": 97145 }, { "epoch": 0.8400273235856154, "grad_norm": 24.155930867598233, "learning_rate": 3.7459449466558582e-06, "loss": 0.12534236907958984, "step": 97150 }, { "epoch": 0.8400705571071586, "grad_norm": 0.8803218181812112, "learning_rate": 3.7457476102431727e-06, "loss": 0.012072944641113281, "step": 97155 }, { "epoch": 0.8401137906287018, "grad_norm": 4.163396221625789, "learning_rate": 3.7455502703911645e-06, "loss": 0.10041275024414062, "step": 97160 }, { "epoch": 0.8401570241502452, "grad_norm": 3.5721284502495205, "learning_rate": 3.745352927100744e-06, "loss": 0.2088836669921875, "step": 97165 }, { "epoch": 0.8402002576717884, "grad_norm": 10.759820083947783, "learning_rate": 3.745155580372822e-06, "loss": 0.0655364990234375, "step": 97170 }, { "epoch": 0.8402434911933316, "grad_norm": 36.47427285204347, "learning_rate": 3.744958230208308e-06, "loss": 0.31470565795898436, "step": 97175 }, { "epoch": 0.840286724714875, "grad_norm": 7.1544348557322435, "learning_rate": 3.7447608766081126e-06, "loss": 0.12376174926757813, "step": 97180 }, { "epoch": 0.8403299582364182, "grad_norm": 0.6708144866159536, "learning_rate": 3.744563519573146e-06, "loss": 0.012837982177734375, "step": 97185 }, { "epoch": 0.8403731917579614, "grad_norm": 8.573258937206667, "learning_rate": 3.744366159104319e-06, "loss": 0.10506401062011719, "step": 97190 }, { "epoch": 0.8404164252795047, "grad_norm": 1.2352579215024888, "learning_rate": 3.74416879520254e-06, "loss": 0.12931060791015625, "step": 97195 }, { "epoch": 0.840459658801048, "grad_norm": 5.015312483499957, "learning_rate": 3.74397142786872e-06, "loss": 0.2724964141845703, "step": 97200 }, { "epoch": 0.8405028923225912, "grad_norm": 0.5039503351852878, "learning_rate": 3.7437740571037703e-06, "loss": 0.09146270751953126, "step": 97205 }, { "epoch": 0.8405461258441345, "grad_norm": 6.147009408071136, "learning_rate": 3.7435766829086003e-06, "loss": 0.2109668731689453, "step": 97210 }, { "epoch": 0.8405893593656778, "grad_norm": 0.8004853277660025, "learning_rate": 3.74337930528412e-06, "loss": 0.14573593139648439, "step": 97215 }, { "epoch": 0.840632592887221, "grad_norm": 0.046173124924990025, "learning_rate": 3.7431819242312405e-06, "loss": 0.15448837280273436, "step": 97220 }, { "epoch": 0.8406758264087643, "grad_norm": 45.880325132017646, "learning_rate": 3.7429845397508713e-06, "loss": 0.14917144775390626, "step": 97225 }, { "epoch": 0.8407190599303076, "grad_norm": 0.38382411645910314, "learning_rate": 3.7427871518439227e-06, "loss": 0.20296783447265626, "step": 97230 }, { "epoch": 0.8407622934518508, "grad_norm": 4.004877633829783, "learning_rate": 3.742589760511306e-06, "loss": 0.035962677001953124, "step": 97235 }, { "epoch": 0.8408055269733941, "grad_norm": 0.09424087054696959, "learning_rate": 3.742392365753931e-06, "loss": 0.08429794311523438, "step": 97240 }, { "epoch": 0.8408487604949374, "grad_norm": 19.90456159212885, "learning_rate": 3.7421949675727073e-06, "loss": 0.1387847900390625, "step": 97245 }, { "epoch": 0.8408919940164806, "grad_norm": 19.019074306175813, "learning_rate": 3.7419975659685465e-06, "loss": 0.47075881958007815, "step": 97250 }, { "epoch": 0.8409352275380239, "grad_norm": 13.998433902639174, "learning_rate": 3.7418001609423585e-06, "loss": 0.06872406005859374, "step": 97255 }, { "epoch": 0.8409784610595672, "grad_norm": 1.582061624204541, "learning_rate": 3.7416027524950528e-06, "loss": 0.21611251831054687, "step": 97260 }, { "epoch": 0.8410216945811104, "grad_norm": 3.1735201697415794, "learning_rate": 3.741405340627542e-06, "loss": 0.017287826538085936, "step": 97265 }, { "epoch": 0.8410649281026537, "grad_norm": 2.9941297135743423, "learning_rate": 3.7412079253407352e-06, "loss": 0.04940643310546875, "step": 97270 }, { "epoch": 0.841108161624197, "grad_norm": 5.934920604912023, "learning_rate": 3.741010506635543e-06, "loss": 0.11319503784179688, "step": 97275 }, { "epoch": 0.8411513951457402, "grad_norm": 19.634648489520835, "learning_rate": 3.740813084512875e-06, "loss": 0.13873443603515626, "step": 97280 }, { "epoch": 0.8411946286672835, "grad_norm": 12.83282140012862, "learning_rate": 3.740615658973643e-06, "loss": 0.06062507629394531, "step": 97285 }, { "epoch": 0.8412378621888267, "grad_norm": 24.4045852596502, "learning_rate": 3.740418230018757e-06, "loss": 0.07973279953002929, "step": 97290 }, { "epoch": 0.84128109571037, "grad_norm": 18.944007294856643, "learning_rate": 3.740220797649127e-06, "loss": 0.1437713623046875, "step": 97295 }, { "epoch": 0.8413243292319132, "grad_norm": 13.190267486713923, "learning_rate": 3.7400233618656653e-06, "loss": 0.25455780029296876, "step": 97300 }, { "epoch": 0.8413675627534565, "grad_norm": 0.7022295903588476, "learning_rate": 3.7398259226692805e-06, "loss": 0.14893112182617188, "step": 97305 }, { "epoch": 0.8414107962749998, "grad_norm": 0.13310059916661454, "learning_rate": 3.7396284800608836e-06, "loss": 0.17362213134765625, "step": 97310 }, { "epoch": 0.841454029796543, "grad_norm": 1.7445660552634137, "learning_rate": 3.7394310340413855e-06, "loss": 0.022690200805664064, "step": 97315 }, { "epoch": 0.8414972633180863, "grad_norm": 4.15504475448005, "learning_rate": 3.7392335846116975e-06, "loss": 0.1955476760864258, "step": 97320 }, { "epoch": 0.8415404968396296, "grad_norm": 2.2739613093216766, "learning_rate": 3.7390361317727282e-06, "loss": 0.13377456665039061, "step": 97325 }, { "epoch": 0.8415837303611728, "grad_norm": 0.8427658905636792, "learning_rate": 3.7388386755253916e-06, "loss": 0.17927474975585939, "step": 97330 }, { "epoch": 0.8416269638827161, "grad_norm": 0.102384906352703, "learning_rate": 3.738641215870595e-06, "loss": 0.05683259963989258, "step": 97335 }, { "epoch": 0.8416701974042594, "grad_norm": 1.6187095490666614, "learning_rate": 3.73844375280925e-06, "loss": 0.017405128479003905, "step": 97340 }, { "epoch": 0.8417134309258026, "grad_norm": 1.8795458415347532, "learning_rate": 3.7382462863422683e-06, "loss": 0.16083297729492188, "step": 97345 }, { "epoch": 0.8417566644473459, "grad_norm": 12.628427995662456, "learning_rate": 3.7380488164705608e-06, "loss": 0.07771148681640624, "step": 97350 }, { "epoch": 0.8417998979688892, "grad_norm": 2.3770327265093028, "learning_rate": 3.7378513431950357e-06, "loss": 0.05682029724121094, "step": 97355 }, { "epoch": 0.8418431314904324, "grad_norm": 13.582960032111487, "learning_rate": 3.7376538665166066e-06, "loss": 0.1960540771484375, "step": 97360 }, { "epoch": 0.8418863650119757, "grad_norm": 0.013035629433942705, "learning_rate": 3.737456386436183e-06, "loss": 0.061035537719726564, "step": 97365 }, { "epoch": 0.8419295985335189, "grad_norm": 24.930852355627817, "learning_rate": 3.737258902954675e-06, "loss": 0.12327880859375, "step": 97370 }, { "epoch": 0.8419728320550622, "grad_norm": 37.0632211912528, "learning_rate": 3.7370614160729943e-06, "loss": 0.1089019775390625, "step": 97375 }, { "epoch": 0.8420160655766055, "grad_norm": 26.79310851910728, "learning_rate": 3.736863925792052e-06, "loss": 0.214129638671875, "step": 97380 }, { "epoch": 0.8420592990981487, "grad_norm": 3.321993098898483, "learning_rate": 3.7366664321127582e-06, "loss": 0.16436386108398438, "step": 97385 }, { "epoch": 0.842102532619692, "grad_norm": 60.14149895568481, "learning_rate": 3.7364689350360237e-06, "loss": 0.3298248291015625, "step": 97390 }, { "epoch": 0.8421457661412353, "grad_norm": 2.6438951872415144, "learning_rate": 3.73627143456276e-06, "loss": 0.0707061767578125, "step": 97395 }, { "epoch": 0.8421889996627785, "grad_norm": 4.280875835129718, "learning_rate": 3.7360739306938765e-06, "loss": 0.40164947509765625, "step": 97400 }, { "epoch": 0.8422322331843218, "grad_norm": 4.153390855820856, "learning_rate": 3.735876423430285e-06, "loss": 0.41605377197265625, "step": 97405 }, { "epoch": 0.8422754667058651, "grad_norm": 0.07126794284213235, "learning_rate": 3.735678912772898e-06, "loss": 0.051981353759765626, "step": 97410 }, { "epoch": 0.8423187002274083, "grad_norm": 25.116710283072763, "learning_rate": 3.735481398722624e-06, "loss": 0.297772216796875, "step": 97415 }, { "epoch": 0.8423619337489516, "grad_norm": 0.6515949474302748, "learning_rate": 3.7352838812803746e-06, "loss": 0.188543701171875, "step": 97420 }, { "epoch": 0.8424051672704949, "grad_norm": 14.183649558864987, "learning_rate": 3.7350863604470607e-06, "loss": 0.1421142578125, "step": 97425 }, { "epoch": 0.8424484007920381, "grad_norm": 5.405461832964358, "learning_rate": 3.7348888362235936e-06, "loss": 0.04456415176391602, "step": 97430 }, { "epoch": 0.8424916343135814, "grad_norm": 6.4648603927949635, "learning_rate": 3.7346913086108846e-06, "loss": 0.11682052612304687, "step": 97435 }, { "epoch": 0.8425348678351247, "grad_norm": 0.33849856913762594, "learning_rate": 3.734493777609844e-06, "loss": 0.006485748291015625, "step": 97440 }, { "epoch": 0.8425781013566679, "grad_norm": 1.9076695670554045, "learning_rate": 3.7342962432213835e-06, "loss": 0.14552459716796876, "step": 97445 }, { "epoch": 0.8426213348782111, "grad_norm": 1.7845554600218714, "learning_rate": 3.734098705446413e-06, "loss": 0.0766510009765625, "step": 97450 }, { "epoch": 0.8426645683997545, "grad_norm": 0.17485139479180878, "learning_rate": 3.7339011642858435e-06, "loss": 0.08243045806884766, "step": 97455 }, { "epoch": 0.8427078019212977, "grad_norm": 23.81562409477535, "learning_rate": 3.7337036197405876e-06, "loss": 0.0920196533203125, "step": 97460 }, { "epoch": 0.8427510354428409, "grad_norm": 0.7764419670458819, "learning_rate": 3.7335060718115554e-06, "loss": 0.27086219787597654, "step": 97465 }, { "epoch": 0.8427942689643843, "grad_norm": 0.20760122425180655, "learning_rate": 3.733308520499658e-06, "loss": 0.235455322265625, "step": 97470 }, { "epoch": 0.8428375024859275, "grad_norm": 1.4397389039311588, "learning_rate": 3.733110965805806e-06, "loss": 0.11587448120117187, "step": 97475 }, { "epoch": 0.8428807360074707, "grad_norm": 0.49324940961409225, "learning_rate": 3.7329134077309114e-06, "loss": 0.05825309753417969, "step": 97480 }, { "epoch": 0.842923969529014, "grad_norm": 18.588504964047182, "learning_rate": 3.732715846275884e-06, "loss": 0.4588409423828125, "step": 97485 }, { "epoch": 0.8429672030505573, "grad_norm": 20.818899980381698, "learning_rate": 3.732518281441637e-06, "loss": 0.15258331298828126, "step": 97490 }, { "epoch": 0.8430104365721005, "grad_norm": 24.776350416119424, "learning_rate": 3.732320713229081e-06, "loss": 0.160150146484375, "step": 97495 }, { "epoch": 0.8430536700936438, "grad_norm": 6.560581162687481, "learning_rate": 3.732123141639125e-06, "loss": 0.18776702880859375, "step": 97500 }, { "epoch": 0.8430969036151871, "grad_norm": 0.49543462404665284, "learning_rate": 3.731925566672683e-06, "loss": 0.22693023681640626, "step": 97505 }, { "epoch": 0.8431401371367303, "grad_norm": 26.8319669569567, "learning_rate": 3.731727988330664e-06, "loss": 0.24910202026367187, "step": 97510 }, { "epoch": 0.8431833706582736, "grad_norm": 0.2967175611351099, "learning_rate": 3.73153040661398e-06, "loss": 0.041196441650390624, "step": 97515 }, { "epoch": 0.8432266041798169, "grad_norm": 16.69449582472111, "learning_rate": 3.731332821523544e-06, "loss": 0.29920501708984376, "step": 97520 }, { "epoch": 0.8432698377013601, "grad_norm": 10.786692429228395, "learning_rate": 3.7311352330602647e-06, "loss": 0.08470993041992188, "step": 97525 }, { "epoch": 0.8433130712229034, "grad_norm": 11.646490732200718, "learning_rate": 3.730937641225054e-06, "loss": 0.28072357177734375, "step": 97530 }, { "epoch": 0.8433563047444467, "grad_norm": 2.1118418226919626, "learning_rate": 3.730740046018824e-06, "loss": 0.037319183349609375, "step": 97535 }, { "epoch": 0.8433995382659899, "grad_norm": 0.8605975129409361, "learning_rate": 3.7305424474424845e-06, "loss": 0.10795745849609376, "step": 97540 }, { "epoch": 0.8434427717875331, "grad_norm": 2.4863022903513734, "learning_rate": 3.7303448454969488e-06, "loss": 0.06533308029174804, "step": 97545 }, { "epoch": 0.8434860053090765, "grad_norm": 22.7620244127002, "learning_rate": 3.7301472401831268e-06, "loss": 0.15396957397460936, "step": 97550 }, { "epoch": 0.8435292388306197, "grad_norm": 10.69592323943143, "learning_rate": 3.7299496315019308e-06, "loss": 0.363800048828125, "step": 97555 }, { "epoch": 0.8435724723521629, "grad_norm": 2.9840010417387393, "learning_rate": 3.729752019454271e-06, "loss": 0.060359954833984375, "step": 97560 }, { "epoch": 0.8436157058737063, "grad_norm": 13.796166599548348, "learning_rate": 3.729554404041059e-06, "loss": 0.08178939819335937, "step": 97565 }, { "epoch": 0.8436589393952495, "grad_norm": 33.56851208238359, "learning_rate": 3.7293567852632075e-06, "loss": 0.2648956298828125, "step": 97570 }, { "epoch": 0.8437021729167927, "grad_norm": 38.2603594944419, "learning_rate": 3.7291591631216267e-06, "loss": 0.28199005126953125, "step": 97575 }, { "epoch": 0.8437454064383361, "grad_norm": 0.03599529943033191, "learning_rate": 3.7289615376172277e-06, "loss": 0.07584114074707031, "step": 97580 }, { "epoch": 0.8437886399598793, "grad_norm": 5.621976383971339, "learning_rate": 3.7287639087509232e-06, "loss": 0.0916717529296875, "step": 97585 }, { "epoch": 0.8438318734814225, "grad_norm": 7.35375669369457, "learning_rate": 3.7285662765236237e-06, "loss": 0.181695556640625, "step": 97590 }, { "epoch": 0.8438751070029659, "grad_norm": 8.358806719168976, "learning_rate": 3.7283686409362407e-06, "loss": 0.3379608154296875, "step": 97595 }, { "epoch": 0.8439183405245091, "grad_norm": 0.6423384677265761, "learning_rate": 3.7281710019896855e-06, "loss": 0.13013916015625, "step": 97600 }, { "epoch": 0.8439615740460523, "grad_norm": 1.2841428015559884, "learning_rate": 3.727973359684871e-06, "loss": 0.018903350830078124, "step": 97605 }, { "epoch": 0.8440048075675957, "grad_norm": 1.4210320254472009, "learning_rate": 3.7277757140227067e-06, "loss": 0.1399505615234375, "step": 97610 }, { "epoch": 0.8440480410891389, "grad_norm": 7.425714013934437, "learning_rate": 3.727578065004106e-06, "loss": 0.1323455810546875, "step": 97615 }, { "epoch": 0.8440912746106821, "grad_norm": 1.5339820276208977, "learning_rate": 3.7273804126299792e-06, "loss": 0.23861427307128907, "step": 97620 }, { "epoch": 0.8441345081322253, "grad_norm": 22.23324457771674, "learning_rate": 3.7271827569012386e-06, "loss": 0.0421783447265625, "step": 97625 }, { "epoch": 0.8441777416537687, "grad_norm": 12.29312646490276, "learning_rate": 3.7269850978187942e-06, "loss": 0.21585235595703126, "step": 97630 }, { "epoch": 0.8442209751753119, "grad_norm": 13.399430134770528, "learning_rate": 3.72678743538356e-06, "loss": 0.22566986083984375, "step": 97635 }, { "epoch": 0.8442642086968551, "grad_norm": 1.9043248007276143, "learning_rate": 3.7265897695964464e-06, "loss": 0.2844211578369141, "step": 97640 }, { "epoch": 0.8443074422183985, "grad_norm": 16.06888845152413, "learning_rate": 3.726392100458364e-06, "loss": 0.1675262451171875, "step": 97645 }, { "epoch": 0.8443506757399417, "grad_norm": 11.041871910651457, "learning_rate": 3.7261944279702256e-06, "loss": 0.24973602294921876, "step": 97650 }, { "epoch": 0.8443939092614849, "grad_norm": 25.608249638709953, "learning_rate": 3.7259967521329436e-06, "loss": 0.10070953369140626, "step": 97655 }, { "epoch": 0.8444371427830283, "grad_norm": 16.41874192027305, "learning_rate": 3.7257990729474275e-06, "loss": 0.170867919921875, "step": 97660 }, { "epoch": 0.8444803763045715, "grad_norm": 1.645821082064582, "learning_rate": 3.725601390414591e-06, "loss": 0.0812255859375, "step": 97665 }, { "epoch": 0.8445236098261147, "grad_norm": 1.044957134631827, "learning_rate": 3.7254037045353454e-06, "loss": 0.134991455078125, "step": 97670 }, { "epoch": 0.8445668433476581, "grad_norm": 6.033916669972115, "learning_rate": 3.7252060153106013e-06, "loss": 0.0273651123046875, "step": 97675 }, { "epoch": 0.8446100768692013, "grad_norm": 0.10671078710436314, "learning_rate": 3.7250083227412705e-06, "loss": 0.08536996841430664, "step": 97680 }, { "epoch": 0.8446533103907445, "grad_norm": 11.059425662336189, "learning_rate": 3.724810626828267e-06, "loss": 0.12511444091796875, "step": 97685 }, { "epoch": 0.8446965439122879, "grad_norm": 11.755346554553185, "learning_rate": 3.7246129275724993e-06, "loss": 0.3920135498046875, "step": 97690 }, { "epoch": 0.8447397774338311, "grad_norm": 0.1472851791128166, "learning_rate": 3.7244152249748816e-06, "loss": 0.30131874084472654, "step": 97695 }, { "epoch": 0.8447830109553743, "grad_norm": 5.683331373625128, "learning_rate": 3.7242175190363254e-06, "loss": 0.074493408203125, "step": 97700 }, { "epoch": 0.8448262444769177, "grad_norm": 19.602669769226395, "learning_rate": 3.7240198097577413e-06, "loss": 0.1790008544921875, "step": 97705 }, { "epoch": 0.8448694779984609, "grad_norm": 13.75114032191499, "learning_rate": 3.7238220971400415e-06, "loss": 0.23846282958984374, "step": 97710 }, { "epoch": 0.8449127115200041, "grad_norm": 25.395381744630498, "learning_rate": 3.723624381184139e-06, "loss": 0.14691009521484374, "step": 97715 }, { "epoch": 0.8449559450415474, "grad_norm": 2.072023871818939, "learning_rate": 3.7234266618909447e-06, "loss": 0.11310653686523438, "step": 97720 }, { "epoch": 0.8449991785630907, "grad_norm": 0.27435876650052005, "learning_rate": 3.7232289392613705e-06, "loss": 0.20061264038085938, "step": 97725 }, { "epoch": 0.8450424120846339, "grad_norm": 3.7885060280650302, "learning_rate": 3.723031213296328e-06, "loss": 0.0437591552734375, "step": 97730 }, { "epoch": 0.8450856456061772, "grad_norm": 12.521768346253873, "learning_rate": 3.7228334839967293e-06, "loss": 0.142626953125, "step": 97735 }, { "epoch": 0.8451288791277205, "grad_norm": 10.005859882013041, "learning_rate": 3.722635751363486e-06, "loss": 0.15809783935546876, "step": 97740 }, { "epoch": 0.8451721126492637, "grad_norm": 42.12465083136559, "learning_rate": 3.722438015397512e-06, "loss": 0.31432571411132815, "step": 97745 }, { "epoch": 0.845215346170807, "grad_norm": 0.3095198074502339, "learning_rate": 3.722240276099717e-06, "loss": 0.18184356689453124, "step": 97750 }, { "epoch": 0.8452585796923503, "grad_norm": 8.76775725720226, "learning_rate": 3.722042533471013e-06, "loss": 0.539227294921875, "step": 97755 }, { "epoch": 0.8453018132138935, "grad_norm": 1.6773317094477962, "learning_rate": 3.7218447875123135e-06, "loss": 0.47025260925292967, "step": 97760 }, { "epoch": 0.8453450467354368, "grad_norm": 26.130798472627514, "learning_rate": 3.721647038224529e-06, "loss": 0.11072158813476562, "step": 97765 }, { "epoch": 0.8453882802569801, "grad_norm": 5.985267878661393, "learning_rate": 3.7214492856085724e-06, "loss": 0.31370849609375, "step": 97770 }, { "epoch": 0.8454315137785233, "grad_norm": 17.93438007911326, "learning_rate": 3.7212515296653555e-06, "loss": 0.12188262939453125, "step": 97775 }, { "epoch": 0.8454747473000666, "grad_norm": 27.36439358005828, "learning_rate": 3.7210537703957906e-06, "loss": 0.244476318359375, "step": 97780 }, { "epoch": 0.8455179808216099, "grad_norm": 0.5222573594623725, "learning_rate": 3.7208560078007883e-06, "loss": 0.05076522827148437, "step": 97785 }, { "epoch": 0.8455612143431531, "grad_norm": 2.7749268228367305, "learning_rate": 3.720658241881263e-06, "loss": 0.12605438232421876, "step": 97790 }, { "epoch": 0.8456044478646964, "grad_norm": 0.9280031324839909, "learning_rate": 3.720460472638125e-06, "loss": 0.1600738525390625, "step": 97795 }, { "epoch": 0.8456476813862396, "grad_norm": 5.041333519090864, "learning_rate": 3.7202627000722875e-06, "loss": 0.20944442749023437, "step": 97800 }, { "epoch": 0.8456909149077829, "grad_norm": 1.6198273041923297, "learning_rate": 3.7200649241846612e-06, "loss": 0.23948564529418945, "step": 97805 }, { "epoch": 0.8457341484293261, "grad_norm": 4.204839506057941, "learning_rate": 3.7198671449761597e-06, "loss": 0.07463178634643555, "step": 97810 }, { "epoch": 0.8457773819508694, "grad_norm": 7.001700295152136, "learning_rate": 3.7196693624476948e-06, "loss": 0.04987335205078125, "step": 97815 }, { "epoch": 0.8458206154724127, "grad_norm": 18.021635562544752, "learning_rate": 3.719471576600177e-06, "loss": 0.14364299774169922, "step": 97820 }, { "epoch": 0.845863848993956, "grad_norm": 72.1244515542571, "learning_rate": 3.719273787434521e-06, "loss": 0.2975128173828125, "step": 97825 }, { "epoch": 0.8459070825154992, "grad_norm": 1.0623672510928541, "learning_rate": 3.7190759949516386e-06, "loss": 0.152349853515625, "step": 97830 }, { "epoch": 0.8459503160370425, "grad_norm": 58.31474252008576, "learning_rate": 3.7188781991524396e-06, "loss": 0.18405914306640625, "step": 97835 }, { "epoch": 0.8459935495585857, "grad_norm": 9.768408836363713, "learning_rate": 3.7186804000378388e-06, "loss": 0.1024871826171875, "step": 97840 }, { "epoch": 0.846036783080129, "grad_norm": 26.226241647782768, "learning_rate": 3.718482597608747e-06, "loss": 0.13772201538085938, "step": 97845 }, { "epoch": 0.8460800166016723, "grad_norm": 2.0769473256126676, "learning_rate": 3.718284791866077e-06, "loss": 0.04190406799316406, "step": 97850 }, { "epoch": 0.8461232501232155, "grad_norm": 3.369581320786887, "learning_rate": 3.718086982810742e-06, "loss": 0.15228805541992188, "step": 97855 }, { "epoch": 0.8461664836447588, "grad_norm": 20.043976698984714, "learning_rate": 3.7178891704436525e-06, "loss": 0.2888206481933594, "step": 97860 }, { "epoch": 0.8462097171663021, "grad_norm": 1.6651014613716075, "learning_rate": 3.7176913547657214e-06, "loss": 0.6082656860351563, "step": 97865 }, { "epoch": 0.8462529506878453, "grad_norm": 0.07822058920423783, "learning_rate": 3.7174935357778616e-06, "loss": 0.06753997802734375, "step": 97870 }, { "epoch": 0.8462961842093886, "grad_norm": 26.53241642282686, "learning_rate": 3.7172957134809847e-06, "loss": 0.11647491455078125, "step": 97875 }, { "epoch": 0.8463394177309319, "grad_norm": 4.8087360927157095, "learning_rate": 3.7170978878760028e-06, "loss": 0.09264717102050782, "step": 97880 }, { "epoch": 0.8463826512524751, "grad_norm": 0.28747929044199694, "learning_rate": 3.7169000589638294e-06, "loss": 0.10922832489013672, "step": 97885 }, { "epoch": 0.8464258847740184, "grad_norm": 44.36371602440757, "learning_rate": 3.7167022267453773e-06, "loss": 0.44474077224731445, "step": 97890 }, { "epoch": 0.8464691182955616, "grad_norm": 3.780816682190929, "learning_rate": 3.716504391221557e-06, "loss": 0.0860809326171875, "step": 97895 }, { "epoch": 0.8465123518171049, "grad_norm": 0.030064595615172197, "learning_rate": 3.716306552393281e-06, "loss": 0.30935020446777345, "step": 97900 }, { "epoch": 0.8465555853386482, "grad_norm": 6.6840716847860575, "learning_rate": 3.7161087102614633e-06, "loss": 0.0804840087890625, "step": 97905 }, { "epoch": 0.8465988188601914, "grad_norm": 23.120049115555183, "learning_rate": 3.7159108648270153e-06, "loss": 0.31763916015625, "step": 97910 }, { "epoch": 0.8466420523817347, "grad_norm": 1.8285524439681764, "learning_rate": 3.71571301609085e-06, "loss": 0.22017822265625, "step": 97915 }, { "epoch": 0.846685285903278, "grad_norm": 0.8155981831579571, "learning_rate": 3.715515164053879e-06, "loss": 0.10326347351074219, "step": 97920 }, { "epoch": 0.8467285194248212, "grad_norm": 0.2591998429524822, "learning_rate": 3.7153173087170163e-06, "loss": 0.0400238037109375, "step": 97925 }, { "epoch": 0.8467717529463645, "grad_norm": 5.567711853919852, "learning_rate": 3.7151194500811714e-06, "loss": 0.28381500244140623, "step": 97930 }, { "epoch": 0.8468149864679078, "grad_norm": 2.0374962445592093, "learning_rate": 3.71492158814726e-06, "loss": 0.04454193115234375, "step": 97935 }, { "epoch": 0.846858219989451, "grad_norm": 1.5889131029248882, "learning_rate": 3.7147237229161932e-06, "loss": 0.0426849365234375, "step": 97940 }, { "epoch": 0.8469014535109943, "grad_norm": 0.9847236121804681, "learning_rate": 3.714525854388884e-06, "loss": 0.024555206298828125, "step": 97945 }, { "epoch": 0.8469446870325376, "grad_norm": 4.7371438400120205, "learning_rate": 3.7143279825662447e-06, "loss": 0.03328857421875, "step": 97950 }, { "epoch": 0.8469879205540808, "grad_norm": 0.7760385869876328, "learning_rate": 3.7141301074491886e-06, "loss": 0.10066184997558594, "step": 97955 }, { "epoch": 0.8470311540756241, "grad_norm": 1.6418097058288643, "learning_rate": 3.713932229038626e-06, "loss": 0.12262115478515626, "step": 97960 }, { "epoch": 0.8470743875971674, "grad_norm": 13.443160931696324, "learning_rate": 3.713734347335472e-06, "loss": 0.09667434692382812, "step": 97965 }, { "epoch": 0.8471176211187106, "grad_norm": 18.4211025743306, "learning_rate": 3.713536462340638e-06, "loss": 0.16056404113769532, "step": 97970 }, { "epoch": 0.8471608546402538, "grad_norm": 3.1543765636908065, "learning_rate": 3.713338574055037e-06, "loss": 0.0163665771484375, "step": 97975 }, { "epoch": 0.8472040881617972, "grad_norm": 4.236629371174547, "learning_rate": 3.7131406824795812e-06, "loss": 0.349322509765625, "step": 97980 }, { "epoch": 0.8472473216833404, "grad_norm": 3.320978119999257, "learning_rate": 3.7129427876151843e-06, "loss": 0.09667625427246093, "step": 97985 }, { "epoch": 0.8472905552048836, "grad_norm": 19.280116774237072, "learning_rate": 3.712744889462757e-06, "loss": 0.14312896728515626, "step": 97990 }, { "epoch": 0.847333788726427, "grad_norm": 1.135528156672056, "learning_rate": 3.712546988023214e-06, "loss": 0.1552886962890625, "step": 97995 }, { "epoch": 0.8473770222479702, "grad_norm": 0.3618315181385488, "learning_rate": 3.7123490832974673e-06, "loss": 0.14481983184814454, "step": 98000 }, { "epoch": 0.8474202557695134, "grad_norm": 2.1248463019601673, "learning_rate": 3.71215117528643e-06, "loss": 0.22133331298828124, "step": 98005 }, { "epoch": 0.8474634892910567, "grad_norm": 2.911554732028446, "learning_rate": 3.7119532639910135e-06, "loss": 0.052387237548828125, "step": 98010 }, { "epoch": 0.8475067228126, "grad_norm": 1.1169216744952974, "learning_rate": 3.7117553494121327e-06, "loss": 0.6775653839111329, "step": 98015 }, { "epoch": 0.8475499563341432, "grad_norm": 2.1451955753440206, "learning_rate": 3.7115574315506976e-06, "loss": 0.08124198913574218, "step": 98020 }, { "epoch": 0.8475931898556865, "grad_norm": 0.09925022008362468, "learning_rate": 3.7113595104076228e-06, "loss": 0.15153961181640624, "step": 98025 }, { "epoch": 0.8476364233772298, "grad_norm": 10.472056438622028, "learning_rate": 3.7111615859838216e-06, "loss": 0.15045318603515626, "step": 98030 }, { "epoch": 0.847679656898773, "grad_norm": 2.9362762260693698, "learning_rate": 3.7109636582802063e-06, "loss": 0.07654037475585937, "step": 98035 }, { "epoch": 0.8477228904203163, "grad_norm": 24.651745689480364, "learning_rate": 3.7107657272976884e-06, "loss": 0.3072166442871094, "step": 98040 }, { "epoch": 0.8477661239418596, "grad_norm": 0.28833401926600793, "learning_rate": 3.710567793037182e-06, "loss": 0.058740234375, "step": 98045 }, { "epoch": 0.8478093574634028, "grad_norm": 0.2037877268928163, "learning_rate": 3.7103698554996e-06, "loss": 0.015177726745605469, "step": 98050 }, { "epoch": 0.8478525909849461, "grad_norm": 8.587303606107573, "learning_rate": 3.710171914685854e-06, "loss": 0.17502822875976562, "step": 98055 }, { "epoch": 0.8478958245064894, "grad_norm": 5.335372246263375, "learning_rate": 3.7099739705968595e-06, "loss": 0.03783721923828125, "step": 98060 }, { "epoch": 0.8479390580280326, "grad_norm": 33.893080233196784, "learning_rate": 3.709776023233527e-06, "loss": 0.2424591064453125, "step": 98065 }, { "epoch": 0.8479822915495758, "grad_norm": 0.6006641663430186, "learning_rate": 3.70957807259677e-06, "loss": 0.19162139892578126, "step": 98070 }, { "epoch": 0.8480255250711192, "grad_norm": 5.8973787535571525, "learning_rate": 3.7093801186875016e-06, "loss": 0.16309814453125, "step": 98075 }, { "epoch": 0.8480687585926624, "grad_norm": 1.855252951091658, "learning_rate": 3.7091821615066354e-06, "loss": 0.04568939208984375, "step": 98080 }, { "epoch": 0.8481119921142056, "grad_norm": 6.309169370101892, "learning_rate": 3.7089842010550836e-06, "loss": 0.12360305786132812, "step": 98085 }, { "epoch": 0.848155225635749, "grad_norm": 1.4464372992062005, "learning_rate": 3.708786237333759e-06, "loss": 0.14173049926757814, "step": 98090 }, { "epoch": 0.8481984591572922, "grad_norm": 2.0712396581991195, "learning_rate": 3.708588270343575e-06, "loss": 0.24589157104492188, "step": 98095 }, { "epoch": 0.8482416926788354, "grad_norm": 15.02705446565136, "learning_rate": 3.7083903000854443e-06, "loss": 0.145501708984375, "step": 98100 }, { "epoch": 0.8482849262003788, "grad_norm": 66.94408199435509, "learning_rate": 3.70819232656028e-06, "loss": 0.15828857421875, "step": 98105 }, { "epoch": 0.848328159721922, "grad_norm": 35.8918457074658, "learning_rate": 3.707994349768996e-06, "loss": 0.49725341796875, "step": 98110 }, { "epoch": 0.8483713932434652, "grad_norm": 23.1327489854984, "learning_rate": 3.707796369712504e-06, "loss": 0.20842399597167968, "step": 98115 }, { "epoch": 0.8484146267650086, "grad_norm": 0.22778074913014057, "learning_rate": 3.7075983863917177e-06, "loss": 0.06273193359375, "step": 98120 }, { "epoch": 0.8484578602865518, "grad_norm": 39.464335074167366, "learning_rate": 3.7074003998075503e-06, "loss": 0.3427703857421875, "step": 98125 }, { "epoch": 0.848501093808095, "grad_norm": 3.8218169423882213, "learning_rate": 3.707202409960915e-06, "loss": 0.21245193481445312, "step": 98130 }, { "epoch": 0.8485443273296384, "grad_norm": 5.019374286962974, "learning_rate": 3.7070044168527243e-06, "loss": 0.41410980224609373, "step": 98135 }, { "epoch": 0.8485875608511816, "grad_norm": 37.16615038186973, "learning_rate": 3.706806420483892e-06, "loss": 0.37767868041992186, "step": 98140 }, { "epoch": 0.8486307943727248, "grad_norm": 9.543784493105882, "learning_rate": 3.706608420855331e-06, "loss": 0.03538837432861328, "step": 98145 }, { "epoch": 0.848674027894268, "grad_norm": 14.25522609961982, "learning_rate": 3.7064104179679547e-06, "loss": 0.06331253051757812, "step": 98150 }, { "epoch": 0.8487172614158114, "grad_norm": 2.1690306797408363, "learning_rate": 3.7062124118226752e-06, "loss": 0.1930328369140625, "step": 98155 }, { "epoch": 0.8487604949373546, "grad_norm": 6.874017816653156, "learning_rate": 3.7060144024204067e-06, "loss": 0.3171966552734375, "step": 98160 }, { "epoch": 0.8488037284588978, "grad_norm": 43.60328588418634, "learning_rate": 3.7058163897620618e-06, "loss": 0.20572319030761718, "step": 98165 }, { "epoch": 0.8488469619804412, "grad_norm": 9.258129959766261, "learning_rate": 3.7056183738485544e-06, "loss": 0.04112167358398437, "step": 98170 }, { "epoch": 0.8488901955019844, "grad_norm": 5.907770265874856, "learning_rate": 3.7054203546807977e-06, "loss": 0.04526519775390625, "step": 98175 }, { "epoch": 0.8489334290235276, "grad_norm": 10.666359420677834, "learning_rate": 3.705222332259705e-06, "loss": 0.08195381164550782, "step": 98180 }, { "epoch": 0.848976662545071, "grad_norm": 28.751527727601243, "learning_rate": 3.7050243065861883e-06, "loss": 0.24672698974609375, "step": 98185 }, { "epoch": 0.8490198960666142, "grad_norm": 0.2495015027521453, "learning_rate": 3.7048262776611613e-06, "loss": 0.010280609130859375, "step": 98190 }, { "epoch": 0.8490631295881574, "grad_norm": 42.57734719225909, "learning_rate": 3.7046282454855395e-06, "loss": 0.107318115234375, "step": 98195 }, { "epoch": 0.8491063631097008, "grad_norm": 0.6416452835872996, "learning_rate": 3.704430210060233e-06, "loss": 0.04405136108398437, "step": 98200 }, { "epoch": 0.849149596631244, "grad_norm": 24.335926332281925, "learning_rate": 3.7042321713861576e-06, "loss": 0.073907470703125, "step": 98205 }, { "epoch": 0.8491928301527872, "grad_norm": 15.521586949820637, "learning_rate": 3.7040341294642258e-06, "loss": 0.06755294799804687, "step": 98210 }, { "epoch": 0.8492360636743306, "grad_norm": 2.9408334708737653, "learning_rate": 3.7038360842953503e-06, "loss": 0.04749298095703125, "step": 98215 }, { "epoch": 0.8492792971958738, "grad_norm": 3.852366395385093, "learning_rate": 3.7036380358804446e-06, "loss": 0.10582695007324219, "step": 98220 }, { "epoch": 0.849322530717417, "grad_norm": 12.507041045155106, "learning_rate": 3.703439984220423e-06, "loss": 0.141156005859375, "step": 98225 }, { "epoch": 0.8493657642389604, "grad_norm": 1.536810587454739, "learning_rate": 3.7032419293161977e-06, "loss": 0.1184844970703125, "step": 98230 }, { "epoch": 0.8494089977605036, "grad_norm": 2.2911631101484993, "learning_rate": 3.7030438711686836e-06, "loss": 0.06766510009765625, "step": 98235 }, { "epoch": 0.8494522312820468, "grad_norm": 2.4659916499780627, "learning_rate": 3.7028458097787934e-06, "loss": 0.07421875, "step": 98240 }, { "epoch": 0.8494954648035901, "grad_norm": 0.3339325393771338, "learning_rate": 3.70264774514744e-06, "loss": 0.3047969818115234, "step": 98245 }, { "epoch": 0.8495386983251334, "grad_norm": 4.368704607832222, "learning_rate": 3.7024496772755364e-06, "loss": 0.1869110107421875, "step": 98250 }, { "epoch": 0.8495819318466766, "grad_norm": 52.642963287374236, "learning_rate": 3.702251606163998e-06, "loss": 0.20531988143920898, "step": 98255 }, { "epoch": 0.8496251653682199, "grad_norm": 6.260014301806778, "learning_rate": 3.702053531813738e-06, "loss": 0.070013427734375, "step": 98260 }, { "epoch": 0.8496683988897632, "grad_norm": 0.34754528218505676, "learning_rate": 3.7018554542256674e-06, "loss": 0.3860038757324219, "step": 98265 }, { "epoch": 0.8497116324113064, "grad_norm": 0.18792438176122772, "learning_rate": 3.7016573734007024e-06, "loss": 0.2030975341796875, "step": 98270 }, { "epoch": 0.8497548659328497, "grad_norm": 19.81189705384257, "learning_rate": 3.701459289339755e-06, "loss": 0.09867324829101562, "step": 98275 }, { "epoch": 0.849798099454393, "grad_norm": 20.30376432759144, "learning_rate": 3.7012612020437396e-06, "loss": 0.33013763427734377, "step": 98280 }, { "epoch": 0.8498413329759362, "grad_norm": 0.08382920813496307, "learning_rate": 3.70106311151357e-06, "loss": 0.150958251953125, "step": 98285 }, { "epoch": 0.8498845664974795, "grad_norm": 14.790310067611495, "learning_rate": 3.70086501775016e-06, "loss": 0.07649765014648438, "step": 98290 }, { "epoch": 0.8499278000190228, "grad_norm": 49.9603131007387, "learning_rate": 3.7006669207544208e-06, "loss": 0.237469482421875, "step": 98295 }, { "epoch": 0.849971033540566, "grad_norm": 1.2101864916527854, "learning_rate": 3.700468820527268e-06, "loss": 0.2802391052246094, "step": 98300 }, { "epoch": 0.8500142670621093, "grad_norm": 0.7165328675653801, "learning_rate": 3.700270717069616e-06, "loss": 0.039579010009765624, "step": 98305 }, { "epoch": 0.8500575005836526, "grad_norm": 6.795459632183635, "learning_rate": 3.7000726103823756e-06, "loss": 0.13256607055664063, "step": 98310 }, { "epoch": 0.8501007341051958, "grad_norm": 150.93638033214103, "learning_rate": 3.6998745004664638e-06, "loss": 0.4904930114746094, "step": 98315 }, { "epoch": 0.850143967626739, "grad_norm": 4.118454189857479, "learning_rate": 3.6996763873227923e-06, "loss": 0.06773529052734376, "step": 98320 }, { "epoch": 0.8501872011482823, "grad_norm": 0.6309994428088844, "learning_rate": 3.6994782709522747e-06, "loss": 0.1255950927734375, "step": 98325 }, { "epoch": 0.8502304346698256, "grad_norm": 31.359406958912974, "learning_rate": 3.699280151355824e-06, "loss": 0.1710174560546875, "step": 98330 }, { "epoch": 0.8502736681913688, "grad_norm": 4.088783804679001, "learning_rate": 3.6990820285343566e-06, "loss": 0.5569915771484375, "step": 98335 }, { "epoch": 0.8503169017129121, "grad_norm": 4.892791606362863, "learning_rate": 3.698883902488785e-06, "loss": 0.06581878662109375, "step": 98340 }, { "epoch": 0.8503601352344554, "grad_norm": 0.11222028908266635, "learning_rate": 3.6986857732200217e-06, "loss": 0.05364151000976562, "step": 98345 }, { "epoch": 0.8504033687559986, "grad_norm": 1.1276600682582083, "learning_rate": 3.698487640728982e-06, "loss": 0.16385650634765625, "step": 98350 }, { "epoch": 0.8504466022775419, "grad_norm": 0.8058537821985986, "learning_rate": 3.6982895050165783e-06, "loss": 0.09798583984375, "step": 98355 }, { "epoch": 0.8504898357990852, "grad_norm": 7.318526712288243, "learning_rate": 3.6980913660837246e-06, "loss": 0.04978179931640625, "step": 98360 }, { "epoch": 0.8505330693206284, "grad_norm": 3.9715201214511593, "learning_rate": 3.6978932239313363e-06, "loss": 0.2677486419677734, "step": 98365 }, { "epoch": 0.8505763028421717, "grad_norm": 29.337297301998817, "learning_rate": 3.697695078560326e-06, "loss": 0.13492431640625, "step": 98370 }, { "epoch": 0.850619536363715, "grad_norm": 22.39845765141021, "learning_rate": 3.697496929971607e-06, "loss": 0.331298828125, "step": 98375 }, { "epoch": 0.8506627698852582, "grad_norm": 21.33360542614224, "learning_rate": 3.6972987781660953e-06, "loss": 0.098223876953125, "step": 98380 }, { "epoch": 0.8507060034068015, "grad_norm": 15.948081294429125, "learning_rate": 3.697100623144701e-06, "loss": 0.07290205955505372, "step": 98385 }, { "epoch": 0.8507492369283448, "grad_norm": 29.847067643929048, "learning_rate": 3.6969024649083414e-06, "loss": 0.22267093658447265, "step": 98390 }, { "epoch": 0.850792470449888, "grad_norm": 0.7900087098286053, "learning_rate": 3.6967043034579295e-06, "loss": 0.3133026123046875, "step": 98395 }, { "epoch": 0.8508357039714313, "grad_norm": 289.875818599431, "learning_rate": 3.696506138794379e-06, "loss": 0.34030303955078123, "step": 98400 }, { "epoch": 0.8508789374929746, "grad_norm": 11.825166358301688, "learning_rate": 3.696307970918603e-06, "loss": 0.071807861328125, "step": 98405 }, { "epoch": 0.8509221710145178, "grad_norm": 1.5427908903538246, "learning_rate": 3.696109799831517e-06, "loss": 0.35645599365234376, "step": 98410 }, { "epoch": 0.8509654045360611, "grad_norm": 8.550634420457099, "learning_rate": 3.6959116255340337e-06, "loss": 0.26237640380859373, "step": 98415 }, { "epoch": 0.8510086380576043, "grad_norm": 0.29067799008292594, "learning_rate": 3.6957134480270675e-06, "loss": 0.38879547119140623, "step": 98420 }, { "epoch": 0.8510518715791476, "grad_norm": 5.81225058789922, "learning_rate": 3.695515267311532e-06, "loss": 0.08768310546875, "step": 98425 }, { "epoch": 0.8510951051006909, "grad_norm": 0.3192512138247596, "learning_rate": 3.695317083388342e-06, "loss": 0.1226318359375, "step": 98430 }, { "epoch": 0.8511383386222341, "grad_norm": 14.957171455047362, "learning_rate": 3.6951188962584106e-06, "loss": 0.1533203125, "step": 98435 }, { "epoch": 0.8511815721437774, "grad_norm": 8.428918835925025, "learning_rate": 3.6949207059226527e-06, "loss": 0.0625885009765625, "step": 98440 }, { "epoch": 0.8512248056653207, "grad_norm": 25.13960696998142, "learning_rate": 3.6947225123819813e-06, "loss": 0.14381561279296876, "step": 98445 }, { "epoch": 0.8512680391868639, "grad_norm": 10.689465232412115, "learning_rate": 3.694524315637312e-06, "loss": 0.02590179443359375, "step": 98450 }, { "epoch": 0.8513112727084072, "grad_norm": 0.16783770274359178, "learning_rate": 3.694326115689557e-06, "loss": 0.1537017822265625, "step": 98455 }, { "epoch": 0.8513545062299505, "grad_norm": 11.138694307817905, "learning_rate": 3.694127912539632e-06, "loss": 0.4480998992919922, "step": 98460 }, { "epoch": 0.8513977397514937, "grad_norm": 6.01878655786148, "learning_rate": 3.6939297061884507e-06, "loss": 0.028458404541015624, "step": 98465 }, { "epoch": 0.851440973273037, "grad_norm": 0.31791838979125664, "learning_rate": 3.693731496636925e-06, "loss": 0.36507415771484375, "step": 98470 }, { "epoch": 0.8514842067945803, "grad_norm": 1.9457736225148696, "learning_rate": 3.693533283885973e-06, "loss": 0.07357406616210938, "step": 98475 }, { "epoch": 0.8515274403161235, "grad_norm": 8.935511232320936, "learning_rate": 3.6933350679365064e-06, "loss": 0.1298297882080078, "step": 98480 }, { "epoch": 0.8515706738376668, "grad_norm": 22.224373360643824, "learning_rate": 3.6931368487894386e-06, "loss": 0.13030242919921875, "step": 98485 }, { "epoch": 0.85161390735921, "grad_norm": 9.057580371625887, "learning_rate": 3.6929386264456863e-06, "loss": 0.208740234375, "step": 98490 }, { "epoch": 0.8516571408807533, "grad_norm": 5.980475033678996, "learning_rate": 3.6927404009061608e-06, "loss": 0.23694305419921874, "step": 98495 }, { "epoch": 0.8517003744022965, "grad_norm": 2.523000474599271, "learning_rate": 3.692542172171779e-06, "loss": 0.15069046020507812, "step": 98500 }, { "epoch": 0.8517436079238399, "grad_norm": 2.6839354169623832, "learning_rate": 3.6923439402434537e-06, "loss": 0.05101966857910156, "step": 98505 }, { "epoch": 0.8517868414453831, "grad_norm": 3.626698556730894, "learning_rate": 3.6921457051220997e-06, "loss": 0.04661483764648437, "step": 98510 }, { "epoch": 0.8518300749669263, "grad_norm": 8.627841388824862, "learning_rate": 3.6919474668086304e-06, "loss": 0.17290802001953126, "step": 98515 }, { "epoch": 0.8518733084884696, "grad_norm": 1.4402340327241876, "learning_rate": 3.69174922530396e-06, "loss": 0.12243194580078125, "step": 98520 }, { "epoch": 0.8519165420100129, "grad_norm": 55.749552011723075, "learning_rate": 3.6915509806090036e-06, "loss": 0.2795360565185547, "step": 98525 }, { "epoch": 0.8519597755315561, "grad_norm": 34.35683262142587, "learning_rate": 3.6913527327246756e-06, "loss": 0.335882568359375, "step": 98530 }, { "epoch": 0.8520030090530994, "grad_norm": 29.026959738171588, "learning_rate": 3.691154481651889e-06, "loss": 0.22971878051757813, "step": 98535 }, { "epoch": 0.8520462425746427, "grad_norm": 1.5615815314556116, "learning_rate": 3.69095622739156e-06, "loss": 0.05148162841796875, "step": 98540 }, { "epoch": 0.8520894760961859, "grad_norm": 4.788251421370459, "learning_rate": 3.690757969944602e-06, "loss": 0.0918212890625, "step": 98545 }, { "epoch": 0.8521327096177292, "grad_norm": 18.203419577690152, "learning_rate": 3.690559709311929e-06, "loss": 0.3576194763183594, "step": 98550 }, { "epoch": 0.8521759431392725, "grad_norm": 31.115678090541035, "learning_rate": 3.6903614454944547e-06, "loss": 0.41876564025878904, "step": 98555 }, { "epoch": 0.8522191766608157, "grad_norm": 0.7505980013815255, "learning_rate": 3.690163178493096e-06, "loss": 0.13621139526367188, "step": 98560 }, { "epoch": 0.852262410182359, "grad_norm": 9.229848082754454, "learning_rate": 3.6899649083087647e-06, "loss": 0.17049026489257812, "step": 98565 }, { "epoch": 0.8523056437039023, "grad_norm": 0.08815284588288054, "learning_rate": 3.689766634942376e-06, "loss": 0.0338836669921875, "step": 98570 }, { "epoch": 0.8523488772254455, "grad_norm": 6.25535812928381, "learning_rate": 3.6895683583948453e-06, "loss": 0.05374755859375, "step": 98575 }, { "epoch": 0.8523921107469888, "grad_norm": 9.556368838561689, "learning_rate": 3.689370078667086e-06, "loss": 0.08656044006347656, "step": 98580 }, { "epoch": 0.8524353442685321, "grad_norm": 5.380898210969775, "learning_rate": 3.689171795760012e-06, "loss": 0.06418476104736329, "step": 98585 }, { "epoch": 0.8524785777900753, "grad_norm": 6.6328402176322365, "learning_rate": 3.6889735096745395e-06, "loss": 0.1583740234375, "step": 98590 }, { "epoch": 0.8525218113116185, "grad_norm": 3.434485206438253, "learning_rate": 3.6887752204115823e-06, "loss": 0.1222900390625, "step": 98595 }, { "epoch": 0.8525650448331619, "grad_norm": 6.5158125770247395, "learning_rate": 3.688576927972054e-06, "loss": 0.17619781494140624, "step": 98600 }, { "epoch": 0.8526082783547051, "grad_norm": 21.144287637147674, "learning_rate": 3.68837863235687e-06, "loss": 0.16438560485839843, "step": 98605 }, { "epoch": 0.8526515118762483, "grad_norm": 21.085473042001627, "learning_rate": 3.6881803335669444e-06, "loss": 0.24367408752441405, "step": 98610 }, { "epoch": 0.8526947453977917, "grad_norm": 18.0797308338346, "learning_rate": 3.687982031603192e-06, "loss": 0.329779052734375, "step": 98615 }, { "epoch": 0.8527379789193349, "grad_norm": 0.22424741225711534, "learning_rate": 3.6877837264665277e-06, "loss": 0.3094902038574219, "step": 98620 }, { "epoch": 0.8527812124408781, "grad_norm": 14.86398764809246, "learning_rate": 3.6875854181578652e-06, "loss": 0.16880111694335936, "step": 98625 }, { "epoch": 0.8528244459624215, "grad_norm": 13.85164178706647, "learning_rate": 3.687387106678119e-06, "loss": 0.193408203125, "step": 98630 }, { "epoch": 0.8528676794839647, "grad_norm": 0.31174518269516754, "learning_rate": 3.687188792028205e-06, "loss": 0.212347412109375, "step": 98635 }, { "epoch": 0.8529109130055079, "grad_norm": 18.141762723449354, "learning_rate": 3.6869904742090363e-06, "loss": 0.1145751953125, "step": 98640 }, { "epoch": 0.8529541465270513, "grad_norm": 27.559287152620566, "learning_rate": 3.6867921532215286e-06, "loss": 0.1735687255859375, "step": 98645 }, { "epoch": 0.8529973800485945, "grad_norm": 7.421777463748799, "learning_rate": 3.6865938290665966e-06, "loss": 0.1080657958984375, "step": 98650 }, { "epoch": 0.8530406135701377, "grad_norm": 3.96183962853183, "learning_rate": 3.6863955017451544e-06, "loss": 0.047747802734375, "step": 98655 }, { "epoch": 0.8530838470916811, "grad_norm": 26.57519486769007, "learning_rate": 3.6861971712581163e-06, "loss": 0.42315521240234377, "step": 98660 }, { "epoch": 0.8531270806132243, "grad_norm": 3.6758607905018112, "learning_rate": 3.685998837606398e-06, "loss": 0.1651123046875, "step": 98665 }, { "epoch": 0.8531703141347675, "grad_norm": 7.949925723183979, "learning_rate": 3.6858005007909133e-06, "loss": 0.04991912841796875, "step": 98670 }, { "epoch": 0.8532135476563107, "grad_norm": 25.89363796778883, "learning_rate": 3.6856021608125775e-06, "loss": 0.148614501953125, "step": 98675 }, { "epoch": 0.8532567811778541, "grad_norm": 2.586647121999476, "learning_rate": 3.685403817672305e-06, "loss": 0.15043144226074218, "step": 98680 }, { "epoch": 0.8533000146993973, "grad_norm": 4.315331818566206, "learning_rate": 3.685205471371011e-06, "loss": 0.11605987548828126, "step": 98685 }, { "epoch": 0.8533432482209405, "grad_norm": 0.9617398169927169, "learning_rate": 3.6850071219096094e-06, "loss": 0.250079345703125, "step": 98690 }, { "epoch": 0.8533864817424839, "grad_norm": 2.7240963710000936, "learning_rate": 3.684808769289015e-06, "loss": 0.07609710693359376, "step": 98695 }, { "epoch": 0.8534297152640271, "grad_norm": 0.07958511334653678, "learning_rate": 3.684610413510144e-06, "loss": 0.014470672607421875, "step": 98700 }, { "epoch": 0.8534729487855703, "grad_norm": 3.4663575432785674, "learning_rate": 3.6844120545739103e-06, "loss": 0.04725494384765625, "step": 98705 }, { "epoch": 0.8535161823071137, "grad_norm": 3.4309081266815307, "learning_rate": 3.684213692481228e-06, "loss": 0.28829498291015626, "step": 98710 }, { "epoch": 0.8535594158286569, "grad_norm": 5.775419972608558, "learning_rate": 3.6840153272330133e-06, "loss": 0.2072265625, "step": 98715 }, { "epoch": 0.8536026493502001, "grad_norm": 3.417676557472473, "learning_rate": 3.6838169588301796e-06, "loss": 0.0642852783203125, "step": 98720 }, { "epoch": 0.8536458828717435, "grad_norm": 2.9918632564916177, "learning_rate": 3.683618587273643e-06, "loss": 0.06486549377441406, "step": 98725 }, { "epoch": 0.8536891163932867, "grad_norm": 0.9314008322757823, "learning_rate": 3.6834202125643186e-06, "loss": 0.10314254760742188, "step": 98730 }, { "epoch": 0.8537323499148299, "grad_norm": 0.8643856679000873, "learning_rate": 3.68322183470312e-06, "loss": 0.131524658203125, "step": 98735 }, { "epoch": 0.8537755834363733, "grad_norm": 4.815767974165202, "learning_rate": 3.6830234536909616e-06, "loss": 0.08428230285644531, "step": 98740 }, { "epoch": 0.8538188169579165, "grad_norm": 23.148655538322785, "learning_rate": 3.6828250695287602e-06, "loss": 0.32778244018554686, "step": 98745 }, { "epoch": 0.8538620504794597, "grad_norm": 0.42139828022740516, "learning_rate": 3.6826266822174305e-06, "loss": 0.0667938232421875, "step": 98750 }, { "epoch": 0.8539052840010031, "grad_norm": 11.019291800241302, "learning_rate": 3.682428291757886e-06, "loss": 0.1278564453125, "step": 98755 }, { "epoch": 0.8539485175225463, "grad_norm": 2.3421000904893847, "learning_rate": 3.6822298981510432e-06, "loss": 0.2562877655029297, "step": 98760 }, { "epoch": 0.8539917510440895, "grad_norm": 7.11150994210121, "learning_rate": 3.6820315013978157e-06, "loss": 0.13678398132324218, "step": 98765 }, { "epoch": 0.8540349845656328, "grad_norm": 0.6733661241230862, "learning_rate": 3.68183310149912e-06, "loss": 0.03348236083984375, "step": 98770 }, { "epoch": 0.8540782180871761, "grad_norm": 0.8621588737610812, "learning_rate": 3.6816346984558697e-06, "loss": 0.204150390625, "step": 98775 }, { "epoch": 0.8541214516087193, "grad_norm": 6.250515131659706, "learning_rate": 3.6814362922689804e-06, "loss": 0.13179397583007812, "step": 98780 }, { "epoch": 0.8541646851302626, "grad_norm": 18.262820210611846, "learning_rate": 3.681237882939367e-06, "loss": 0.056536865234375, "step": 98785 }, { "epoch": 0.8542079186518059, "grad_norm": 0.3922200502828833, "learning_rate": 3.6810394704679453e-06, "loss": 0.10999374389648438, "step": 98790 }, { "epoch": 0.8542511521733491, "grad_norm": 6.890919245203244, "learning_rate": 3.6808410548556295e-06, "loss": 0.330401611328125, "step": 98795 }, { "epoch": 0.8542943856948924, "grad_norm": 1.0798941670382673, "learning_rate": 3.680642636103335e-06, "loss": 0.07179641723632812, "step": 98800 }, { "epoch": 0.8543376192164357, "grad_norm": 1.0484004599199666, "learning_rate": 3.680444214211976e-06, "loss": 0.041387939453125, "step": 98805 }, { "epoch": 0.8543808527379789, "grad_norm": 22.751899854631723, "learning_rate": 3.6802457891824686e-06, "loss": 0.11081295013427735, "step": 98810 }, { "epoch": 0.8544240862595222, "grad_norm": 0.5414337712361392, "learning_rate": 3.680047361015729e-06, "loss": 0.06972541809082031, "step": 98815 }, { "epoch": 0.8544673197810655, "grad_norm": 25.951458618633964, "learning_rate": 3.6798489297126692e-06, "loss": 0.15065536499023438, "step": 98820 }, { "epoch": 0.8545105533026087, "grad_norm": 0.36077596991008337, "learning_rate": 3.6796504952742076e-06, "loss": 0.08226509094238281, "step": 98825 }, { "epoch": 0.854553786824152, "grad_norm": 4.239811736962412, "learning_rate": 3.6794520577012583e-06, "loss": 0.05037384033203125, "step": 98830 }, { "epoch": 0.8545970203456953, "grad_norm": 0.3725042223218349, "learning_rate": 3.6792536169947347e-06, "loss": 0.11169967651367188, "step": 98835 }, { "epoch": 0.8546402538672385, "grad_norm": 4.0199741436473095, "learning_rate": 3.679055173155554e-06, "loss": 0.38118896484375, "step": 98840 }, { "epoch": 0.8546834873887817, "grad_norm": 6.74193937111747, "learning_rate": 3.6788567261846316e-06, "loss": 0.04378833770751953, "step": 98845 }, { "epoch": 0.854726720910325, "grad_norm": 2.8348652565855748, "learning_rate": 3.678658276082881e-06, "loss": 0.22269287109375, "step": 98850 }, { "epoch": 0.8547699544318683, "grad_norm": 0.16905879172329377, "learning_rate": 3.6784598228512187e-06, "loss": 0.042205810546875, "step": 98855 }, { "epoch": 0.8548131879534115, "grad_norm": 2.813041699329864, "learning_rate": 3.67826136649056e-06, "loss": 0.10533599853515625, "step": 98860 }, { "epoch": 0.8548564214749548, "grad_norm": 28.911326301850906, "learning_rate": 3.6780629070018196e-06, "loss": 0.3079845428466797, "step": 98865 }, { "epoch": 0.8548996549964981, "grad_norm": 1.8877517567123188, "learning_rate": 3.6778644443859126e-06, "loss": 0.057201385498046875, "step": 98870 }, { "epoch": 0.8549428885180413, "grad_norm": 53.129655468738854, "learning_rate": 3.677665978643755e-06, "loss": 0.4449270248413086, "step": 98875 }, { "epoch": 0.8549861220395846, "grad_norm": 3.047038315440402, "learning_rate": 3.677467509776262e-06, "loss": 0.10641937255859375, "step": 98880 }, { "epoch": 0.8550293555611279, "grad_norm": 6.813215444768657, "learning_rate": 3.677269037784348e-06, "loss": 0.027420234680175782, "step": 98885 }, { "epoch": 0.8550725890826711, "grad_norm": 1.0743030871171564, "learning_rate": 3.6770705626689295e-06, "loss": 0.08173828125, "step": 98890 }, { "epoch": 0.8551158226042144, "grad_norm": 23.874010489715296, "learning_rate": 3.676872084430922e-06, "loss": 0.08010711669921874, "step": 98895 }, { "epoch": 0.8551590561257577, "grad_norm": 2.3125163730984877, "learning_rate": 3.676673603071239e-06, "loss": 0.02951202392578125, "step": 98900 }, { "epoch": 0.8552022896473009, "grad_norm": 4.453597947133364, "learning_rate": 3.676475118590798e-06, "loss": 0.07342629432678223, "step": 98905 }, { "epoch": 0.8552455231688442, "grad_norm": 3.2564941262145006, "learning_rate": 3.6762766309905135e-06, "loss": 0.08145675659179688, "step": 98910 }, { "epoch": 0.8552887566903875, "grad_norm": 23.730544731652493, "learning_rate": 3.6760781402713006e-06, "loss": 0.06605300903320313, "step": 98915 }, { "epoch": 0.8553319902119307, "grad_norm": 1.090065093191987, "learning_rate": 3.675879646434075e-06, "loss": 0.36146087646484376, "step": 98920 }, { "epoch": 0.855375223733474, "grad_norm": 10.312489351563903, "learning_rate": 3.675681149479753e-06, "loss": 0.11201858520507812, "step": 98925 }, { "epoch": 0.8554184572550172, "grad_norm": 12.573974216833356, "learning_rate": 3.675482649409249e-06, "loss": 0.2823360443115234, "step": 98930 }, { "epoch": 0.8554616907765605, "grad_norm": 1.6079791771313028, "learning_rate": 3.6752841462234774e-06, "loss": 0.0247833251953125, "step": 98935 }, { "epoch": 0.8555049242981038, "grad_norm": 8.89655361032651, "learning_rate": 3.6750856399233564e-06, "loss": 0.025156402587890626, "step": 98940 }, { "epoch": 0.855548157819647, "grad_norm": 57.254409755536216, "learning_rate": 3.6748871305097993e-06, "loss": 0.14617595672607422, "step": 98945 }, { "epoch": 0.8555913913411903, "grad_norm": 24.066251136638137, "learning_rate": 3.6746886179837223e-06, "loss": 0.1079620361328125, "step": 98950 }, { "epoch": 0.8556346248627336, "grad_norm": 17.84392838301263, "learning_rate": 3.6744901023460418e-06, "loss": 0.13344459533691405, "step": 98955 }, { "epoch": 0.8556778583842768, "grad_norm": 6.985410662205238, "learning_rate": 3.6742915835976726e-06, "loss": 0.12972145080566405, "step": 98960 }, { "epoch": 0.8557210919058201, "grad_norm": 5.366131683905709, "learning_rate": 3.6740930617395295e-06, "loss": 0.13920936584472657, "step": 98965 }, { "epoch": 0.8557643254273634, "grad_norm": 0.31687465054652936, "learning_rate": 3.6738945367725294e-06, "loss": 0.3550224304199219, "step": 98970 }, { "epoch": 0.8558075589489066, "grad_norm": 27.52181472328777, "learning_rate": 3.6736960086975867e-06, "loss": 0.29616241455078124, "step": 98975 }, { "epoch": 0.8558507924704499, "grad_norm": 26.848187280214148, "learning_rate": 3.673497477515617e-06, "loss": 0.19516983032226562, "step": 98980 }, { "epoch": 0.8558940259919932, "grad_norm": 42.072813918373214, "learning_rate": 3.6732989432275377e-06, "loss": 0.3467041015625, "step": 98985 }, { "epoch": 0.8559372595135364, "grad_norm": 0.08356852390765081, "learning_rate": 3.6731004058342628e-06, "loss": 0.0117584228515625, "step": 98990 }, { "epoch": 0.8559804930350797, "grad_norm": 3.3533414904273986, "learning_rate": 3.6729018653367078e-06, "loss": 0.05022125244140625, "step": 98995 }, { "epoch": 0.856023726556623, "grad_norm": 15.333544668656728, "learning_rate": 3.6727033217357897e-06, "loss": 0.1268228530883789, "step": 99000 }, { "epoch": 0.8560669600781662, "grad_norm": 20.207091272422936, "learning_rate": 3.672504775032422e-06, "loss": 0.09752197265625, "step": 99005 }, { "epoch": 0.8561101935997095, "grad_norm": 15.288588601639152, "learning_rate": 3.672306225227522e-06, "loss": 0.15885772705078124, "step": 99010 }, { "epoch": 0.8561534271212528, "grad_norm": 27.391649175542348, "learning_rate": 3.672107672322006e-06, "loss": 0.3474620819091797, "step": 99015 }, { "epoch": 0.856196660642796, "grad_norm": 6.294924580988004, "learning_rate": 3.671909116316789e-06, "loss": 0.06078033447265625, "step": 99020 }, { "epoch": 0.8562398941643392, "grad_norm": 2.737170781366495, "learning_rate": 3.671710557212786e-06, "loss": 0.36162109375, "step": 99025 }, { "epoch": 0.8562831276858826, "grad_norm": 43.24919600930568, "learning_rate": 3.671511995010913e-06, "loss": 0.1570465087890625, "step": 99030 }, { "epoch": 0.8563263612074258, "grad_norm": 70.41370954885542, "learning_rate": 3.671313429712086e-06, "loss": 0.3712249755859375, "step": 99035 }, { "epoch": 0.856369594728969, "grad_norm": 20.650443058762683, "learning_rate": 3.671114861317221e-06, "loss": 0.31957321166992186, "step": 99040 }, { "epoch": 0.8564128282505123, "grad_norm": 50.94929442094642, "learning_rate": 3.670916289827233e-06, "loss": 0.24777793884277344, "step": 99045 }, { "epoch": 0.8564560617720556, "grad_norm": 0.8412900380582976, "learning_rate": 3.67071771524304e-06, "loss": 0.15097808837890625, "step": 99050 }, { "epoch": 0.8564992952935988, "grad_norm": 0.48477835875970837, "learning_rate": 3.670519137565555e-06, "loss": 0.2601646423339844, "step": 99055 }, { "epoch": 0.8565425288151421, "grad_norm": 9.795771058642027, "learning_rate": 3.6703205567956946e-06, "loss": 0.21046714782714843, "step": 99060 }, { "epoch": 0.8565857623366854, "grad_norm": 8.240620085541181, "learning_rate": 3.6701219729343755e-06, "loss": 0.11878242492675781, "step": 99065 }, { "epoch": 0.8566289958582286, "grad_norm": 3.272311606379228, "learning_rate": 3.6699233859825136e-06, "loss": 0.03701095581054688, "step": 99070 }, { "epoch": 0.8566722293797719, "grad_norm": 1.454364823903692, "learning_rate": 3.6697247959410227e-06, "loss": 0.08014907836914062, "step": 99075 }, { "epoch": 0.8567154629013152, "grad_norm": 9.600729379418995, "learning_rate": 3.669526202810822e-06, "loss": 0.42503814697265624, "step": 99080 }, { "epoch": 0.8567586964228584, "grad_norm": 3.4352893634018273, "learning_rate": 3.6693276065928245e-06, "loss": 0.0980804443359375, "step": 99085 }, { "epoch": 0.8568019299444017, "grad_norm": 4.053549558637871, "learning_rate": 3.6691290072879468e-06, "loss": 0.23173980712890624, "step": 99090 }, { "epoch": 0.856845163465945, "grad_norm": 0.588370227983191, "learning_rate": 3.6689304048971065e-06, "loss": 0.055294036865234375, "step": 99095 }, { "epoch": 0.8568883969874882, "grad_norm": 1.1995203558803227, "learning_rate": 3.668731799421218e-06, "loss": 0.0954010009765625, "step": 99100 }, { "epoch": 0.8569316305090314, "grad_norm": 26.436796624428997, "learning_rate": 3.668533190861198e-06, "loss": 0.46330108642578127, "step": 99105 }, { "epoch": 0.8569748640305748, "grad_norm": 2.8340218079772654, "learning_rate": 3.668334579217961e-06, "loss": 0.05623435974121094, "step": 99110 }, { "epoch": 0.857018097552118, "grad_norm": 2.7584437120702345, "learning_rate": 3.668135964492423e-06, "loss": 0.09141807556152344, "step": 99115 }, { "epoch": 0.8570613310736612, "grad_norm": 21.083642554296954, "learning_rate": 3.667937346685503e-06, "loss": 0.27501220703125, "step": 99120 }, { "epoch": 0.8571045645952046, "grad_norm": 8.631389647780665, "learning_rate": 3.667738725798114e-06, "loss": 0.0971588134765625, "step": 99125 }, { "epoch": 0.8571477981167478, "grad_norm": 32.98489135194454, "learning_rate": 3.6675401018311737e-06, "loss": 0.1646759033203125, "step": 99130 }, { "epoch": 0.857191031638291, "grad_norm": 3.250793023018658, "learning_rate": 3.667341474785597e-06, "loss": 0.22598876953125, "step": 99135 }, { "epoch": 0.8572342651598344, "grad_norm": 3.5182891685549484, "learning_rate": 3.6671428446622997e-06, "loss": 0.036449432373046875, "step": 99140 }, { "epoch": 0.8572774986813776, "grad_norm": 16.366779189300644, "learning_rate": 3.666944211462199e-06, "loss": 0.1607513427734375, "step": 99145 }, { "epoch": 0.8573207322029208, "grad_norm": 8.279204184531954, "learning_rate": 3.6667455751862107e-06, "loss": 0.08684120178222657, "step": 99150 }, { "epoch": 0.8573639657244642, "grad_norm": 1.4958286774927, "learning_rate": 3.6665469358352512e-06, "loss": 0.19161911010742189, "step": 99155 }, { "epoch": 0.8574071992460074, "grad_norm": 0.9334455152839242, "learning_rate": 3.6663482934102355e-06, "loss": 0.08244476318359376, "step": 99160 }, { "epoch": 0.8574504327675506, "grad_norm": 7.136629341605849, "learning_rate": 3.666149647912081e-06, "loss": 0.1847076416015625, "step": 99165 }, { "epoch": 0.857493666289094, "grad_norm": 30.15562978931556, "learning_rate": 3.6659509993417024e-06, "loss": 0.10137100219726562, "step": 99170 }, { "epoch": 0.8575368998106372, "grad_norm": 27.907608555392567, "learning_rate": 3.6657523477000166e-06, "loss": 0.16825103759765625, "step": 99175 }, { "epoch": 0.8575801333321804, "grad_norm": 1.3339687045644053, "learning_rate": 3.6655536929879405e-06, "loss": 0.0623291015625, "step": 99180 }, { "epoch": 0.8576233668537238, "grad_norm": 5.9211805655533025, "learning_rate": 3.665355035206389e-06, "loss": 0.06888580322265625, "step": 99185 }, { "epoch": 0.857666600375267, "grad_norm": 3.0041386860139574, "learning_rate": 3.6651563743562794e-06, "loss": 0.20275421142578126, "step": 99190 }, { "epoch": 0.8577098338968102, "grad_norm": 0.5207962462396575, "learning_rate": 3.6649577104385273e-06, "loss": 0.20257844924926757, "step": 99195 }, { "epoch": 0.8577530674183534, "grad_norm": 0.2516232953280649, "learning_rate": 3.664759043454048e-06, "loss": 0.05768518447875977, "step": 99200 }, { "epoch": 0.8577963009398968, "grad_norm": 0.9950527713967753, "learning_rate": 3.66456037340376e-06, "loss": 0.14402999877929687, "step": 99205 }, { "epoch": 0.85783953446144, "grad_norm": 1.8471504236592848, "learning_rate": 3.6643617002885778e-06, "loss": 0.1586456298828125, "step": 99210 }, { "epoch": 0.8578827679829832, "grad_norm": 0.5223477341627653, "learning_rate": 3.664163024109418e-06, "loss": 0.0945159912109375, "step": 99215 }, { "epoch": 0.8579260015045266, "grad_norm": 35.19849718992371, "learning_rate": 3.663964344867197e-06, "loss": 0.243402099609375, "step": 99220 }, { "epoch": 0.8579692350260698, "grad_norm": 38.59354673435668, "learning_rate": 3.6637656625628315e-06, "loss": 0.2334991455078125, "step": 99225 }, { "epoch": 0.858012468547613, "grad_norm": 7.3929082735105665, "learning_rate": 3.6635669771972366e-06, "loss": 0.04127044677734375, "step": 99230 }, { "epoch": 0.8580557020691564, "grad_norm": 1.1007523387480944, "learning_rate": 3.66336828877133e-06, "loss": 0.1679656982421875, "step": 99235 }, { "epoch": 0.8580989355906996, "grad_norm": 3.643170004277672, "learning_rate": 3.6631695972860274e-06, "loss": 0.0246246337890625, "step": 99240 }, { "epoch": 0.8581421691122428, "grad_norm": 1.9452523268979343, "learning_rate": 3.6629709027422457e-06, "loss": 0.14178466796875, "step": 99245 }, { "epoch": 0.8581854026337862, "grad_norm": 32.86362806387038, "learning_rate": 3.6627722051409e-06, "loss": 0.261517333984375, "step": 99250 }, { "epoch": 0.8582286361553294, "grad_norm": 0.7703275682591452, "learning_rate": 3.6625735044829082e-06, "loss": 0.12191696166992187, "step": 99255 }, { "epoch": 0.8582718696768726, "grad_norm": 25.483092038977396, "learning_rate": 3.662374800769185e-06, "loss": 0.09503173828125, "step": 99260 }, { "epoch": 0.858315103198416, "grad_norm": 18.192027541783485, "learning_rate": 3.662176094000648e-06, "loss": 0.25516719818115235, "step": 99265 }, { "epoch": 0.8583583367199592, "grad_norm": 8.527434728690032, "learning_rate": 3.6619773841782144e-06, "loss": 0.04538803100585938, "step": 99270 }, { "epoch": 0.8584015702415024, "grad_norm": 0.8355854308508437, "learning_rate": 3.6617786713027987e-06, "loss": 0.04853515625, "step": 99275 }, { "epoch": 0.8584448037630457, "grad_norm": 110.3929210995476, "learning_rate": 3.661579955375319e-06, "loss": 0.1819122314453125, "step": 99280 }, { "epoch": 0.858488037284589, "grad_norm": 9.410015913955863, "learning_rate": 3.66138123639669e-06, "loss": 0.082489013671875, "step": 99285 }, { "epoch": 0.8585312708061322, "grad_norm": 9.47235902100779, "learning_rate": 3.6611825143678292e-06, "loss": 0.3391548156738281, "step": 99290 }, { "epoch": 0.8585745043276755, "grad_norm": 0.11002310603149053, "learning_rate": 3.6609837892896536e-06, "loss": 0.0598480224609375, "step": 99295 }, { "epoch": 0.8586177378492188, "grad_norm": 15.507728753246457, "learning_rate": 3.660785061163079e-06, "loss": 0.1281494140625, "step": 99300 }, { "epoch": 0.858660971370762, "grad_norm": 0.34113547725640125, "learning_rate": 3.660586329989022e-06, "loss": 0.2217620849609375, "step": 99305 }, { "epoch": 0.8587042048923053, "grad_norm": 16.398896072661255, "learning_rate": 3.6603875957684e-06, "loss": 0.1522045135498047, "step": 99310 }, { "epoch": 0.8587474384138486, "grad_norm": 13.504320972231117, "learning_rate": 3.6601888585021272e-06, "loss": 0.28764801025390624, "step": 99315 }, { "epoch": 0.8587906719353918, "grad_norm": 38.679119840096035, "learning_rate": 3.659990118191123e-06, "loss": 0.17983360290527345, "step": 99320 }, { "epoch": 0.858833905456935, "grad_norm": 1.9538593163382199, "learning_rate": 3.6597913748363022e-06, "loss": 0.04821720123291016, "step": 99325 }, { "epoch": 0.8588771389784784, "grad_norm": 9.89396857096615, "learning_rate": 3.6595926284385822e-06, "loss": 0.129150390625, "step": 99330 }, { "epoch": 0.8589203725000216, "grad_norm": 12.536513574160107, "learning_rate": 3.659393878998879e-06, "loss": 0.04070892333984375, "step": 99335 }, { "epoch": 0.8589636060215649, "grad_norm": 1.815764469722617, "learning_rate": 3.6591951265181092e-06, "loss": 0.09800567626953124, "step": 99340 }, { "epoch": 0.8590068395431082, "grad_norm": 0.8619834906144961, "learning_rate": 3.65899637099719e-06, "loss": 0.09238624572753906, "step": 99345 }, { "epoch": 0.8590500730646514, "grad_norm": 7.442531606578804, "learning_rate": 3.6587976124370375e-06, "loss": 0.1391357421875, "step": 99350 }, { "epoch": 0.8590933065861946, "grad_norm": 1.5001620729477008, "learning_rate": 3.6585988508385697e-06, "loss": 0.1829925537109375, "step": 99355 }, { "epoch": 0.859136540107738, "grad_norm": 29.101672795548144, "learning_rate": 3.658400086202701e-06, "loss": 0.09498367309570313, "step": 99360 }, { "epoch": 0.8591797736292812, "grad_norm": 0.368321566786785, "learning_rate": 3.6582013185303497e-06, "loss": 0.23197021484375, "step": 99365 }, { "epoch": 0.8592230071508244, "grad_norm": 0.5456737177494839, "learning_rate": 3.658002547822432e-06, "loss": 0.14669303894042968, "step": 99370 }, { "epoch": 0.8592662406723677, "grad_norm": 25.053021613662708, "learning_rate": 3.6578037740798653e-06, "loss": 0.11493358612060547, "step": 99375 }, { "epoch": 0.859309474193911, "grad_norm": 2.709584136693148, "learning_rate": 3.6576049973035645e-06, "loss": 0.09907493591308594, "step": 99380 }, { "epoch": 0.8593527077154542, "grad_norm": 1.5602185677311757, "learning_rate": 3.657406217494449e-06, "loss": 0.12145462036132812, "step": 99385 }, { "epoch": 0.8593959412369975, "grad_norm": 0.3207521143933478, "learning_rate": 3.657207434653434e-06, "loss": 0.20123138427734374, "step": 99390 }, { "epoch": 0.8594391747585408, "grad_norm": 1.755650193471016, "learning_rate": 3.6570086487814352e-06, "loss": 0.0502471923828125, "step": 99395 }, { "epoch": 0.859482408280084, "grad_norm": 0.07889286899929313, "learning_rate": 3.6568098598793707e-06, "loss": 0.07590370178222657, "step": 99400 }, { "epoch": 0.8595256418016273, "grad_norm": 9.229003581419182, "learning_rate": 3.6566110679481575e-06, "loss": 0.303956413269043, "step": 99405 }, { "epoch": 0.8595688753231706, "grad_norm": 0.9015580820525648, "learning_rate": 3.6564122729887122e-06, "loss": 0.09214324951171875, "step": 99410 }, { "epoch": 0.8596121088447138, "grad_norm": 7.906206975113359, "learning_rate": 3.656213475001952e-06, "loss": 0.05262451171875, "step": 99415 }, { "epoch": 0.8596553423662571, "grad_norm": 1.0902519079891728, "learning_rate": 3.6560146739887932e-06, "loss": 0.1063232421875, "step": 99420 }, { "epoch": 0.8596985758878004, "grad_norm": 3.6361194465081796, "learning_rate": 3.6558158699501517e-06, "loss": 0.09023818969726563, "step": 99425 }, { "epoch": 0.8597418094093436, "grad_norm": 11.48967480628369, "learning_rate": 3.655617062886945e-06, "loss": 0.10335693359375, "step": 99430 }, { "epoch": 0.8597850429308869, "grad_norm": 18.927351836591594, "learning_rate": 3.655418252800092e-06, "loss": 0.191864013671875, "step": 99435 }, { "epoch": 0.8598282764524302, "grad_norm": 21.479857932371942, "learning_rate": 3.6552194396905064e-06, "loss": 0.08702545166015625, "step": 99440 }, { "epoch": 0.8598715099739734, "grad_norm": 2.0133833295276746, "learning_rate": 3.6550206235591075e-06, "loss": 0.04880599975585938, "step": 99445 }, { "epoch": 0.8599147434955167, "grad_norm": 6.6851558737274175, "learning_rate": 3.654821804406812e-06, "loss": 0.3411407470703125, "step": 99450 }, { "epoch": 0.8599579770170599, "grad_norm": 0.9644486943865191, "learning_rate": 3.6546229822345352e-06, "loss": 0.21787109375, "step": 99455 }, { "epoch": 0.8600012105386032, "grad_norm": 16.45283406623162, "learning_rate": 3.6544241570431953e-06, "loss": 0.12417449951171874, "step": 99460 }, { "epoch": 0.8600444440601465, "grad_norm": 23.97777188761897, "learning_rate": 3.654225328833709e-06, "loss": 0.52169189453125, "step": 99465 }, { "epoch": 0.8600876775816897, "grad_norm": 7.169417066687929, "learning_rate": 3.654026497606993e-06, "loss": 0.23518447875976561, "step": 99470 }, { "epoch": 0.860130911103233, "grad_norm": 0.7531157121677302, "learning_rate": 3.6538276633639647e-06, "loss": 0.05797576904296875, "step": 99475 }, { "epoch": 0.8601741446247763, "grad_norm": 27.63157239708744, "learning_rate": 3.653628826105542e-06, "loss": 0.24141082763671876, "step": 99480 }, { "epoch": 0.8602173781463195, "grad_norm": 29.644215308494775, "learning_rate": 3.6534299858326396e-06, "loss": 0.18251953125, "step": 99485 }, { "epoch": 0.8602606116678628, "grad_norm": 3.5916039009704575, "learning_rate": 3.653231142546176e-06, "loss": 0.21141433715820312, "step": 99490 }, { "epoch": 0.8603038451894061, "grad_norm": 12.526757980357528, "learning_rate": 3.6530322962470688e-06, "loss": 0.11859893798828125, "step": 99495 }, { "epoch": 0.8603470787109493, "grad_norm": 4.222157594009258, "learning_rate": 3.652833446936234e-06, "loss": 0.0663442611694336, "step": 99500 }, { "epoch": 0.8603903122324926, "grad_norm": 40.681168692359115, "learning_rate": 3.6526345946145884e-06, "loss": 0.4327831268310547, "step": 99505 }, { "epoch": 0.8604335457540359, "grad_norm": 8.638741711758502, "learning_rate": 3.652435739283051e-06, "loss": 0.11646728515625, "step": 99510 }, { "epoch": 0.8604767792755791, "grad_norm": 5.151311833746438, "learning_rate": 3.6522368809425367e-06, "loss": 0.185968017578125, "step": 99515 }, { "epoch": 0.8605200127971224, "grad_norm": 10.424053735385435, "learning_rate": 3.6520380195939635e-06, "loss": 0.048928451538085935, "step": 99520 }, { "epoch": 0.8605632463186657, "grad_norm": 4.235309007764313, "learning_rate": 3.6518391552382485e-06, "loss": 0.0776824951171875, "step": 99525 }, { "epoch": 0.8606064798402089, "grad_norm": 14.802409288364238, "learning_rate": 3.65164028787631e-06, "loss": 0.21904296875, "step": 99530 }, { "epoch": 0.8606497133617522, "grad_norm": 3.707028689150336, "learning_rate": 3.6514414175090627e-06, "loss": 0.03556137084960938, "step": 99535 }, { "epoch": 0.8606929468832955, "grad_norm": 15.919944475921046, "learning_rate": 3.651242544137425e-06, "loss": 0.09430770874023438, "step": 99540 }, { "epoch": 0.8607361804048387, "grad_norm": 2.0503470774103376, "learning_rate": 3.6510436677623156e-06, "loss": 0.2592315673828125, "step": 99545 }, { "epoch": 0.8607794139263819, "grad_norm": 29.84752965794069, "learning_rate": 3.650844788384651e-06, "loss": 0.1847808837890625, "step": 99550 }, { "epoch": 0.8608226474479252, "grad_norm": 12.762517439957923, "learning_rate": 3.6506459060053453e-06, "loss": 0.21710205078125, "step": 99555 }, { "epoch": 0.8608658809694685, "grad_norm": 29.38263023821441, "learning_rate": 3.6504470206253197e-06, "loss": 0.1059722900390625, "step": 99560 }, { "epoch": 0.8609091144910117, "grad_norm": 1.410233990834089, "learning_rate": 3.6502481322454895e-06, "loss": 0.254730224609375, "step": 99565 }, { "epoch": 0.860952348012555, "grad_norm": 7.416859822449647, "learning_rate": 3.650049240866772e-06, "loss": 0.18867645263671876, "step": 99570 }, { "epoch": 0.8609955815340983, "grad_norm": 15.897361955734564, "learning_rate": 3.6498503464900853e-06, "loss": 0.1008209228515625, "step": 99575 }, { "epoch": 0.8610388150556415, "grad_norm": 6.500853687107848, "learning_rate": 3.649651449116347e-06, "loss": 0.2219970703125, "step": 99580 }, { "epoch": 0.8610820485771848, "grad_norm": 3.7467940285778014, "learning_rate": 3.649452548746472e-06, "loss": 0.2094867706298828, "step": 99585 }, { "epoch": 0.8611252820987281, "grad_norm": 51.746631594283045, "learning_rate": 3.6492536453813802e-06, "loss": 0.2053985595703125, "step": 99590 }, { "epoch": 0.8611685156202713, "grad_norm": 2.8903407877482956, "learning_rate": 3.649054739021988e-06, "loss": 0.197137451171875, "step": 99595 }, { "epoch": 0.8612117491418146, "grad_norm": 3.321227667252735, "learning_rate": 3.6488558296692124e-06, "loss": 0.01994171142578125, "step": 99600 }, { "epoch": 0.8612549826633579, "grad_norm": 35.990305703027644, "learning_rate": 3.6486569173239713e-06, "loss": 0.2622314453125, "step": 99605 }, { "epoch": 0.8612982161849011, "grad_norm": 13.041107368136611, "learning_rate": 3.6484580019871816e-06, "loss": 0.1689910888671875, "step": 99610 }, { "epoch": 0.8613414497064444, "grad_norm": 1.9049874842443209, "learning_rate": 3.6482590836597605e-06, "loss": 0.035695648193359374, "step": 99615 }, { "epoch": 0.8613846832279877, "grad_norm": 6.796419728011115, "learning_rate": 3.648060162342627e-06, "loss": 0.1266357421875, "step": 99620 }, { "epoch": 0.8614279167495309, "grad_norm": 23.761488134853217, "learning_rate": 3.647861238036696e-06, "loss": 0.1608428955078125, "step": 99625 }, { "epoch": 0.8614711502710741, "grad_norm": 1.3676611431727148, "learning_rate": 3.6476623107428874e-06, "loss": 0.18073348999023436, "step": 99630 }, { "epoch": 0.8615143837926175, "grad_norm": 3.099645546423644, "learning_rate": 3.647463380462116e-06, "loss": 0.0823944091796875, "step": 99635 }, { "epoch": 0.8615576173141607, "grad_norm": 3.5958369594328983, "learning_rate": 3.6472644471953018e-06, "loss": 0.05441780090332031, "step": 99640 }, { "epoch": 0.8616008508357039, "grad_norm": 3.51539130589177, "learning_rate": 3.6470655109433616e-06, "loss": 0.14817543029785157, "step": 99645 }, { "epoch": 0.8616440843572473, "grad_norm": 0.3135240355017921, "learning_rate": 3.646866571707211e-06, "loss": 0.0754638671875, "step": 99650 }, { "epoch": 0.8616873178787905, "grad_norm": 21.45025811546398, "learning_rate": 3.6466676294877694e-06, "loss": 0.3444873809814453, "step": 99655 }, { "epoch": 0.8617305514003337, "grad_norm": 12.43148205836113, "learning_rate": 3.646468684285954e-06, "loss": 0.4046875, "step": 99660 }, { "epoch": 0.8617737849218771, "grad_norm": 25.28719526266654, "learning_rate": 3.6462697361026825e-06, "loss": 0.05851593017578125, "step": 99665 }, { "epoch": 0.8618170184434203, "grad_norm": 9.81638371293277, "learning_rate": 3.646070784938872e-06, "loss": 0.16956405639648436, "step": 99670 }, { "epoch": 0.8618602519649635, "grad_norm": 15.08172657336606, "learning_rate": 3.64587183079544e-06, "loss": 0.15782661437988282, "step": 99675 }, { "epoch": 0.8619034854865069, "grad_norm": 25.13725554576609, "learning_rate": 3.6456728736733042e-06, "loss": 0.1618183135986328, "step": 99680 }, { "epoch": 0.8619467190080501, "grad_norm": 14.728074456784174, "learning_rate": 3.6454739135733814e-06, "loss": 0.13233489990234376, "step": 99685 }, { "epoch": 0.8619899525295933, "grad_norm": 2.4383460607752356, "learning_rate": 3.645274950496591e-06, "loss": 0.024000930786132812, "step": 99690 }, { "epoch": 0.8620331860511367, "grad_norm": 5.248634010660204, "learning_rate": 3.6450759844438484e-06, "loss": 0.09850921630859374, "step": 99695 }, { "epoch": 0.8620764195726799, "grad_norm": 10.10244537378734, "learning_rate": 3.6448770154160734e-06, "loss": 0.091796875, "step": 99700 }, { "epoch": 0.8621196530942231, "grad_norm": 43.10709106681292, "learning_rate": 3.6446780434141827e-06, "loss": 0.3607738494873047, "step": 99705 }, { "epoch": 0.8621628866157665, "grad_norm": 0.7432284090386523, "learning_rate": 3.6444790684390933e-06, "loss": 0.020954322814941407, "step": 99710 }, { "epoch": 0.8622061201373097, "grad_norm": 9.171615904578854, "learning_rate": 3.6442800904917238e-06, "loss": 0.0830780029296875, "step": 99715 }, { "epoch": 0.8622493536588529, "grad_norm": 2.5758230895969882, "learning_rate": 3.644081109572991e-06, "loss": 0.219354248046875, "step": 99720 }, { "epoch": 0.8622925871803961, "grad_norm": 1.3049775887051243, "learning_rate": 3.6438821256838133e-06, "loss": 0.20632171630859375, "step": 99725 }, { "epoch": 0.8623358207019395, "grad_norm": 15.385583899462654, "learning_rate": 3.6436831388251083e-06, "loss": 0.21878433227539062, "step": 99730 }, { "epoch": 0.8623790542234827, "grad_norm": 26.375990232911068, "learning_rate": 3.643484148997793e-06, "loss": 0.3064178466796875, "step": 99735 }, { "epoch": 0.8624222877450259, "grad_norm": 3.6515474431004904, "learning_rate": 3.643285156202786e-06, "loss": 0.042083740234375, "step": 99740 }, { "epoch": 0.8624655212665693, "grad_norm": 26.65143715441742, "learning_rate": 3.6430861604410044e-06, "loss": 0.4100013732910156, "step": 99745 }, { "epoch": 0.8625087547881125, "grad_norm": 12.715446168791106, "learning_rate": 3.6428871617133667e-06, "loss": 0.11178970336914062, "step": 99750 }, { "epoch": 0.8625519883096557, "grad_norm": 0.1708186765453017, "learning_rate": 3.64268816002079e-06, "loss": 0.07411346435546876, "step": 99755 }, { "epoch": 0.8625952218311991, "grad_norm": 21.73104011491802, "learning_rate": 3.642489155364192e-06, "loss": 0.12313232421875, "step": 99760 }, { "epoch": 0.8626384553527423, "grad_norm": 1.768734676662629, "learning_rate": 3.6422901477444913e-06, "loss": 0.07135200500488281, "step": 99765 }, { "epoch": 0.8626816888742855, "grad_norm": 14.179053785617953, "learning_rate": 3.6420911371626047e-06, "loss": 0.448724365234375, "step": 99770 }, { "epoch": 0.8627249223958289, "grad_norm": 3.8452224456455597, "learning_rate": 3.641892123619451e-06, "loss": 0.08869132995605469, "step": 99775 }, { "epoch": 0.8627681559173721, "grad_norm": 0.7826457563939678, "learning_rate": 3.6416931071159473e-06, "loss": 0.16040878295898436, "step": 99780 }, { "epoch": 0.8628113894389153, "grad_norm": 0.4600114274552123, "learning_rate": 3.641494087653012e-06, "loss": 0.05609474182128906, "step": 99785 }, { "epoch": 0.8628546229604587, "grad_norm": 8.944515578217729, "learning_rate": 3.6412950652315623e-06, "loss": 0.043231201171875, "step": 99790 }, { "epoch": 0.8628978564820019, "grad_norm": 9.70167392211567, "learning_rate": 3.641096039852516e-06, "loss": 0.1851957321166992, "step": 99795 }, { "epoch": 0.8629410900035451, "grad_norm": 1.2424646468749416, "learning_rate": 3.6408970115167926e-06, "loss": 0.07832298278808594, "step": 99800 }, { "epoch": 0.8629843235250884, "grad_norm": 4.25813785394158, "learning_rate": 3.640697980225308e-06, "loss": 0.0501129150390625, "step": 99805 }, { "epoch": 0.8630275570466317, "grad_norm": 47.485650350375685, "learning_rate": 3.640498945978981e-06, "loss": 0.1502349853515625, "step": 99810 }, { "epoch": 0.8630707905681749, "grad_norm": 0.25310835477270777, "learning_rate": 3.6402999087787297e-06, "loss": 0.05827903747558594, "step": 99815 }, { "epoch": 0.8631140240897182, "grad_norm": 5.172483264448454, "learning_rate": 3.640100868625471e-06, "loss": 0.28063411712646485, "step": 99820 }, { "epoch": 0.8631572576112615, "grad_norm": 43.624808972278025, "learning_rate": 3.639901825520124e-06, "loss": 0.097833251953125, "step": 99825 }, { "epoch": 0.8632004911328047, "grad_norm": 0.4403551649360387, "learning_rate": 3.639702779463607e-06, "loss": 0.07122726440429687, "step": 99830 }, { "epoch": 0.863243724654348, "grad_norm": 1.4625783712938678, "learning_rate": 3.639503730456837e-06, "loss": 0.09935646057128907, "step": 99835 }, { "epoch": 0.8632869581758913, "grad_norm": 7.536029363064273, "learning_rate": 3.6393046785007323e-06, "loss": 0.135302734375, "step": 99840 }, { "epoch": 0.8633301916974345, "grad_norm": 0.10035424408955233, "learning_rate": 3.639105623596211e-06, "loss": 0.06510467529296875, "step": 99845 }, { "epoch": 0.8633734252189778, "grad_norm": 5.698995199410221, "learning_rate": 3.638906565744191e-06, "loss": 0.5415412902832031, "step": 99850 }, { "epoch": 0.8634166587405211, "grad_norm": 20.747035213047212, "learning_rate": 3.638707504945589e-06, "loss": 0.30817108154296874, "step": 99855 }, { "epoch": 0.8634598922620643, "grad_norm": 4.196010524255187, "learning_rate": 3.638508441201326e-06, "loss": 0.146783447265625, "step": 99860 }, { "epoch": 0.8635031257836076, "grad_norm": 1.0445476411462573, "learning_rate": 3.638309374512319e-06, "loss": 0.1157012939453125, "step": 99865 }, { "epoch": 0.8635463593051509, "grad_norm": 5.080923185761142, "learning_rate": 3.638110304879484e-06, "loss": 0.0634368896484375, "step": 99870 }, { "epoch": 0.8635895928266941, "grad_norm": 4.062933274751851, "learning_rate": 3.6379112323037414e-06, "loss": 0.35913887023925783, "step": 99875 }, { "epoch": 0.8636328263482373, "grad_norm": 2.3106391368295824, "learning_rate": 3.6377121567860084e-06, "loss": 0.11296043395996094, "step": 99880 }, { "epoch": 0.8636760598697807, "grad_norm": 1.6971390470710734, "learning_rate": 3.637513078327204e-06, "loss": 0.265313720703125, "step": 99885 }, { "epoch": 0.8637192933913239, "grad_norm": 9.166172388342897, "learning_rate": 3.637313996928245e-06, "loss": 0.21145782470703126, "step": 99890 }, { "epoch": 0.8637625269128671, "grad_norm": 0.3770697366734355, "learning_rate": 3.63711491259005e-06, "loss": 0.25881805419921877, "step": 99895 }, { "epoch": 0.8638057604344104, "grad_norm": 0.5924028160730488, "learning_rate": 3.636915825313538e-06, "loss": 0.30564117431640625, "step": 99900 }, { "epoch": 0.8638489939559537, "grad_norm": 32.25923046312668, "learning_rate": 3.636716735099626e-06, "loss": 0.3978546142578125, "step": 99905 }, { "epoch": 0.8638922274774969, "grad_norm": 7.936824061309965, "learning_rate": 3.636517641949232e-06, "loss": 0.15621337890625, "step": 99910 }, { "epoch": 0.8639354609990402, "grad_norm": 17.388336741929468, "learning_rate": 3.6363185458632757e-06, "loss": 0.08317413330078124, "step": 99915 }, { "epoch": 0.8639786945205835, "grad_norm": 1.1983587917752765, "learning_rate": 3.6361194468426744e-06, "loss": 0.14173583984375, "step": 99920 }, { "epoch": 0.8640219280421267, "grad_norm": 9.870121294590392, "learning_rate": 3.635920344888347e-06, "loss": 0.24534568786621094, "step": 99925 }, { "epoch": 0.86406516156367, "grad_norm": 1.159112419113973, "learning_rate": 3.6357212400012106e-06, "loss": 0.060894775390625, "step": 99930 }, { "epoch": 0.8641083950852133, "grad_norm": 1.9458981888366476, "learning_rate": 3.635522132182183e-06, "loss": 0.12945175170898438, "step": 99935 }, { "epoch": 0.8641516286067565, "grad_norm": 1.3949183001090995, "learning_rate": 3.6353230214321847e-06, "loss": 0.026533126831054688, "step": 99940 }, { "epoch": 0.8641948621282998, "grad_norm": 45.06919921716946, "learning_rate": 3.635123907752133e-06, "loss": 0.12824249267578125, "step": 99945 }, { "epoch": 0.8642380956498431, "grad_norm": 9.780407446317799, "learning_rate": 3.634924791142945e-06, "loss": 0.8239532470703125, "step": 99950 }, { "epoch": 0.8642813291713863, "grad_norm": 18.6601819803161, "learning_rate": 3.63472567160554e-06, "loss": 0.42256851196289064, "step": 99955 }, { "epoch": 0.8643245626929296, "grad_norm": 2.2936130209767946, "learning_rate": 3.6345265491408368e-06, "loss": 0.0603179931640625, "step": 99960 }, { "epoch": 0.8643677962144729, "grad_norm": 4.152919134547556, "learning_rate": 3.6343274237497526e-06, "loss": 0.1119873046875, "step": 99965 }, { "epoch": 0.8644110297360161, "grad_norm": 3.047459333175666, "learning_rate": 3.6341282954332073e-06, "loss": 0.0719883918762207, "step": 99970 }, { "epoch": 0.8644542632575594, "grad_norm": 16.572463408657843, "learning_rate": 3.633929164192118e-06, "loss": 0.2827583312988281, "step": 99975 }, { "epoch": 0.8644974967791026, "grad_norm": 0.2133633952376349, "learning_rate": 3.6337300300274036e-06, "loss": 0.26595001220703124, "step": 99980 }, { "epoch": 0.8645407303006459, "grad_norm": 9.007740062815325, "learning_rate": 3.633530892939982e-06, "loss": 0.039878463745117186, "step": 99985 }, { "epoch": 0.8645839638221892, "grad_norm": 24.669649676682692, "learning_rate": 3.633331752930772e-06, "loss": 0.15145301818847656, "step": 99990 }, { "epoch": 0.8646271973437324, "grad_norm": 1.1774595789117819, "learning_rate": 3.633132610000692e-06, "loss": 0.26077117919921877, "step": 99995 }, { "epoch": 0.8646704308652757, "grad_norm": 31.405310985284956, "learning_rate": 3.6329334641506595e-06, "loss": 0.2587921142578125, "step": 100000 }, { "epoch": 0.864713664386819, "grad_norm": 0.9560510358799229, "learning_rate": 3.6327343153815947e-06, "loss": 0.22724685668945313, "step": 100005 }, { "epoch": 0.8647568979083622, "grad_norm": 15.081977671868945, "learning_rate": 3.632535163694415e-06, "loss": 0.0951171875, "step": 100010 }, { "epoch": 0.8648001314299055, "grad_norm": 8.866935380815507, "learning_rate": 3.6323360090900388e-06, "loss": 0.7194869995117188, "step": 100015 }, { "epoch": 0.8648433649514488, "grad_norm": 4.384642482209038, "learning_rate": 3.632136851569385e-06, "loss": 0.09219512939453126, "step": 100020 }, { "epoch": 0.864886598472992, "grad_norm": 1.7997816903983137, "learning_rate": 3.631937691133372e-06, "loss": 0.05974273681640625, "step": 100025 }, { "epoch": 0.8649298319945353, "grad_norm": 15.121883009945593, "learning_rate": 3.6317385277829176e-06, "loss": 0.08702392578125, "step": 100030 }, { "epoch": 0.8649730655160786, "grad_norm": 1.0849917243857008, "learning_rate": 3.631539361518942e-06, "loss": 0.20424652099609375, "step": 100035 }, { "epoch": 0.8650162990376218, "grad_norm": 2.018631609786138, "learning_rate": 3.6313401923423622e-06, "loss": 0.4216148376464844, "step": 100040 }, { "epoch": 0.8650595325591651, "grad_norm": 4.259728191378396, "learning_rate": 3.6311410202540967e-06, "loss": 0.123736572265625, "step": 100045 }, { "epoch": 0.8651027660807084, "grad_norm": 0.099645646308935, "learning_rate": 3.630941845255065e-06, "loss": 0.13331756591796876, "step": 100050 }, { "epoch": 0.8651459996022516, "grad_norm": 9.005938757074746, "learning_rate": 3.6307426673461854e-06, "loss": 0.1325439453125, "step": 100055 }, { "epoch": 0.8651892331237949, "grad_norm": 23.57197027238272, "learning_rate": 3.6305434865283755e-06, "loss": 0.14014892578125, "step": 100060 }, { "epoch": 0.8652324666453381, "grad_norm": 8.840327751996826, "learning_rate": 3.630344302802556e-06, "loss": 0.1192901611328125, "step": 100065 }, { "epoch": 0.8652757001668814, "grad_norm": 20.953788699893607, "learning_rate": 3.630145116169644e-06, "loss": 0.094757080078125, "step": 100070 }, { "epoch": 0.8653189336884246, "grad_norm": 0.6384931323895267, "learning_rate": 3.6299459266305577e-06, "loss": 0.13909149169921875, "step": 100075 }, { "epoch": 0.865362167209968, "grad_norm": 24.70370745558531, "learning_rate": 3.6297467341862165e-06, "loss": 0.2801490783691406, "step": 100080 }, { "epoch": 0.8654054007315112, "grad_norm": 61.75990086842527, "learning_rate": 3.62954753883754e-06, "loss": 0.030249786376953126, "step": 100085 }, { "epoch": 0.8654486342530544, "grad_norm": 7.626947550950345, "learning_rate": 3.629348340585445e-06, "loss": 0.05674591064453125, "step": 100090 }, { "epoch": 0.8654918677745977, "grad_norm": 30.706499327272983, "learning_rate": 3.629149139430851e-06, "loss": 0.09070892333984375, "step": 100095 }, { "epoch": 0.865535101296141, "grad_norm": 0.9310437530346626, "learning_rate": 3.6289499353746775e-06, "loss": 0.17506809234619142, "step": 100100 }, { "epoch": 0.8655783348176842, "grad_norm": 3.676519380998686, "learning_rate": 3.6287507284178416e-06, "loss": 0.10963363647460937, "step": 100105 }, { "epoch": 0.8656215683392275, "grad_norm": 3.8209337714183094, "learning_rate": 3.628551518561263e-06, "loss": 0.183837890625, "step": 100110 }, { "epoch": 0.8656648018607708, "grad_norm": 1.643523844490075, "learning_rate": 3.6283523058058616e-06, "loss": 0.1360321044921875, "step": 100115 }, { "epoch": 0.865708035382314, "grad_norm": 1.3152129036361158, "learning_rate": 3.6281530901525537e-06, "loss": 0.11571807861328125, "step": 100120 }, { "epoch": 0.8657512689038573, "grad_norm": 5.602691543403685, "learning_rate": 3.6279538716022594e-06, "loss": 0.09066886901855468, "step": 100125 }, { "epoch": 0.8657945024254006, "grad_norm": 2.8317083378081382, "learning_rate": 3.6277546501558975e-06, "loss": 0.33326263427734376, "step": 100130 }, { "epoch": 0.8658377359469438, "grad_norm": 5.513385803653576, "learning_rate": 3.6275554258143864e-06, "loss": 0.06377925872802734, "step": 100135 }, { "epoch": 0.8658809694684871, "grad_norm": 3.5823010224011647, "learning_rate": 3.627356198578645e-06, "loss": 0.1272918701171875, "step": 100140 }, { "epoch": 0.8659242029900304, "grad_norm": 17.633738057651836, "learning_rate": 3.6271569684495927e-06, "loss": 0.10619049072265625, "step": 100145 }, { "epoch": 0.8659674365115736, "grad_norm": 8.29099412978052, "learning_rate": 3.626957735428148e-06, "loss": 0.09256420135498047, "step": 100150 }, { "epoch": 0.8660106700331168, "grad_norm": 0.5344679322764085, "learning_rate": 3.6267584995152295e-06, "loss": 0.2182098388671875, "step": 100155 }, { "epoch": 0.8660539035546602, "grad_norm": 11.246016186221398, "learning_rate": 3.6265592607117556e-06, "loss": 0.12065963745117188, "step": 100160 }, { "epoch": 0.8660971370762034, "grad_norm": 3.6573958965423197, "learning_rate": 3.626360019018646e-06, "loss": 0.1439971923828125, "step": 100165 }, { "epoch": 0.8661403705977466, "grad_norm": 11.417010640424442, "learning_rate": 3.6261607744368203e-06, "loss": 0.0836181640625, "step": 100170 }, { "epoch": 0.86618360411929, "grad_norm": 16.617864897668728, "learning_rate": 3.625961526967195e-06, "loss": 0.0984710693359375, "step": 100175 }, { "epoch": 0.8662268376408332, "grad_norm": 7.050047931710661, "learning_rate": 3.6257622766106913e-06, "loss": 0.09199371337890624, "step": 100180 }, { "epoch": 0.8662700711623764, "grad_norm": 1.6301241909511697, "learning_rate": 3.6255630233682267e-06, "loss": 0.06841964721679687, "step": 100185 }, { "epoch": 0.8663133046839198, "grad_norm": 2.6391902103356344, "learning_rate": 3.625363767240721e-06, "loss": 0.11695518493652343, "step": 100190 }, { "epoch": 0.866356538205463, "grad_norm": 9.911721492777199, "learning_rate": 3.625164508229093e-06, "loss": 0.20245513916015626, "step": 100195 }, { "epoch": 0.8663997717270062, "grad_norm": 0.17960498580871828, "learning_rate": 3.624965246334262e-06, "loss": 0.06719207763671875, "step": 100200 }, { "epoch": 0.8664430052485496, "grad_norm": 43.54900119714648, "learning_rate": 3.624765981557146e-06, "loss": 0.14079132080078124, "step": 100205 }, { "epoch": 0.8664862387700928, "grad_norm": 15.317216494080215, "learning_rate": 3.6245667138986645e-06, "loss": 0.19847183227539061, "step": 100210 }, { "epoch": 0.866529472291636, "grad_norm": 18.38508851458028, "learning_rate": 3.6243674433597363e-06, "loss": 0.11251907348632813, "step": 100215 }, { "epoch": 0.8665727058131794, "grad_norm": 42.64831893035609, "learning_rate": 3.6241681699412806e-06, "loss": 0.4671062469482422, "step": 100220 }, { "epoch": 0.8666159393347226, "grad_norm": 15.671187824963745, "learning_rate": 3.623968893644217e-06, "loss": 0.14060134887695314, "step": 100225 }, { "epoch": 0.8666591728562658, "grad_norm": 5.067012015787299, "learning_rate": 3.623769614469464e-06, "loss": 0.5149276733398438, "step": 100230 }, { "epoch": 0.866702406377809, "grad_norm": 3.7070019282990865, "learning_rate": 3.6235703324179403e-06, "loss": 0.5281356811523438, "step": 100235 }, { "epoch": 0.8667456398993524, "grad_norm": 5.367581372640845, "learning_rate": 3.6233710474905646e-06, "loss": 0.2007659912109375, "step": 100240 }, { "epoch": 0.8667888734208956, "grad_norm": 7.543121634321979, "learning_rate": 3.6231717596882575e-06, "loss": 0.05796680450439453, "step": 100245 }, { "epoch": 0.8668321069424388, "grad_norm": 0.3204809976392452, "learning_rate": 3.6229724690119375e-06, "loss": 0.06873550415039062, "step": 100250 }, { "epoch": 0.8668753404639822, "grad_norm": 28.838868458727678, "learning_rate": 3.622773175462523e-06, "loss": 0.322857666015625, "step": 100255 }, { "epoch": 0.8669185739855254, "grad_norm": 12.43455165680192, "learning_rate": 3.622573879040934e-06, "loss": 0.1174285888671875, "step": 100260 }, { "epoch": 0.8669618075070686, "grad_norm": 13.761174306367433, "learning_rate": 3.6223745797480894e-06, "loss": 0.06397781372070313, "step": 100265 }, { "epoch": 0.867005041028612, "grad_norm": 1.8378919113442063, "learning_rate": 3.6221752775849082e-06, "loss": 0.1612534523010254, "step": 100270 }, { "epoch": 0.8670482745501552, "grad_norm": 4.414495055975251, "learning_rate": 3.6219759725523086e-06, "loss": 0.21709709167480468, "step": 100275 }, { "epoch": 0.8670915080716984, "grad_norm": 5.6435071286715175, "learning_rate": 3.6217766646512123e-06, "loss": 0.18117103576660157, "step": 100280 }, { "epoch": 0.8671347415932418, "grad_norm": 10.594216215951937, "learning_rate": 3.6215773538825356e-06, "loss": 0.19485321044921874, "step": 100285 }, { "epoch": 0.867177975114785, "grad_norm": 4.037512626126407, "learning_rate": 3.6213780402472e-06, "loss": 0.3881950378417969, "step": 100290 }, { "epoch": 0.8672212086363282, "grad_norm": 18.024994091339295, "learning_rate": 3.6211787237461244e-06, "loss": 0.21560020446777345, "step": 100295 }, { "epoch": 0.8672644421578716, "grad_norm": 16.57931999835965, "learning_rate": 3.620979404380226e-06, "loss": 0.09823417663574219, "step": 100300 }, { "epoch": 0.8673076756794148, "grad_norm": 11.627036173468431, "learning_rate": 3.620780082150426e-06, "loss": 0.113641357421875, "step": 100305 }, { "epoch": 0.867350909200958, "grad_norm": 3.594643055834642, "learning_rate": 3.620580757057643e-06, "loss": 0.22778816223144532, "step": 100310 }, { "epoch": 0.8673941427225014, "grad_norm": 1.2641106682963366, "learning_rate": 3.6203814291027963e-06, "loss": 0.0812469482421875, "step": 100315 }, { "epoch": 0.8674373762440446, "grad_norm": 0.24615838849513288, "learning_rate": 3.620182098286806e-06, "loss": 0.025695037841796876, "step": 100320 }, { "epoch": 0.8674806097655878, "grad_norm": 13.358159186543679, "learning_rate": 3.6199827646105903e-06, "loss": 0.1924356460571289, "step": 100325 }, { "epoch": 0.8675238432871311, "grad_norm": 7.632323172298017, "learning_rate": 3.619783428075068e-06, "loss": 0.087042236328125, "step": 100330 }, { "epoch": 0.8675670768086744, "grad_norm": 17.81393939638177, "learning_rate": 3.61958408868116e-06, "loss": 0.39130172729492185, "step": 100335 }, { "epoch": 0.8676103103302176, "grad_norm": 56.62048017771266, "learning_rate": 3.619384746429785e-06, "loss": 0.18841476440429689, "step": 100340 }, { "epoch": 0.8676535438517609, "grad_norm": 57.228054655877024, "learning_rate": 3.6191854013218627e-06, "loss": 0.42454376220703127, "step": 100345 }, { "epoch": 0.8676967773733042, "grad_norm": 5.207390619783738, "learning_rate": 3.618986053358311e-06, "loss": 0.16213226318359375, "step": 100350 }, { "epoch": 0.8677400108948474, "grad_norm": 0.9237344032388793, "learning_rate": 3.6187867025400515e-06, "loss": 0.12541351318359376, "step": 100355 }, { "epoch": 0.8677832444163907, "grad_norm": 12.990465780172537, "learning_rate": 3.618587348868001e-06, "loss": 0.1683868408203125, "step": 100360 }, { "epoch": 0.867826477937934, "grad_norm": 0.5409248206741645, "learning_rate": 3.6183879923430805e-06, "loss": 0.260418701171875, "step": 100365 }, { "epoch": 0.8678697114594772, "grad_norm": 3.0210540206593466, "learning_rate": 3.6181886329662103e-06, "loss": 0.08362236022949218, "step": 100370 }, { "epoch": 0.8679129449810205, "grad_norm": 0.34266653908301237, "learning_rate": 3.617989270738309e-06, "loss": 0.09687538146972656, "step": 100375 }, { "epoch": 0.8679561785025638, "grad_norm": 3.638583372728757, "learning_rate": 3.6177899056602944e-06, "loss": 0.053228759765625, "step": 100380 }, { "epoch": 0.867999412024107, "grad_norm": 8.109878827623314, "learning_rate": 3.6175905377330873e-06, "loss": 0.116424560546875, "step": 100385 }, { "epoch": 0.8680426455456502, "grad_norm": 61.57624958722208, "learning_rate": 3.6173911669576087e-06, "loss": 0.1244842529296875, "step": 100390 }, { "epoch": 0.8680858790671936, "grad_norm": 9.0864932725061, "learning_rate": 3.6171917933347754e-06, "loss": 0.09341583251953126, "step": 100395 }, { "epoch": 0.8681291125887368, "grad_norm": 12.646721501495895, "learning_rate": 3.6169924168655083e-06, "loss": 0.08817901611328124, "step": 100400 }, { "epoch": 0.86817234611028, "grad_norm": 14.427018467296076, "learning_rate": 3.6167930375507276e-06, "loss": 0.09105987548828125, "step": 100405 }, { "epoch": 0.8682155796318233, "grad_norm": 2.7784029388530045, "learning_rate": 3.616593655391351e-06, "loss": 0.07883758544921875, "step": 100410 }, { "epoch": 0.8682588131533666, "grad_norm": 29.66884659424678, "learning_rate": 3.6163942703882985e-06, "loss": 0.416412353515625, "step": 100415 }, { "epoch": 0.8683020466749098, "grad_norm": 6.4693246974917455, "learning_rate": 3.616194882542491e-06, "loss": 0.3149364471435547, "step": 100420 }, { "epoch": 0.8683452801964531, "grad_norm": 2.338149687706514, "learning_rate": 3.615995491854847e-06, "loss": 0.2884849548339844, "step": 100425 }, { "epoch": 0.8683885137179964, "grad_norm": 6.224661502239574, "learning_rate": 3.615796098326286e-06, "loss": 0.09085006713867187, "step": 100430 }, { "epoch": 0.8684317472395396, "grad_norm": 6.878624511317983, "learning_rate": 3.6155967019577282e-06, "loss": 0.020095062255859376, "step": 100435 }, { "epoch": 0.8684749807610829, "grad_norm": 4.0651218803467595, "learning_rate": 3.615397302750092e-06, "loss": 0.0191986083984375, "step": 100440 }, { "epoch": 0.8685182142826262, "grad_norm": 2.816226424499126, "learning_rate": 3.615197900704298e-06, "loss": 0.04365692138671875, "step": 100445 }, { "epoch": 0.8685614478041694, "grad_norm": 3.5932024997323984, "learning_rate": 3.614998495821266e-06, "loss": 0.1490997314453125, "step": 100450 }, { "epoch": 0.8686046813257127, "grad_norm": 0.5839128035537561, "learning_rate": 3.614799088101916e-06, "loss": 0.0667724609375, "step": 100455 }, { "epoch": 0.868647914847256, "grad_norm": 24.71455297259154, "learning_rate": 3.614599677547165e-06, "loss": 0.12690887451171876, "step": 100460 }, { "epoch": 0.8686911483687992, "grad_norm": 34.411423124293044, "learning_rate": 3.6144002641579367e-06, "loss": 0.16238441467285156, "step": 100465 }, { "epoch": 0.8687343818903425, "grad_norm": 59.77858224333604, "learning_rate": 3.614200847935147e-06, "loss": 0.18950653076171875, "step": 100470 }, { "epoch": 0.8687776154118858, "grad_norm": 10.984760224726125, "learning_rate": 3.6140014288797175e-06, "loss": 0.308819580078125, "step": 100475 }, { "epoch": 0.868820848933429, "grad_norm": 9.869221546514966, "learning_rate": 3.6138020069925688e-06, "loss": 0.14744796752929687, "step": 100480 }, { "epoch": 0.8688640824549723, "grad_norm": 20.386730576001685, "learning_rate": 3.6136025822746194e-06, "loss": 0.1775909423828125, "step": 100485 }, { "epoch": 0.8689073159765156, "grad_norm": 0.7132915114105353, "learning_rate": 3.6134031547267884e-06, "loss": 0.037103271484375, "step": 100490 }, { "epoch": 0.8689505494980588, "grad_norm": 3.8489969714555503, "learning_rate": 3.613203724349996e-06, "loss": 0.2997222900390625, "step": 100495 }, { "epoch": 0.8689937830196021, "grad_norm": 0.7371500730121943, "learning_rate": 3.6130042911451624e-06, "loss": 0.3211212158203125, "step": 100500 }, { "epoch": 0.8690370165411453, "grad_norm": 0.4089301666249937, "learning_rate": 3.612804855113208e-06, "loss": 0.16439857482910156, "step": 100505 }, { "epoch": 0.8690802500626886, "grad_norm": 0.2414884725277509, "learning_rate": 3.61260541625505e-06, "loss": 0.014413738250732422, "step": 100510 }, { "epoch": 0.8691234835842319, "grad_norm": 34.513891341550625, "learning_rate": 3.612405974571612e-06, "loss": 0.15042724609375, "step": 100515 }, { "epoch": 0.8691667171057751, "grad_norm": 16.66154863469796, "learning_rate": 3.612206530063811e-06, "loss": 0.04456253051757812, "step": 100520 }, { "epoch": 0.8692099506273184, "grad_norm": 1.1485002096838448, "learning_rate": 3.612007082732567e-06, "loss": 0.14338226318359376, "step": 100525 }, { "epoch": 0.8692531841488617, "grad_norm": 2.9834729159596836, "learning_rate": 3.6118076325788005e-06, "loss": 0.1509674072265625, "step": 100530 }, { "epoch": 0.8692964176704049, "grad_norm": 17.01550207275728, "learning_rate": 3.6116081796034318e-06, "loss": 0.09769134521484375, "step": 100535 }, { "epoch": 0.8693396511919482, "grad_norm": 1.8280014750477807, "learning_rate": 3.6114087238073798e-06, "loss": 0.1465911865234375, "step": 100540 }, { "epoch": 0.8693828847134915, "grad_norm": 1.3367270211968476, "learning_rate": 3.6112092651915653e-06, "loss": 0.02023735046386719, "step": 100545 }, { "epoch": 0.8694261182350347, "grad_norm": 18.2509390518845, "learning_rate": 3.6110098037569077e-06, "loss": 0.09395828247070312, "step": 100550 }, { "epoch": 0.869469351756578, "grad_norm": 2.5015643873014226, "learning_rate": 3.610810339504326e-06, "loss": 0.08817558288574219, "step": 100555 }, { "epoch": 0.8695125852781213, "grad_norm": 2.8339363166392695, "learning_rate": 3.610610872434741e-06, "loss": 0.41307964324951174, "step": 100560 }, { "epoch": 0.8695558187996645, "grad_norm": 0.863209536320399, "learning_rate": 3.6104114025490737e-06, "loss": 0.3281513214111328, "step": 100565 }, { "epoch": 0.8695990523212078, "grad_norm": 0.3853236131067141, "learning_rate": 3.6102119298482422e-06, "loss": 0.1645843505859375, "step": 100570 }, { "epoch": 0.869642285842751, "grad_norm": 10.341044998819019, "learning_rate": 3.6100124543331675e-06, "loss": 0.19566726684570312, "step": 100575 }, { "epoch": 0.8696855193642943, "grad_norm": 1.5168940500475179, "learning_rate": 3.6098129760047694e-06, "loss": 0.035001373291015624, "step": 100580 }, { "epoch": 0.8697287528858375, "grad_norm": 0.4952793862058429, "learning_rate": 3.609613494863967e-06, "loss": 0.10293197631835938, "step": 100585 }, { "epoch": 0.8697719864073808, "grad_norm": 3.6600953887097383, "learning_rate": 3.609414010911682e-06, "loss": 0.342877197265625, "step": 100590 }, { "epoch": 0.8698152199289241, "grad_norm": 43.48051941923755, "learning_rate": 3.6092145241488324e-06, "loss": 0.4229240417480469, "step": 100595 }, { "epoch": 0.8698584534504673, "grad_norm": 29.39381485787889, "learning_rate": 3.60901503457634e-06, "loss": 0.20551300048828125, "step": 100600 }, { "epoch": 0.8699016869720106, "grad_norm": 25.176814244401758, "learning_rate": 3.6088155421951237e-06, "loss": 0.20219268798828124, "step": 100605 }, { "epoch": 0.8699449204935539, "grad_norm": 0.8272736045743606, "learning_rate": 3.608616047006104e-06, "loss": 0.1108367919921875, "step": 100610 }, { "epoch": 0.8699881540150971, "grad_norm": 4.193146498281373, "learning_rate": 3.608416549010201e-06, "loss": 0.09903316497802735, "step": 100615 }, { "epoch": 0.8700313875366404, "grad_norm": 3.879605473040301, "learning_rate": 3.6082170482083346e-06, "loss": 0.14749298095703126, "step": 100620 }, { "epoch": 0.8700746210581837, "grad_norm": 0.3846880385135658, "learning_rate": 3.6080175446014253e-06, "loss": 0.08094100952148438, "step": 100625 }, { "epoch": 0.8701178545797269, "grad_norm": 35.05248512637155, "learning_rate": 3.6078180381903923e-06, "loss": 0.30181427001953126, "step": 100630 }, { "epoch": 0.8701610881012702, "grad_norm": 7.741699100811906, "learning_rate": 3.6076185289761566e-06, "loss": 0.16954193115234376, "step": 100635 }, { "epoch": 0.8702043216228135, "grad_norm": 9.365635018948401, "learning_rate": 3.607419016959637e-06, "loss": 0.13538360595703125, "step": 100640 }, { "epoch": 0.8702475551443567, "grad_norm": 14.818710333450248, "learning_rate": 3.6072195021417554e-06, "loss": 0.10098133087158204, "step": 100645 }, { "epoch": 0.8702907886659, "grad_norm": 7.4148678018425205, "learning_rate": 3.607019984523431e-06, "loss": 0.1363250732421875, "step": 100650 }, { "epoch": 0.8703340221874433, "grad_norm": 4.563529126799491, "learning_rate": 3.606820464105584e-06, "loss": 0.04750518798828125, "step": 100655 }, { "epoch": 0.8703772557089865, "grad_norm": 1.1904348365330168, "learning_rate": 3.6066209408891348e-06, "loss": 0.06490020751953125, "step": 100660 }, { "epoch": 0.8704204892305298, "grad_norm": 0.27109732157414673, "learning_rate": 3.606421414875003e-06, "loss": 0.23091583251953124, "step": 100665 }, { "epoch": 0.8704637227520731, "grad_norm": 1.5860374717500811, "learning_rate": 3.606221886064109e-06, "loss": 0.14062461853027344, "step": 100670 }, { "epoch": 0.8705069562736163, "grad_norm": 0.443323029602676, "learning_rate": 3.6060223544573743e-06, "loss": 0.146014404296875, "step": 100675 }, { "epoch": 0.8705501897951595, "grad_norm": 10.031716798582666, "learning_rate": 3.605822820055718e-06, "loss": 0.13746109008789062, "step": 100680 }, { "epoch": 0.8705934233167029, "grad_norm": 5.156634281054544, "learning_rate": 3.6056232828600593e-06, "loss": 0.250555419921875, "step": 100685 }, { "epoch": 0.8706366568382461, "grad_norm": 2.08129888176367, "learning_rate": 3.6054237428713205e-06, "loss": 0.37310791015625, "step": 100690 }, { "epoch": 0.8706798903597893, "grad_norm": 2.849489311327214, "learning_rate": 3.60522420009042e-06, "loss": 0.045477294921875, "step": 100695 }, { "epoch": 0.8707231238813327, "grad_norm": 4.258434086317555, "learning_rate": 3.605024654518279e-06, "loss": 0.15269622802734376, "step": 100700 }, { "epoch": 0.8707663574028759, "grad_norm": 0.40256865784409046, "learning_rate": 3.6048251061558185e-06, "loss": 0.0706085205078125, "step": 100705 }, { "epoch": 0.8708095909244191, "grad_norm": 0.5459738641700101, "learning_rate": 3.6046255550039584e-06, "loss": 0.09029474258422851, "step": 100710 }, { "epoch": 0.8708528244459625, "grad_norm": 2.9786570145403646, "learning_rate": 3.6044260010636176e-06, "loss": 0.18319091796875, "step": 100715 }, { "epoch": 0.8708960579675057, "grad_norm": 2.9833368520522114, "learning_rate": 3.604226444335718e-06, "loss": 0.12178306579589844, "step": 100720 }, { "epoch": 0.8709392914890489, "grad_norm": 21.7633649040049, "learning_rate": 3.6040268848211788e-06, "loss": 0.1848541259765625, "step": 100725 }, { "epoch": 0.8709825250105923, "grad_norm": 15.550064251815392, "learning_rate": 3.6038273225209214e-06, "loss": 0.350653076171875, "step": 100730 }, { "epoch": 0.8710257585321355, "grad_norm": 0.7415911182847831, "learning_rate": 3.603627757435866e-06, "loss": 0.03828125, "step": 100735 }, { "epoch": 0.8710689920536787, "grad_norm": 2.736662187766631, "learning_rate": 3.603428189566933e-06, "loss": 0.07059173583984375, "step": 100740 }, { "epoch": 0.871112225575222, "grad_norm": 3.589194035325373, "learning_rate": 3.603228618915042e-06, "loss": 0.19585418701171875, "step": 100745 }, { "epoch": 0.8711554590967653, "grad_norm": 3.98383358940503, "learning_rate": 3.6030290454811137e-06, "loss": 0.25041542053222654, "step": 100750 }, { "epoch": 0.8711986926183085, "grad_norm": 0.6087362425620677, "learning_rate": 3.602829469266069e-06, "loss": 0.07393875122070312, "step": 100755 }, { "epoch": 0.8712419261398517, "grad_norm": 9.623086820952961, "learning_rate": 3.602629890270828e-06, "loss": 0.18807373046875, "step": 100760 }, { "epoch": 0.8712851596613951, "grad_norm": 3.008554461204689, "learning_rate": 3.6024303084963116e-06, "loss": 0.050205230712890625, "step": 100765 }, { "epoch": 0.8713283931829383, "grad_norm": 35.67248827092504, "learning_rate": 3.60223072394344e-06, "loss": 0.0868804931640625, "step": 100770 }, { "epoch": 0.8713716267044815, "grad_norm": 4.62834099760176, "learning_rate": 3.6020311366131327e-06, "loss": 0.23239288330078126, "step": 100775 }, { "epoch": 0.8714148602260249, "grad_norm": 5.787335347001624, "learning_rate": 3.601831546506311e-06, "loss": 0.46418914794921873, "step": 100780 }, { "epoch": 0.8714580937475681, "grad_norm": 32.9503636305785, "learning_rate": 3.6016319536238954e-06, "loss": 0.2290802001953125, "step": 100785 }, { "epoch": 0.8715013272691113, "grad_norm": 0.1345282358047597, "learning_rate": 3.601432357966807e-06, "loss": 0.09250869750976562, "step": 100790 }, { "epoch": 0.8715445607906547, "grad_norm": 7.0544211441930935, "learning_rate": 3.601232759535965e-06, "loss": 0.07985076904296876, "step": 100795 }, { "epoch": 0.8715877943121979, "grad_norm": 7.840875853840199, "learning_rate": 3.6010331583322916e-06, "loss": 0.1158111572265625, "step": 100800 }, { "epoch": 0.8716310278337411, "grad_norm": 7.390954814991234, "learning_rate": 3.600833554356706e-06, "loss": 0.07766609191894532, "step": 100805 }, { "epoch": 0.8716742613552845, "grad_norm": 4.299823413624123, "learning_rate": 3.6006339476101283e-06, "loss": 0.20422821044921874, "step": 100810 }, { "epoch": 0.8717174948768277, "grad_norm": 4.47020322302151, "learning_rate": 3.6004343380934804e-06, "loss": 0.1148956298828125, "step": 100815 }, { "epoch": 0.8717607283983709, "grad_norm": 2.585568382355863, "learning_rate": 3.600234725807683e-06, "loss": 0.03338356018066406, "step": 100820 }, { "epoch": 0.8718039619199143, "grad_norm": 17.4983839520733, "learning_rate": 3.600035110753656e-06, "loss": 0.11882095336914063, "step": 100825 }, { "epoch": 0.8718471954414575, "grad_norm": 4.037871947755392, "learning_rate": 3.5998354929323197e-06, "loss": 0.0612945556640625, "step": 100830 }, { "epoch": 0.8718904289630007, "grad_norm": 9.584264559663483, "learning_rate": 3.599635872344595e-06, "loss": 0.13663787841796876, "step": 100835 }, { "epoch": 0.8719336624845441, "grad_norm": 2.7313568381507687, "learning_rate": 3.5994362489914025e-06, "loss": 0.2405517578125, "step": 100840 }, { "epoch": 0.8719768960060873, "grad_norm": 0.3231167977606285, "learning_rate": 3.599236622873663e-06, "loss": 0.10433120727539062, "step": 100845 }, { "epoch": 0.8720201295276305, "grad_norm": 0.689526704503462, "learning_rate": 3.5990369939922983e-06, "loss": 0.17262763977050782, "step": 100850 }, { "epoch": 0.8720633630491738, "grad_norm": 9.535923491452623, "learning_rate": 3.5988373623482275e-06, "loss": 0.30660400390625, "step": 100855 }, { "epoch": 0.8721065965707171, "grad_norm": 0.6471056137733894, "learning_rate": 3.5986377279423702e-06, "loss": 0.058597564697265625, "step": 100860 }, { "epoch": 0.8721498300922603, "grad_norm": 6.400509365955627, "learning_rate": 3.5984380907756497e-06, "loss": 0.31662445068359374, "step": 100865 }, { "epoch": 0.8721930636138036, "grad_norm": 3.1747406245384857, "learning_rate": 3.5982384508489854e-06, "loss": 0.05580463409423828, "step": 100870 }, { "epoch": 0.8722362971353469, "grad_norm": 5.993372953127072, "learning_rate": 3.5980388081632985e-06, "loss": 0.4608612060546875, "step": 100875 }, { "epoch": 0.8722795306568901, "grad_norm": 5.902867932999362, "learning_rate": 3.5978391627195097e-06, "loss": 0.07095775604248047, "step": 100880 }, { "epoch": 0.8723227641784334, "grad_norm": 21.611187741432268, "learning_rate": 3.5976395145185397e-06, "loss": 0.10908737182617187, "step": 100885 }, { "epoch": 0.8723659976999767, "grad_norm": 6.759505672292593, "learning_rate": 3.5974398635613075e-06, "loss": 0.226727294921875, "step": 100890 }, { "epoch": 0.8724092312215199, "grad_norm": 28.959409294712728, "learning_rate": 3.5972402098487364e-06, "loss": 0.14319686889648436, "step": 100895 }, { "epoch": 0.8724524647430631, "grad_norm": 0.2981387356228585, "learning_rate": 3.5970405533817464e-06, "loss": 0.3176296234130859, "step": 100900 }, { "epoch": 0.8724956982646065, "grad_norm": 2.7053713617907276, "learning_rate": 3.5968408941612576e-06, "loss": 0.11339035034179687, "step": 100905 }, { "epoch": 0.8725389317861497, "grad_norm": 14.22865690930774, "learning_rate": 3.596641232188192e-06, "loss": 0.14810943603515625, "step": 100910 }, { "epoch": 0.872582165307693, "grad_norm": 11.797739727193438, "learning_rate": 3.5964415674634694e-06, "loss": 0.28074111938476565, "step": 100915 }, { "epoch": 0.8726253988292363, "grad_norm": 3.5595425341151667, "learning_rate": 3.5962418999880114e-06, "loss": 0.15098876953125, "step": 100920 }, { "epoch": 0.8726686323507795, "grad_norm": 0.08232555207621357, "learning_rate": 3.5960422297627376e-06, "loss": 0.23621368408203125, "step": 100925 }, { "epoch": 0.8727118658723227, "grad_norm": 26.782186914840903, "learning_rate": 3.595842556788571e-06, "loss": 0.18599624633789064, "step": 100930 }, { "epoch": 0.872755099393866, "grad_norm": 27.585657719743455, "learning_rate": 3.59564288106643e-06, "loss": 0.215875244140625, "step": 100935 }, { "epoch": 0.8727983329154093, "grad_norm": 0.3296725315727957, "learning_rate": 3.5954432025972367e-06, "loss": 0.19420013427734376, "step": 100940 }, { "epoch": 0.8728415664369525, "grad_norm": 4.018295703655455, "learning_rate": 3.595243521381913e-06, "loss": 0.10286636352539062, "step": 100945 }, { "epoch": 0.8728847999584958, "grad_norm": 18.312458001705867, "learning_rate": 3.5950438374213773e-06, "loss": 0.2843742370605469, "step": 100950 }, { "epoch": 0.8729280334800391, "grad_norm": 1.3607416758083877, "learning_rate": 3.5948441507165528e-06, "loss": 0.14872894287109376, "step": 100955 }, { "epoch": 0.8729712670015823, "grad_norm": 7.216895528114752, "learning_rate": 3.59464446126836e-06, "loss": 0.4464366912841797, "step": 100960 }, { "epoch": 0.8730145005231256, "grad_norm": 2.052264547862304, "learning_rate": 3.5944447690777194e-06, "loss": 0.30111083984375, "step": 100965 }, { "epoch": 0.8730577340446689, "grad_norm": 0.606734262570417, "learning_rate": 3.5942450741455512e-06, "loss": 0.14995479583740234, "step": 100970 }, { "epoch": 0.8731009675662121, "grad_norm": 0.9596328897677816, "learning_rate": 3.5940453764727787e-06, "loss": 0.0631744384765625, "step": 100975 }, { "epoch": 0.8731442010877554, "grad_norm": 4.017444341228418, "learning_rate": 3.59384567606032e-06, "loss": 0.06550674438476563, "step": 100980 }, { "epoch": 0.8731874346092987, "grad_norm": 0.2763620350959409, "learning_rate": 3.5936459729090977e-06, "loss": 0.032411956787109376, "step": 100985 }, { "epoch": 0.8732306681308419, "grad_norm": 1.3203537881341294, "learning_rate": 3.593446267020033e-06, "loss": 0.06835098266601562, "step": 100990 }, { "epoch": 0.8732739016523852, "grad_norm": 3.571389105358171, "learning_rate": 3.593246558394047e-06, "loss": 0.04085235595703125, "step": 100995 }, { "epoch": 0.8733171351739285, "grad_norm": 39.779760687990404, "learning_rate": 3.5930468470320597e-06, "loss": 0.19575347900390624, "step": 101000 }, { "epoch": 0.8733603686954717, "grad_norm": 1.8425058949806155, "learning_rate": 3.592847132934993e-06, "loss": 0.040679931640625, "step": 101005 }, { "epoch": 0.873403602217015, "grad_norm": 4.85226461839162, "learning_rate": 3.5926474161037676e-06, "loss": 0.10084266662597656, "step": 101010 }, { "epoch": 0.8734468357385583, "grad_norm": 0.4306367206316716, "learning_rate": 3.5924476965393037e-06, "loss": 0.10836601257324219, "step": 101015 }, { "epoch": 0.8734900692601015, "grad_norm": 0.4908777375314338, "learning_rate": 3.592247974242525e-06, "loss": 0.43837432861328124, "step": 101020 }, { "epoch": 0.8735333027816448, "grad_norm": 2.0636017065722982, "learning_rate": 3.5920482492143507e-06, "loss": 0.07698440551757812, "step": 101025 }, { "epoch": 0.873576536303188, "grad_norm": 3.3836013731346104, "learning_rate": 3.591848521455701e-06, "loss": 0.03259735107421875, "step": 101030 }, { "epoch": 0.8736197698247313, "grad_norm": 2.908625566219144, "learning_rate": 3.5916487909674986e-06, "loss": 0.18316574096679689, "step": 101035 }, { "epoch": 0.8736630033462746, "grad_norm": 1.7756276137226201, "learning_rate": 3.591449057750665e-06, "loss": 0.054290771484375, "step": 101040 }, { "epoch": 0.8737062368678178, "grad_norm": 0.30061024587895907, "learning_rate": 3.591249321806121e-06, "loss": 0.160162353515625, "step": 101045 }, { "epoch": 0.8737494703893611, "grad_norm": 26.706566158389542, "learning_rate": 3.5910495831347862e-06, "loss": 0.09518470764160156, "step": 101050 }, { "epoch": 0.8737927039109044, "grad_norm": 25.785971503424033, "learning_rate": 3.5908498417375834e-06, "loss": 0.59322509765625, "step": 101055 }, { "epoch": 0.8738359374324476, "grad_norm": 0.31899995444435636, "learning_rate": 3.590650097615433e-06, "loss": 0.10245628356933593, "step": 101060 }, { "epoch": 0.8738791709539909, "grad_norm": 23.82389210649001, "learning_rate": 3.590450350769256e-06, "loss": 0.07741737365722656, "step": 101065 }, { "epoch": 0.8739224044755342, "grad_norm": 0.7341692587407285, "learning_rate": 3.590250601199976e-06, "loss": 0.30545654296875, "step": 101070 }, { "epoch": 0.8739656379970774, "grad_norm": 48.515466055222234, "learning_rate": 3.5900508489085116e-06, "loss": 0.5578006744384766, "step": 101075 }, { "epoch": 0.8740088715186207, "grad_norm": 14.654259405879147, "learning_rate": 3.589851093895784e-06, "loss": 0.16437530517578125, "step": 101080 }, { "epoch": 0.874052105040164, "grad_norm": 0.6006489846729225, "learning_rate": 3.5896513361627165e-06, "loss": 0.19444503784179687, "step": 101085 }, { "epoch": 0.8740953385617072, "grad_norm": 21.815224382444203, "learning_rate": 3.5894515757102285e-06, "loss": 0.12037277221679688, "step": 101090 }, { "epoch": 0.8741385720832505, "grad_norm": 4.978051543663957, "learning_rate": 3.5892518125392417e-06, "loss": 0.110235595703125, "step": 101095 }, { "epoch": 0.8741818056047937, "grad_norm": 3.8518397423519586, "learning_rate": 3.5890520466506776e-06, "loss": 0.06089324951171875, "step": 101100 }, { "epoch": 0.874225039126337, "grad_norm": 0.2598764298154745, "learning_rate": 3.5888522780454585e-06, "loss": 0.2500213623046875, "step": 101105 }, { "epoch": 0.8742682726478802, "grad_norm": 11.037050129511337, "learning_rate": 3.588652506724504e-06, "loss": 0.2303863525390625, "step": 101110 }, { "epoch": 0.8743115061694235, "grad_norm": 1.8871794777074968, "learning_rate": 3.588452732688736e-06, "loss": 0.1348541259765625, "step": 101115 }, { "epoch": 0.8743547396909668, "grad_norm": 2.519393612027648, "learning_rate": 3.588252955939076e-06, "loss": 0.033809661865234375, "step": 101120 }, { "epoch": 0.87439797321251, "grad_norm": 2.2093142914993757, "learning_rate": 3.5880531764764454e-06, "loss": 0.2540069580078125, "step": 101125 }, { "epoch": 0.8744412067340533, "grad_norm": 6.8750104619502785, "learning_rate": 3.5878533943017653e-06, "loss": 0.09310150146484375, "step": 101130 }, { "epoch": 0.8744844402555966, "grad_norm": 43.53788443822023, "learning_rate": 3.5876536094159577e-06, "loss": 0.20842132568359376, "step": 101135 }, { "epoch": 0.8745276737771398, "grad_norm": 1.0193750848359717, "learning_rate": 3.587453821819944e-06, "loss": 0.0782562255859375, "step": 101140 }, { "epoch": 0.8745709072986831, "grad_norm": 15.533996976295743, "learning_rate": 3.5872540315146447e-06, "loss": 0.08006134033203124, "step": 101145 }, { "epoch": 0.8746141408202264, "grad_norm": 7.389317245921459, "learning_rate": 3.5870542385009808e-06, "loss": 0.04894390106201172, "step": 101150 }, { "epoch": 0.8746573743417696, "grad_norm": 36.312885313642674, "learning_rate": 3.5868544427798756e-06, "loss": 0.09130859375, "step": 101155 }, { "epoch": 0.8747006078633129, "grad_norm": 30.52951703669858, "learning_rate": 3.5866546443522494e-06, "loss": 0.20424652099609375, "step": 101160 }, { "epoch": 0.8747438413848562, "grad_norm": 1.8243933618334003, "learning_rate": 3.586454843219024e-06, "loss": 0.13333587646484374, "step": 101165 }, { "epoch": 0.8747870749063994, "grad_norm": 0.14170892580342295, "learning_rate": 3.5862550393811207e-06, "loss": 0.1836761474609375, "step": 101170 }, { "epoch": 0.8748303084279427, "grad_norm": 0.48112734699829557, "learning_rate": 3.586055232839461e-06, "loss": 0.0856414794921875, "step": 101175 }, { "epoch": 0.874873541949486, "grad_norm": 2.108453100855991, "learning_rate": 3.5858554235949657e-06, "loss": 0.3091289520263672, "step": 101180 }, { "epoch": 0.8749167754710292, "grad_norm": 19.681060525466982, "learning_rate": 3.5856556116485577e-06, "loss": 0.2182018280029297, "step": 101185 }, { "epoch": 0.8749600089925725, "grad_norm": 35.65274774858315, "learning_rate": 3.585455797001157e-06, "loss": 0.37795867919921877, "step": 101190 }, { "epoch": 0.8750032425141158, "grad_norm": 23.299820139611384, "learning_rate": 3.585255979653687e-06, "loss": 0.3669647216796875, "step": 101195 }, { "epoch": 0.875046476035659, "grad_norm": 4.53778614367368, "learning_rate": 3.5850561596070678e-06, "loss": 0.07480316162109375, "step": 101200 }, { "epoch": 0.8750897095572022, "grad_norm": 0.16835287151315656, "learning_rate": 3.5848563368622204e-06, "loss": 0.2310314178466797, "step": 101205 }, { "epoch": 0.8751329430787456, "grad_norm": 340.70429170296285, "learning_rate": 3.584656511420067e-06, "loss": 0.1426422119140625, "step": 101210 }, { "epoch": 0.8751761766002888, "grad_norm": 6.182742672091343, "learning_rate": 3.5844566832815307e-06, "loss": 0.33092269897460935, "step": 101215 }, { "epoch": 0.875219410121832, "grad_norm": 1.5965889284307357, "learning_rate": 3.5842568524475323e-06, "loss": 0.07257881164550781, "step": 101220 }, { "epoch": 0.8752626436433754, "grad_norm": 0.8237356657139386, "learning_rate": 3.584057018918991e-06, "loss": 0.2558483123779297, "step": 101225 }, { "epoch": 0.8753058771649186, "grad_norm": 11.686390425723287, "learning_rate": 3.5838571826968316e-06, "loss": 0.08653564453125, "step": 101230 }, { "epoch": 0.8753491106864618, "grad_norm": 14.559485605102761, "learning_rate": 3.5836573437819745e-06, "loss": 0.09152069091796874, "step": 101235 }, { "epoch": 0.8753923442080052, "grad_norm": 0.03663698182137616, "learning_rate": 3.5834575021753403e-06, "loss": 0.03874187469482422, "step": 101240 }, { "epoch": 0.8754355777295484, "grad_norm": 51.11907843450184, "learning_rate": 3.5832576578778528e-06, "loss": 0.193731689453125, "step": 101245 }, { "epoch": 0.8754788112510916, "grad_norm": 12.057119092085033, "learning_rate": 3.5830578108904325e-06, "loss": 0.07875518798828125, "step": 101250 }, { "epoch": 0.875522044772635, "grad_norm": 2.7381938399806813, "learning_rate": 3.5828579612140002e-06, "loss": 0.194158935546875, "step": 101255 }, { "epoch": 0.8755652782941782, "grad_norm": 0.7952619966931915, "learning_rate": 3.582658108849479e-06, "loss": 0.081951904296875, "step": 101260 }, { "epoch": 0.8756085118157214, "grad_norm": 4.112262740679304, "learning_rate": 3.58245825379779e-06, "loss": 0.03918399810791016, "step": 101265 }, { "epoch": 0.8756517453372648, "grad_norm": 0.11062169030569054, "learning_rate": 3.582258396059855e-06, "loss": 0.006484222412109375, "step": 101270 }, { "epoch": 0.875694978858808, "grad_norm": 1.2078959915529182, "learning_rate": 3.5820585356365957e-06, "loss": 0.235198974609375, "step": 101275 }, { "epoch": 0.8757382123803512, "grad_norm": 0.7625651120244276, "learning_rate": 3.5818586725289348e-06, "loss": 0.088995361328125, "step": 101280 }, { "epoch": 0.8757814459018944, "grad_norm": 11.832540883629731, "learning_rate": 3.5816588067377913e-06, "loss": 0.1696624755859375, "step": 101285 }, { "epoch": 0.8758246794234378, "grad_norm": 14.099185602758594, "learning_rate": 3.58145893826409e-06, "loss": 0.322259521484375, "step": 101290 }, { "epoch": 0.875867912944981, "grad_norm": 5.445554723406695, "learning_rate": 3.5812590671087516e-06, "loss": 0.08798370361328126, "step": 101295 }, { "epoch": 0.8759111464665242, "grad_norm": 5.7095930420005265, "learning_rate": 3.5810591932726976e-06, "loss": 0.137410306930542, "step": 101300 }, { "epoch": 0.8759543799880676, "grad_norm": 4.734219894128999, "learning_rate": 3.580859316756849e-06, "loss": 0.09842643737792969, "step": 101305 }, { "epoch": 0.8759976135096108, "grad_norm": 3.862247138035238, "learning_rate": 3.5806594375621296e-06, "loss": 0.08259735107421876, "step": 101310 }, { "epoch": 0.876040847031154, "grad_norm": 12.185521096004408, "learning_rate": 3.5804595556894596e-06, "loss": 0.19311866760253907, "step": 101315 }, { "epoch": 0.8760840805526974, "grad_norm": 2.9388144408308365, "learning_rate": 3.5802596711397615e-06, "loss": 0.07694244384765625, "step": 101320 }, { "epoch": 0.8761273140742406, "grad_norm": 2.821480398119184, "learning_rate": 3.5800597839139576e-06, "loss": 0.05678825378417969, "step": 101325 }, { "epoch": 0.8761705475957838, "grad_norm": 3.6372611194511926, "learning_rate": 3.5798598940129687e-06, "loss": 0.06365032196044922, "step": 101330 }, { "epoch": 0.8762137811173272, "grad_norm": 0.36115618203037914, "learning_rate": 3.5796600014377173e-06, "loss": 0.1400909423828125, "step": 101335 }, { "epoch": 0.8762570146388704, "grad_norm": 22.392522003094932, "learning_rate": 3.5794601061891256e-06, "loss": 0.09854087829589844, "step": 101340 }, { "epoch": 0.8763002481604136, "grad_norm": 3.0631890599142904, "learning_rate": 3.579260208268114e-06, "loss": 0.06380062103271485, "step": 101345 }, { "epoch": 0.876343481681957, "grad_norm": 17.070250630524374, "learning_rate": 3.5790603076756065e-06, "loss": 0.2984569549560547, "step": 101350 }, { "epoch": 0.8763867152035002, "grad_norm": 1.3285891035424076, "learning_rate": 3.5788604044125234e-06, "loss": 0.07791748046875, "step": 101355 }, { "epoch": 0.8764299487250434, "grad_norm": 0.1469111119556094, "learning_rate": 3.578660498479788e-06, "loss": 0.066766357421875, "step": 101360 }, { "epoch": 0.8764731822465868, "grad_norm": 0.22255078559255306, "learning_rate": 3.578460589878321e-06, "loss": 0.17732982635498046, "step": 101365 }, { "epoch": 0.87651641576813, "grad_norm": 0.41982146449093355, "learning_rate": 3.578260678609046e-06, "loss": 0.19860992431640626, "step": 101370 }, { "epoch": 0.8765596492896732, "grad_norm": 1.453163088678669, "learning_rate": 3.5780607646728818e-06, "loss": 0.1419891357421875, "step": 101375 }, { "epoch": 0.8766028828112165, "grad_norm": 3.286323205625941, "learning_rate": 3.577860848070754e-06, "loss": 0.1360595703125, "step": 101380 }, { "epoch": 0.8766461163327598, "grad_norm": 3.2611467397476877, "learning_rate": 3.577660928803582e-06, "loss": 0.30423431396484374, "step": 101385 }, { "epoch": 0.876689349854303, "grad_norm": 1.9858698067134979, "learning_rate": 3.5774610068722896e-06, "loss": 0.07761611938476562, "step": 101390 }, { "epoch": 0.8767325833758463, "grad_norm": 1.2232664729238756, "learning_rate": 3.577261082277798e-06, "loss": 0.06662330627441407, "step": 101395 }, { "epoch": 0.8767758168973896, "grad_norm": 4.1020675000515, "learning_rate": 3.577061155021029e-06, "loss": 0.0848358154296875, "step": 101400 }, { "epoch": 0.8768190504189328, "grad_norm": 4.729180170102494, "learning_rate": 3.5768612251029046e-06, "loss": 0.171990966796875, "step": 101405 }, { "epoch": 0.876862283940476, "grad_norm": 2.484522894035191, "learning_rate": 3.5766612925243483e-06, "loss": 0.07844390869140624, "step": 101410 }, { "epoch": 0.8769055174620194, "grad_norm": 14.361610941486411, "learning_rate": 3.5764613572862805e-06, "loss": 0.13904037475585937, "step": 101415 }, { "epoch": 0.8769487509835626, "grad_norm": 14.48892705733945, "learning_rate": 3.5762614193896236e-06, "loss": 0.15247802734375, "step": 101420 }, { "epoch": 0.8769919845051058, "grad_norm": 6.441024356176173, "learning_rate": 3.5760614788353005e-06, "loss": 0.05004615783691406, "step": 101425 }, { "epoch": 0.8770352180266492, "grad_norm": 2.080205662577626, "learning_rate": 3.5758615356242314e-06, "loss": 0.6852127075195312, "step": 101430 }, { "epoch": 0.8770784515481924, "grad_norm": 1.7337136204751693, "learning_rate": 3.575661589757341e-06, "loss": 0.24479217529296876, "step": 101435 }, { "epoch": 0.8771216850697356, "grad_norm": 17.452269181475927, "learning_rate": 3.5754616412355505e-06, "loss": 0.3136474609375, "step": 101440 }, { "epoch": 0.877164918591279, "grad_norm": 1.108738187674609, "learning_rate": 3.5752616900597807e-06, "loss": 0.3142425537109375, "step": 101445 }, { "epoch": 0.8772081521128222, "grad_norm": 14.969883045231121, "learning_rate": 3.5750617362309562e-06, "loss": 0.05867767333984375, "step": 101450 }, { "epoch": 0.8772513856343654, "grad_norm": 0.37382818466784257, "learning_rate": 3.5748617797499967e-06, "loss": 0.1497112274169922, "step": 101455 }, { "epoch": 0.8772946191559087, "grad_norm": 7.713276580253324, "learning_rate": 3.5746618206178257e-06, "loss": 0.26551284790039065, "step": 101460 }, { "epoch": 0.877337852677452, "grad_norm": 1.867352984199584, "learning_rate": 3.574461858835365e-06, "loss": 0.17701873779296876, "step": 101465 }, { "epoch": 0.8773810861989952, "grad_norm": 7.9613380955282125, "learning_rate": 3.5742618944035374e-06, "loss": 0.21169586181640626, "step": 101470 }, { "epoch": 0.8774243197205385, "grad_norm": 3.891907417561949, "learning_rate": 3.574061927323265e-06, "loss": 0.0909423828125, "step": 101475 }, { "epoch": 0.8774675532420818, "grad_norm": 15.260501108406999, "learning_rate": 3.573861957595468e-06, "loss": 0.12506561279296874, "step": 101480 }, { "epoch": 0.877510786763625, "grad_norm": 0.3056409815297064, "learning_rate": 3.5736619852210716e-06, "loss": 0.09248085021972656, "step": 101485 }, { "epoch": 0.8775540202851683, "grad_norm": 3.4495826054769445, "learning_rate": 3.573462010200997e-06, "loss": 0.04079704284667969, "step": 101490 }, { "epoch": 0.8775972538067116, "grad_norm": 13.847068432307399, "learning_rate": 3.573262032536166e-06, "loss": 0.29104766845703123, "step": 101495 }, { "epoch": 0.8776404873282548, "grad_norm": 1.2275022261039636, "learning_rate": 3.5730620522275013e-06, "loss": 0.038678741455078124, "step": 101500 }, { "epoch": 0.8776837208497981, "grad_norm": 30.774504175498993, "learning_rate": 3.5728620692759253e-06, "loss": 0.2406097412109375, "step": 101505 }, { "epoch": 0.8777269543713414, "grad_norm": 3.1127902076911167, "learning_rate": 3.5726620836823595e-06, "loss": 0.1747039794921875, "step": 101510 }, { "epoch": 0.8777701878928846, "grad_norm": 13.371569083033537, "learning_rate": 3.572462095447726e-06, "loss": 0.1163330078125, "step": 101515 }, { "epoch": 0.8778134214144279, "grad_norm": 22.323187933180208, "learning_rate": 3.5722621045729494e-06, "loss": 0.1414276123046875, "step": 101520 }, { "epoch": 0.8778566549359712, "grad_norm": 1.1963315755270665, "learning_rate": 3.5720621110589496e-06, "loss": 0.1186065673828125, "step": 101525 }, { "epoch": 0.8778998884575144, "grad_norm": 2.14887103289354, "learning_rate": 3.5718621149066506e-06, "loss": 0.06127853393554687, "step": 101530 }, { "epoch": 0.8779431219790577, "grad_norm": 1.9721869673288108, "learning_rate": 3.5716621161169747e-06, "loss": 0.0809173583984375, "step": 101535 }, { "epoch": 0.877986355500601, "grad_norm": 42.54663716818522, "learning_rate": 3.571462114690842e-06, "loss": 0.31136112213134765, "step": 101540 }, { "epoch": 0.8780295890221442, "grad_norm": 17.770898656315094, "learning_rate": 3.571262110629177e-06, "loss": 0.11820831298828124, "step": 101545 }, { "epoch": 0.8780728225436875, "grad_norm": 33.2563985374273, "learning_rate": 3.571062103932902e-06, "loss": 0.302606201171875, "step": 101550 }, { "epoch": 0.8781160560652307, "grad_norm": 2.697367612049156, "learning_rate": 3.570862094602939e-06, "loss": 0.2094573974609375, "step": 101555 }, { "epoch": 0.878159289586774, "grad_norm": 2.4576649836679145, "learning_rate": 3.57066208264021e-06, "loss": 0.047852706909179685, "step": 101560 }, { "epoch": 0.8782025231083173, "grad_norm": 5.191395619080606, "learning_rate": 3.5704620680456386e-06, "loss": 0.086932373046875, "step": 101565 }, { "epoch": 0.8782457566298605, "grad_norm": 1.3965580919906084, "learning_rate": 3.570262050820146e-06, "loss": 0.037329483032226565, "step": 101570 }, { "epoch": 0.8782889901514038, "grad_norm": 3.726383163961197, "learning_rate": 3.570062030964655e-06, "loss": 0.1973358154296875, "step": 101575 }, { "epoch": 0.878332223672947, "grad_norm": 1.5034263944731487, "learning_rate": 3.5698620084800887e-06, "loss": 0.20080413818359374, "step": 101580 }, { "epoch": 0.8783754571944903, "grad_norm": 1.048338908556383, "learning_rate": 3.5696619833673694e-06, "loss": 0.0346282958984375, "step": 101585 }, { "epoch": 0.8784186907160336, "grad_norm": 9.504469175754265, "learning_rate": 3.569461955627419e-06, "loss": 0.0757080078125, "step": 101590 }, { "epoch": 0.8784619242375769, "grad_norm": 16.967743682906924, "learning_rate": 3.5692619252611607e-06, "loss": 0.1106353759765625, "step": 101595 }, { "epoch": 0.8785051577591201, "grad_norm": 5.09942524883917, "learning_rate": 3.5690618922695162e-06, "loss": 0.04017906188964844, "step": 101600 }, { "epoch": 0.8785483912806634, "grad_norm": 32.571467753531465, "learning_rate": 3.568861856653409e-06, "loss": 0.6247303009033203, "step": 101605 }, { "epoch": 0.8785916248022066, "grad_norm": 30.507392400063083, "learning_rate": 3.5686618184137608e-06, "loss": 0.153045654296875, "step": 101610 }, { "epoch": 0.8786348583237499, "grad_norm": 4.547735828798684, "learning_rate": 3.568461777551495e-06, "loss": 0.04881973266601562, "step": 101615 }, { "epoch": 0.8786780918452932, "grad_norm": 0.8048389729759912, "learning_rate": 3.5682617340675336e-06, "loss": 0.08512306213378906, "step": 101620 }, { "epoch": 0.8787213253668364, "grad_norm": 0.6875409967750059, "learning_rate": 3.5680616879627993e-06, "loss": 0.10732879638671874, "step": 101625 }, { "epoch": 0.8787645588883797, "grad_norm": 11.216688725499578, "learning_rate": 3.5678616392382137e-06, "loss": 0.10264205932617188, "step": 101630 }, { "epoch": 0.8788077924099229, "grad_norm": 17.247311640338825, "learning_rate": 3.5676615878947017e-06, "loss": 0.2760345458984375, "step": 101635 }, { "epoch": 0.8788510259314662, "grad_norm": 17.998715839183795, "learning_rate": 3.5674615339331837e-06, "loss": 0.19362030029296876, "step": 101640 }, { "epoch": 0.8788942594530095, "grad_norm": 6.298653694422525, "learning_rate": 3.5672614773545846e-06, "loss": 0.13597869873046875, "step": 101645 }, { "epoch": 0.8789374929745527, "grad_norm": 33.13047572283895, "learning_rate": 3.567061418159825e-06, "loss": 0.23078155517578125, "step": 101650 }, { "epoch": 0.878980726496096, "grad_norm": 68.73803344919378, "learning_rate": 3.5668613563498264e-06, "loss": 0.20349960327148436, "step": 101655 }, { "epoch": 0.8790239600176393, "grad_norm": 9.412732608332535, "learning_rate": 3.566661291925516e-06, "loss": 0.12421607971191406, "step": 101660 }, { "epoch": 0.8790671935391825, "grad_norm": 11.959201115816272, "learning_rate": 3.5664612248878125e-06, "loss": 0.26992034912109375, "step": 101665 }, { "epoch": 0.8791104270607258, "grad_norm": 2.3534925341338844, "learning_rate": 3.5662611552376405e-06, "loss": 0.12491226196289062, "step": 101670 }, { "epoch": 0.8791536605822691, "grad_norm": 0.3330049266774537, "learning_rate": 3.566061082975922e-06, "loss": 0.08915138244628906, "step": 101675 }, { "epoch": 0.8791968941038123, "grad_norm": 40.07579849081915, "learning_rate": 3.5658610081035786e-06, "loss": 0.1878204345703125, "step": 101680 }, { "epoch": 0.8792401276253556, "grad_norm": 3.45254745818708, "learning_rate": 3.5656609306215354e-06, "loss": 0.07742843627929688, "step": 101685 }, { "epoch": 0.8792833611468989, "grad_norm": 109.1582118690888, "learning_rate": 3.5654608505307132e-06, "loss": 0.3653472900390625, "step": 101690 }, { "epoch": 0.8793265946684421, "grad_norm": 1.7592338396600133, "learning_rate": 3.5652607678320367e-06, "loss": 0.1429229736328125, "step": 101695 }, { "epoch": 0.8793698281899854, "grad_norm": 1.0902289722934435, "learning_rate": 3.565060682526427e-06, "loss": 0.04779129028320313, "step": 101700 }, { "epoch": 0.8794130617115287, "grad_norm": 1.6180644959249906, "learning_rate": 3.564860594614807e-06, "loss": 0.277044677734375, "step": 101705 }, { "epoch": 0.8794562952330719, "grad_norm": 2.09108124216466, "learning_rate": 3.5646605040980994e-06, "loss": 0.07780971527099609, "step": 101710 }, { "epoch": 0.8794995287546151, "grad_norm": 6.784776431325465, "learning_rate": 3.5644604109772284e-06, "loss": 0.3074951171875, "step": 101715 }, { "epoch": 0.8795427622761585, "grad_norm": 39.643059838749195, "learning_rate": 3.564260315253115e-06, "loss": 0.35509605407714845, "step": 101720 }, { "epoch": 0.8795859957977017, "grad_norm": 22.170213252410942, "learning_rate": 3.564060216926684e-06, "loss": 0.0951019287109375, "step": 101725 }, { "epoch": 0.8796292293192449, "grad_norm": 15.207411543475846, "learning_rate": 3.563860115998857e-06, "loss": 0.10286865234375, "step": 101730 }, { "epoch": 0.8796724628407883, "grad_norm": 16.63107471088997, "learning_rate": 3.563660012470556e-06, "loss": 0.34539260864257815, "step": 101735 }, { "epoch": 0.8797156963623315, "grad_norm": 0.2165121007793559, "learning_rate": 3.563459906342705e-06, "loss": 0.041679763793945314, "step": 101740 }, { "epoch": 0.8797589298838747, "grad_norm": 0.8803436119090311, "learning_rate": 3.563259797616228e-06, "loss": 0.22846565246582032, "step": 101745 }, { "epoch": 0.8798021634054181, "grad_norm": 43.11520409475488, "learning_rate": 3.563059686292045e-06, "loss": 0.3732881546020508, "step": 101750 }, { "epoch": 0.8798453969269613, "grad_norm": 5.652618527504383, "learning_rate": 3.5628595723710814e-06, "loss": 0.03963813781738281, "step": 101755 }, { "epoch": 0.8798886304485045, "grad_norm": 0.03865236897447805, "learning_rate": 3.562659455854259e-06, "loss": 0.12637062072753907, "step": 101760 }, { "epoch": 0.8799318639700479, "grad_norm": 25.399073605806052, "learning_rate": 3.562459336742501e-06, "loss": 0.15162982940673828, "step": 101765 }, { "epoch": 0.8799750974915911, "grad_norm": 4.9325980395425795, "learning_rate": 3.5622592150367296e-06, "loss": 0.1155181884765625, "step": 101770 }, { "epoch": 0.8800183310131343, "grad_norm": 1.934844444656028, "learning_rate": 3.562059090737869e-06, "loss": 0.2329925537109375, "step": 101775 }, { "epoch": 0.8800615645346777, "grad_norm": 29.90696763154105, "learning_rate": 3.561858963846841e-06, "loss": 0.3512092590332031, "step": 101780 }, { "epoch": 0.8801047980562209, "grad_norm": 1.4740490837360565, "learning_rate": 3.56165883436457e-06, "loss": 0.100537109375, "step": 101785 }, { "epoch": 0.8801480315777641, "grad_norm": 25.605614312527738, "learning_rate": 3.5614587022919774e-06, "loss": 0.1994293212890625, "step": 101790 }, { "epoch": 0.8801912650993075, "grad_norm": 38.67315301524677, "learning_rate": 3.5612585676299865e-06, "loss": 0.32297821044921876, "step": 101795 }, { "epoch": 0.8802344986208507, "grad_norm": 0.7223946903654747, "learning_rate": 3.561058430379522e-06, "loss": 0.1947784423828125, "step": 101800 }, { "epoch": 0.8802777321423939, "grad_norm": 9.751493888809424, "learning_rate": 3.5608582905415045e-06, "loss": 0.12694664001464845, "step": 101805 }, { "epoch": 0.8803209656639371, "grad_norm": 1.847938129196579, "learning_rate": 3.5606581481168586e-06, "loss": 0.03956184387207031, "step": 101810 }, { "epoch": 0.8803641991854805, "grad_norm": 0.15771181439594087, "learning_rate": 3.5604580031065066e-06, "loss": 0.021432876586914062, "step": 101815 }, { "epoch": 0.8804074327070237, "grad_norm": 4.011124146675865, "learning_rate": 3.560257855511372e-06, "loss": 0.09148788452148438, "step": 101820 }, { "epoch": 0.8804506662285669, "grad_norm": 0.6308278055379105, "learning_rate": 3.5600577053323776e-06, "loss": 0.02714691162109375, "step": 101825 }, { "epoch": 0.8804938997501103, "grad_norm": 8.18970560800291, "learning_rate": 3.559857552570446e-06, "loss": 0.0395965576171875, "step": 101830 }, { "epoch": 0.8805371332716535, "grad_norm": 0.5436865793947603, "learning_rate": 3.559657397226502e-06, "loss": 0.06828079223632813, "step": 101835 }, { "epoch": 0.8805803667931967, "grad_norm": 2.1872588388435803, "learning_rate": 3.559457239301467e-06, "loss": 0.0194671630859375, "step": 101840 }, { "epoch": 0.8806236003147401, "grad_norm": 16.115193182942797, "learning_rate": 3.559257078796264e-06, "loss": 0.5649642944335938, "step": 101845 }, { "epoch": 0.8806668338362833, "grad_norm": 0.8641380827728737, "learning_rate": 3.559056915711818e-06, "loss": 0.0931121826171875, "step": 101850 }, { "epoch": 0.8807100673578265, "grad_norm": 0.31636213740279207, "learning_rate": 3.55885675004905e-06, "loss": 0.1569000244140625, "step": 101855 }, { "epoch": 0.8807533008793699, "grad_norm": 4.762657922933808, "learning_rate": 3.5586565818088833e-06, "loss": 0.19491653442382811, "step": 101860 }, { "epoch": 0.8807965344009131, "grad_norm": 21.604255292454877, "learning_rate": 3.558456410992243e-06, "loss": 0.29626922607421874, "step": 101865 }, { "epoch": 0.8808397679224563, "grad_norm": 10.27552980203877, "learning_rate": 3.5582562376000515e-06, "loss": 0.0855133056640625, "step": 101870 }, { "epoch": 0.8808830014439997, "grad_norm": 1.0833335880334654, "learning_rate": 3.558056061633231e-06, "loss": 0.16785736083984376, "step": 101875 }, { "epoch": 0.8809262349655429, "grad_norm": 6.711833601686437, "learning_rate": 3.557855883092705e-06, "loss": 0.1394805908203125, "step": 101880 }, { "epoch": 0.8809694684870861, "grad_norm": 3.0124751062144908, "learning_rate": 3.557655701979397e-06, "loss": 0.049151611328125, "step": 101885 }, { "epoch": 0.8810127020086294, "grad_norm": 68.08912812592493, "learning_rate": 3.5574555182942305e-06, "loss": 0.1923095703125, "step": 101890 }, { "epoch": 0.8810559355301727, "grad_norm": 16.84989544648189, "learning_rate": 3.557255332038128e-06, "loss": 0.3061737060546875, "step": 101895 }, { "epoch": 0.8810991690517159, "grad_norm": 1.6894183332962167, "learning_rate": 3.557055143212014e-06, "loss": 0.07907257080078126, "step": 101900 }, { "epoch": 0.8811424025732592, "grad_norm": 1.1179306840411498, "learning_rate": 3.5568549518168095e-06, "loss": 0.13328094482421876, "step": 101905 }, { "epoch": 0.8811856360948025, "grad_norm": 0.9444639130844471, "learning_rate": 3.5566547578534398e-06, "loss": 0.13880767822265624, "step": 101910 }, { "epoch": 0.8812288696163457, "grad_norm": 1.0054930348915458, "learning_rate": 3.556454561322828e-06, "loss": 0.124615478515625, "step": 101915 }, { "epoch": 0.881272103137889, "grad_norm": 12.891837310419088, "learning_rate": 3.556254362225897e-06, "loss": 0.0873504638671875, "step": 101920 }, { "epoch": 0.8813153366594323, "grad_norm": 14.047924108077284, "learning_rate": 3.556054160563569e-06, "loss": 0.2684593200683594, "step": 101925 }, { "epoch": 0.8813585701809755, "grad_norm": 7.4883297499774475, "learning_rate": 3.5558539563367693e-06, "loss": 0.158282470703125, "step": 101930 }, { "epoch": 0.8814018037025187, "grad_norm": 19.169773970703123, "learning_rate": 3.55565374954642e-06, "loss": 0.2320892333984375, "step": 101935 }, { "epoch": 0.8814450372240621, "grad_norm": 3.817982870451789, "learning_rate": 3.5554535401934442e-06, "loss": 0.0747161865234375, "step": 101940 }, { "epoch": 0.8814882707456053, "grad_norm": 0.4561378956651293, "learning_rate": 3.5552533282787663e-06, "loss": 0.03403263092041016, "step": 101945 }, { "epoch": 0.8815315042671485, "grad_norm": 5.47511106709058, "learning_rate": 3.5550531138033097e-06, "loss": 0.08565101623535157, "step": 101950 }, { "epoch": 0.8815747377886919, "grad_norm": 4.740687746477542, "learning_rate": 3.554852896767996e-06, "loss": 0.10922508239746094, "step": 101955 }, { "epoch": 0.8816179713102351, "grad_norm": 2.376090323950065, "learning_rate": 3.554652677173751e-06, "loss": 0.017716217041015624, "step": 101960 }, { "epoch": 0.8816612048317783, "grad_norm": 4.889288099239614, "learning_rate": 3.5544524550214956e-06, "loss": 0.09061260223388672, "step": 101965 }, { "epoch": 0.8817044383533217, "grad_norm": 0.9297206593505668, "learning_rate": 3.5542522303121542e-06, "loss": 0.07343063354492188, "step": 101970 }, { "epoch": 0.8817476718748649, "grad_norm": 6.075662685378685, "learning_rate": 3.554052003046652e-06, "loss": 0.148370361328125, "step": 101975 }, { "epoch": 0.8817909053964081, "grad_norm": 1.990197200556104, "learning_rate": 3.5538517732259107e-06, "loss": 0.1937896728515625, "step": 101980 }, { "epoch": 0.8818341389179514, "grad_norm": 14.364259201385204, "learning_rate": 3.553651540850854e-06, "loss": 0.1305816650390625, "step": 101985 }, { "epoch": 0.8818773724394947, "grad_norm": 1.7991457352568836, "learning_rate": 3.5534513059224043e-06, "loss": 0.0203125, "step": 101990 }, { "epoch": 0.8819206059610379, "grad_norm": 3.7013198118518424, "learning_rate": 3.553251068441486e-06, "loss": 0.21518478393554688, "step": 101995 }, { "epoch": 0.8819638394825812, "grad_norm": 43.92024460501172, "learning_rate": 3.5530508284090236e-06, "loss": 0.17639808654785155, "step": 102000 }, { "epoch": 0.8820070730041245, "grad_norm": 30.949315284844207, "learning_rate": 3.5528505858259394e-06, "loss": 0.182763671875, "step": 102005 }, { "epoch": 0.8820503065256677, "grad_norm": 4.573660782081177, "learning_rate": 3.5526503406931573e-06, "loss": 0.8120525360107422, "step": 102010 }, { "epoch": 0.882093540047211, "grad_norm": 30.496454791264725, "learning_rate": 3.5524500930116e-06, "loss": 0.14827117919921876, "step": 102015 }, { "epoch": 0.8821367735687543, "grad_norm": 0.9179700890680039, "learning_rate": 3.552249842782192e-06, "loss": 0.0853302001953125, "step": 102020 }, { "epoch": 0.8821800070902975, "grad_norm": 1.0656602479796302, "learning_rate": 3.552049590005856e-06, "loss": 0.36363525390625, "step": 102025 }, { "epoch": 0.8822232406118408, "grad_norm": 2.1290795203499626, "learning_rate": 3.551849334683517e-06, "loss": 0.0370635986328125, "step": 102030 }, { "epoch": 0.8822664741333841, "grad_norm": 1.6747088421571776, "learning_rate": 3.551649076816097e-06, "loss": 0.074713134765625, "step": 102035 }, { "epoch": 0.8823097076549273, "grad_norm": 8.666637562055856, "learning_rate": 3.5514488164045205e-06, "loss": 0.08282470703125, "step": 102040 }, { "epoch": 0.8823529411764706, "grad_norm": 1.1289227145400684, "learning_rate": 3.5512485534497116e-06, "loss": 0.030712890625, "step": 102045 }, { "epoch": 0.8823961746980139, "grad_norm": 5.5449636816361245, "learning_rate": 3.5510482879525918e-06, "loss": 0.32034454345703123, "step": 102050 }, { "epoch": 0.8824394082195571, "grad_norm": 3.7384688502337284, "learning_rate": 3.5508480199140864e-06, "loss": 0.27437772750854494, "step": 102055 }, { "epoch": 0.8824826417411004, "grad_norm": 2.268048744907366, "learning_rate": 3.5506477493351187e-06, "loss": 0.05029296875, "step": 102060 }, { "epoch": 0.8825258752626436, "grad_norm": 14.385777855315116, "learning_rate": 3.5504474762166128e-06, "loss": 0.091796875, "step": 102065 }, { "epoch": 0.8825691087841869, "grad_norm": 6.307489339489128, "learning_rate": 3.550247200559491e-06, "loss": 0.10646133422851563, "step": 102070 }, { "epoch": 0.8826123423057302, "grad_norm": 37.04797144388934, "learning_rate": 3.5500469223646774e-06, "loss": 0.4322498321533203, "step": 102075 }, { "epoch": 0.8826555758272734, "grad_norm": 2.1559718007455966, "learning_rate": 3.5498466416330965e-06, "loss": 0.3894866943359375, "step": 102080 }, { "epoch": 0.8826988093488167, "grad_norm": 23.62333506597152, "learning_rate": 3.549646358365671e-06, "loss": 0.18422622680664064, "step": 102085 }, { "epoch": 0.88274204287036, "grad_norm": 0.19034960371999607, "learning_rate": 3.5494460725633266e-06, "loss": 0.017539405822753908, "step": 102090 }, { "epoch": 0.8827852763919032, "grad_norm": 22.095522259270613, "learning_rate": 3.5492457842269844e-06, "loss": 0.1384124755859375, "step": 102095 }, { "epoch": 0.8828285099134465, "grad_norm": 2.6716773427806535, "learning_rate": 3.5490454933575686e-06, "loss": 0.1061065673828125, "step": 102100 }, { "epoch": 0.8828717434349898, "grad_norm": 0.5210106879121682, "learning_rate": 3.548845199956004e-06, "loss": 0.13619384765625, "step": 102105 }, { "epoch": 0.882914976956533, "grad_norm": 7.703721154840961, "learning_rate": 3.5486449040232143e-06, "loss": 0.38456268310546876, "step": 102110 }, { "epoch": 0.8829582104780763, "grad_norm": 17.126977193317018, "learning_rate": 3.548444605560122e-06, "loss": 0.06295623779296874, "step": 102115 }, { "epoch": 0.8830014439996196, "grad_norm": 2.735266686923094, "learning_rate": 3.5482443045676522e-06, "loss": 0.23781261444091797, "step": 102120 }, { "epoch": 0.8830446775211628, "grad_norm": 0.23740521019491326, "learning_rate": 3.5480440010467287e-06, "loss": 0.0919830322265625, "step": 102125 }, { "epoch": 0.8830879110427061, "grad_norm": 7.3940549941155655, "learning_rate": 3.5478436949982733e-06, "loss": 0.2643096923828125, "step": 102130 }, { "epoch": 0.8831311445642493, "grad_norm": 7.827781637473362, "learning_rate": 3.5476433864232117e-06, "loss": 0.378936767578125, "step": 102135 }, { "epoch": 0.8831743780857926, "grad_norm": 4.387018605135554, "learning_rate": 3.5474430753224673e-06, "loss": 0.3643310546875, "step": 102140 }, { "epoch": 0.8832176116073359, "grad_norm": 2.97133925270106, "learning_rate": 3.547242761696964e-06, "loss": 0.18978347778320312, "step": 102145 }, { "epoch": 0.8832608451288791, "grad_norm": 17.38786604743324, "learning_rate": 3.5470424455476252e-06, "loss": 0.1266510009765625, "step": 102150 }, { "epoch": 0.8833040786504224, "grad_norm": 0.6627532128728333, "learning_rate": 3.5468421268753753e-06, "loss": 0.07996063232421875, "step": 102155 }, { "epoch": 0.8833473121719656, "grad_norm": 6.213638604365339, "learning_rate": 3.5466418056811373e-06, "loss": 0.14765968322753906, "step": 102160 }, { "epoch": 0.8833905456935089, "grad_norm": 13.389059638436036, "learning_rate": 3.5464414819658353e-06, "loss": 0.38787078857421875, "step": 102165 }, { "epoch": 0.8834337792150522, "grad_norm": 24.745131718570548, "learning_rate": 3.5462411557303947e-06, "loss": 0.09127120971679688, "step": 102170 }, { "epoch": 0.8834770127365954, "grad_norm": 3.4609843447623505, "learning_rate": 3.546040826975738e-06, "loss": 0.024969863891601562, "step": 102175 }, { "epoch": 0.8835202462581387, "grad_norm": 1.5201768855665674, "learning_rate": 3.5458404957027884e-06, "loss": 0.35773696899414065, "step": 102180 }, { "epoch": 0.883563479779682, "grad_norm": 1.183894908399828, "learning_rate": 3.5456401619124708e-06, "loss": 0.024631500244140625, "step": 102185 }, { "epoch": 0.8836067133012252, "grad_norm": 23.998982733327697, "learning_rate": 3.5454398256057094e-06, "loss": 0.20500946044921875, "step": 102190 }, { "epoch": 0.8836499468227685, "grad_norm": 6.46871810610965, "learning_rate": 3.545239486783427e-06, "loss": 0.03528594970703125, "step": 102195 }, { "epoch": 0.8836931803443118, "grad_norm": 12.164721061770383, "learning_rate": 3.545039145446549e-06, "loss": 0.05586233139038086, "step": 102200 }, { "epoch": 0.883736413865855, "grad_norm": 5.039096846647937, "learning_rate": 3.544838801595999e-06, "loss": 0.10323333740234375, "step": 102205 }, { "epoch": 0.8837796473873983, "grad_norm": 3.4217117310646468, "learning_rate": 3.5446384552326996e-06, "loss": 0.1342803955078125, "step": 102210 }, { "epoch": 0.8838228809089416, "grad_norm": 0.18781557937470136, "learning_rate": 3.544438106357576e-06, "loss": 0.04784736633300781, "step": 102215 }, { "epoch": 0.8838661144304848, "grad_norm": 0.7301376436959013, "learning_rate": 3.5442377549715525e-06, "loss": 0.062744140625, "step": 102220 }, { "epoch": 0.8839093479520281, "grad_norm": 7.097395671081739, "learning_rate": 3.5440374010755514e-06, "loss": 0.22936172485351564, "step": 102225 }, { "epoch": 0.8839525814735714, "grad_norm": 2.6301999512080623, "learning_rate": 3.5438370446704994e-06, "loss": 0.0760833740234375, "step": 102230 }, { "epoch": 0.8839958149951146, "grad_norm": 2.1831764163065297, "learning_rate": 3.543636685757319e-06, "loss": 0.1056365966796875, "step": 102235 }, { "epoch": 0.8840390485166578, "grad_norm": 26.800121275630726, "learning_rate": 3.543436324336933e-06, "loss": 0.25761566162109373, "step": 102240 }, { "epoch": 0.8840822820382012, "grad_norm": 5.164099397685939, "learning_rate": 3.543235960410267e-06, "loss": 0.03503341674804687, "step": 102245 }, { "epoch": 0.8841255155597444, "grad_norm": 14.668495945103963, "learning_rate": 3.543035593978245e-06, "loss": 0.196630859375, "step": 102250 }, { "epoch": 0.8841687490812876, "grad_norm": 2.381304986447472, "learning_rate": 3.542835225041791e-06, "loss": 0.10712966918945313, "step": 102255 }, { "epoch": 0.884211982602831, "grad_norm": 89.49457101661915, "learning_rate": 3.5426348536018282e-06, "loss": 0.3893402099609375, "step": 102260 }, { "epoch": 0.8842552161243742, "grad_norm": 3.103106953371401, "learning_rate": 3.5424344796592826e-06, "loss": 0.28418846130371095, "step": 102265 }, { "epoch": 0.8842984496459174, "grad_norm": 1.3765948737484253, "learning_rate": 3.5422341032150764e-06, "loss": 0.14233551025390626, "step": 102270 }, { "epoch": 0.8843416831674608, "grad_norm": 13.842940884375677, "learning_rate": 3.5420337242701337e-06, "loss": 0.130865478515625, "step": 102275 }, { "epoch": 0.884384916689004, "grad_norm": 13.781336306679087, "learning_rate": 3.5418333428253804e-06, "loss": 0.34635162353515625, "step": 102280 }, { "epoch": 0.8844281502105472, "grad_norm": 4.511009414424014, "learning_rate": 3.5416329588817397e-06, "loss": 0.05001678466796875, "step": 102285 }, { "epoch": 0.8844713837320906, "grad_norm": 7.8522062368156, "learning_rate": 3.541432572440135e-06, "loss": 0.125054931640625, "step": 102290 }, { "epoch": 0.8845146172536338, "grad_norm": 13.021317522354055, "learning_rate": 3.5412321835014914e-06, "loss": 0.12070465087890625, "step": 102295 }, { "epoch": 0.884557850775177, "grad_norm": 25.672917930658254, "learning_rate": 3.5410317920667324e-06, "loss": 0.08935089111328125, "step": 102300 }, { "epoch": 0.8846010842967204, "grad_norm": 2.3629015350352276, "learning_rate": 3.5408313981367825e-06, "loss": 0.036210250854492185, "step": 102305 }, { "epoch": 0.8846443178182636, "grad_norm": 4.2449260805640785, "learning_rate": 3.540631001712567e-06, "loss": 0.18351821899414061, "step": 102310 }, { "epoch": 0.8846875513398068, "grad_norm": 5.921349628544897, "learning_rate": 3.5404306027950085e-06, "loss": 0.1305419921875, "step": 102315 }, { "epoch": 0.8847307848613502, "grad_norm": 10.846586576426358, "learning_rate": 3.5402302013850313e-06, "loss": 0.069049072265625, "step": 102320 }, { "epoch": 0.8847740183828934, "grad_norm": 25.55620042897571, "learning_rate": 3.5400297974835613e-06, "loss": 0.16668701171875, "step": 102325 }, { "epoch": 0.8848172519044366, "grad_norm": 0.7688241507072058, "learning_rate": 3.5398293910915206e-06, "loss": 0.17338409423828124, "step": 102330 }, { "epoch": 0.8848604854259798, "grad_norm": 22.994401462723992, "learning_rate": 3.5396289822098353e-06, "loss": 0.2041534423828125, "step": 102335 }, { "epoch": 0.8849037189475232, "grad_norm": 1.1912272150933665, "learning_rate": 3.5394285708394275e-06, "loss": 0.07698574066162109, "step": 102340 }, { "epoch": 0.8849469524690664, "grad_norm": 1.5057798427006572, "learning_rate": 3.5392281569812246e-06, "loss": 0.23085708618164064, "step": 102345 }, { "epoch": 0.8849901859906096, "grad_norm": 1.872141031731945, "learning_rate": 3.539027740636148e-06, "loss": 0.06446533203125, "step": 102350 }, { "epoch": 0.885033419512153, "grad_norm": 17.317070234474585, "learning_rate": 3.538827321805123e-06, "loss": 0.0922760009765625, "step": 102355 }, { "epoch": 0.8850766530336962, "grad_norm": 30.195792843775536, "learning_rate": 3.5386269004890735e-06, "loss": 0.1166046142578125, "step": 102360 }, { "epoch": 0.8851198865552394, "grad_norm": 14.930525413466038, "learning_rate": 3.538426476688926e-06, "loss": 0.1309732437133789, "step": 102365 }, { "epoch": 0.8851631200767828, "grad_norm": 0.12767101442163442, "learning_rate": 3.5382260504056014e-06, "loss": 0.13384170532226564, "step": 102370 }, { "epoch": 0.885206353598326, "grad_norm": 7.931843940923768, "learning_rate": 3.538025621640027e-06, "loss": 0.14735450744628906, "step": 102375 }, { "epoch": 0.8852495871198692, "grad_norm": 17.867587130338553, "learning_rate": 3.537825190393126e-06, "loss": 0.12789154052734375, "step": 102380 }, { "epoch": 0.8852928206414126, "grad_norm": 13.178602387953012, "learning_rate": 3.5376247566658224e-06, "loss": 0.141571044921875, "step": 102385 }, { "epoch": 0.8853360541629558, "grad_norm": 74.42772340864677, "learning_rate": 3.53742432045904e-06, "loss": 0.44880027770996095, "step": 102390 }, { "epoch": 0.885379287684499, "grad_norm": 1.6818135367463787, "learning_rate": 3.5372238817737056e-06, "loss": 0.06220550537109375, "step": 102395 }, { "epoch": 0.8854225212060424, "grad_norm": 0.16211647914616345, "learning_rate": 3.5370234406107413e-06, "loss": 0.12173843383789062, "step": 102400 }, { "epoch": 0.8854657547275856, "grad_norm": 0.21209781407474476, "learning_rate": 3.536822996971072e-06, "loss": 0.04910011291503906, "step": 102405 }, { "epoch": 0.8855089882491288, "grad_norm": 30.585818391801528, "learning_rate": 3.536622550855624e-06, "loss": 0.4830068588256836, "step": 102410 }, { "epoch": 0.885552221770672, "grad_norm": 24.23433822926106, "learning_rate": 3.5364221022653187e-06, "loss": 0.19359893798828126, "step": 102415 }, { "epoch": 0.8855954552922154, "grad_norm": 7.528613382124228, "learning_rate": 3.5362216512010822e-06, "loss": 0.0370335578918457, "step": 102420 }, { "epoch": 0.8856386888137586, "grad_norm": 0.13880913935430708, "learning_rate": 3.53602119766384e-06, "loss": 0.10435905456542968, "step": 102425 }, { "epoch": 0.8856819223353019, "grad_norm": 1.2386227223723492, "learning_rate": 3.535820741654515e-06, "loss": 0.17010078430175782, "step": 102430 }, { "epoch": 0.8857251558568452, "grad_norm": 0.08724462715960454, "learning_rate": 3.5356202831740315e-06, "loss": 0.23714752197265626, "step": 102435 }, { "epoch": 0.8857683893783884, "grad_norm": 4.891701459055476, "learning_rate": 3.5354198222233145e-06, "loss": 0.0805999755859375, "step": 102440 }, { "epoch": 0.8858116228999316, "grad_norm": 50.6444076858931, "learning_rate": 3.535219358803289e-06, "loss": 0.11913871765136719, "step": 102445 }, { "epoch": 0.885854856421475, "grad_norm": 11.457561388764018, "learning_rate": 3.5350188929148785e-06, "loss": 0.1655609130859375, "step": 102450 }, { "epoch": 0.8858980899430182, "grad_norm": 6.511850954257005, "learning_rate": 3.534818424559009e-06, "loss": 0.06964244842529296, "step": 102455 }, { "epoch": 0.8859413234645614, "grad_norm": 19.28074706221991, "learning_rate": 3.534617953736604e-06, "loss": 0.1259002685546875, "step": 102460 }, { "epoch": 0.8859845569861048, "grad_norm": 0.8684198944267518, "learning_rate": 3.5344174804485877e-06, "loss": 0.0165435791015625, "step": 102465 }, { "epoch": 0.886027790507648, "grad_norm": 1.66387389972233, "learning_rate": 3.534217004695886e-06, "loss": 0.16210556030273438, "step": 102470 }, { "epoch": 0.8860710240291912, "grad_norm": 2.1004348880937003, "learning_rate": 3.5340165264794223e-06, "loss": 0.0427581787109375, "step": 102475 }, { "epoch": 0.8861142575507346, "grad_norm": 4.286192430070307, "learning_rate": 3.5338160458001205e-06, "loss": 0.153802490234375, "step": 102480 }, { "epoch": 0.8861574910722778, "grad_norm": 23.51224874858234, "learning_rate": 3.533615562658908e-06, "loss": 0.17445449829101561, "step": 102485 }, { "epoch": 0.886200724593821, "grad_norm": 0.9626669575387776, "learning_rate": 3.533415077056707e-06, "loss": 0.18279266357421875, "step": 102490 }, { "epoch": 0.8862439581153644, "grad_norm": 9.483906380396501, "learning_rate": 3.5332145889944424e-06, "loss": 0.1817913055419922, "step": 102495 }, { "epoch": 0.8862871916369076, "grad_norm": 0.15997042967536546, "learning_rate": 3.5330140984730395e-06, "loss": 0.06743049621582031, "step": 102500 }, { "epoch": 0.8863304251584508, "grad_norm": 14.411957990022094, "learning_rate": 3.5328136054934225e-06, "loss": 0.1437957763671875, "step": 102505 }, { "epoch": 0.8863736586799941, "grad_norm": 10.620430363384887, "learning_rate": 3.532613110056517e-06, "loss": 0.07627716064453124, "step": 102510 }, { "epoch": 0.8864168922015374, "grad_norm": 14.320008101030647, "learning_rate": 3.5324126121632464e-06, "loss": 0.15676116943359375, "step": 102515 }, { "epoch": 0.8864601257230806, "grad_norm": 1.0505378946212456, "learning_rate": 3.532212111814536e-06, "loss": 0.2287311553955078, "step": 102520 }, { "epoch": 0.8865033592446239, "grad_norm": 8.943545530299183, "learning_rate": 3.53201160901131e-06, "loss": 0.18006668090820313, "step": 102525 }, { "epoch": 0.8865465927661672, "grad_norm": 12.041242812031197, "learning_rate": 3.5318111037544933e-06, "loss": 0.08473052978515624, "step": 102530 }, { "epoch": 0.8865898262877104, "grad_norm": 8.385846748834789, "learning_rate": 3.5316105960450114e-06, "loss": 0.258038330078125, "step": 102535 }, { "epoch": 0.8866330598092537, "grad_norm": 2.9541299763955147, "learning_rate": 3.5314100858837885e-06, "loss": 0.07529029846191407, "step": 102540 }, { "epoch": 0.886676293330797, "grad_norm": 18.26744480453724, "learning_rate": 3.531209573271749e-06, "loss": 0.21860275268554688, "step": 102545 }, { "epoch": 0.8867195268523402, "grad_norm": 0.9524184304869231, "learning_rate": 3.531009058209818e-06, "loss": 0.08935394287109374, "step": 102550 }, { "epoch": 0.8867627603738835, "grad_norm": 2.243334436950814, "learning_rate": 3.5308085406989197e-06, "loss": 0.078265380859375, "step": 102555 }, { "epoch": 0.8868059938954268, "grad_norm": 47.52486086048568, "learning_rate": 3.530608020739979e-06, "loss": 0.30717926025390624, "step": 102560 }, { "epoch": 0.88684922741697, "grad_norm": 4.089508877266293, "learning_rate": 3.530407498333922e-06, "loss": 0.1660898208618164, "step": 102565 }, { "epoch": 0.8868924609385133, "grad_norm": 1.420318703873718, "learning_rate": 3.5302069734816724e-06, "loss": 0.14432373046875, "step": 102570 }, { "epoch": 0.8869356944600566, "grad_norm": 4.900783251688077, "learning_rate": 3.5300064461841542e-06, "loss": 0.0656005859375, "step": 102575 }, { "epoch": 0.8869789279815998, "grad_norm": 2.3659935841093986, "learning_rate": 3.529805916442294e-06, "loss": 0.02229766845703125, "step": 102580 }, { "epoch": 0.8870221615031431, "grad_norm": 9.891092363477428, "learning_rate": 3.5296053842570153e-06, "loss": 0.121868896484375, "step": 102585 }, { "epoch": 0.8870653950246863, "grad_norm": 0.0625182540137649, "learning_rate": 3.529404849629243e-06, "loss": 0.04558906555175781, "step": 102590 }, { "epoch": 0.8871086285462296, "grad_norm": 5.94227373461823, "learning_rate": 3.5292043125599027e-06, "loss": 0.14615478515625, "step": 102595 }, { "epoch": 0.8871518620677729, "grad_norm": 2.3744929939685746, "learning_rate": 3.5290037730499195e-06, "loss": 0.09447174072265625, "step": 102600 }, { "epoch": 0.8871950955893161, "grad_norm": 31.197288276697964, "learning_rate": 3.528803231100217e-06, "loss": 0.05932884216308594, "step": 102605 }, { "epoch": 0.8872383291108594, "grad_norm": 5.3197200391306, "learning_rate": 3.52860268671172e-06, "loss": 0.34259033203125, "step": 102610 }, { "epoch": 0.8872815626324027, "grad_norm": 3.0412460386002547, "learning_rate": 3.5284021398853555e-06, "loss": 0.10373497009277344, "step": 102615 }, { "epoch": 0.8873247961539459, "grad_norm": 1.2395059153486165, "learning_rate": 3.5282015906220466e-06, "loss": 0.04138946533203125, "step": 102620 }, { "epoch": 0.8873680296754892, "grad_norm": 16.73208178920937, "learning_rate": 3.5280010389227176e-06, "loss": 0.19077777862548828, "step": 102625 }, { "epoch": 0.8874112631970325, "grad_norm": 0.18176059872330078, "learning_rate": 3.5278004847882963e-06, "loss": 0.26531219482421875, "step": 102630 }, { "epoch": 0.8874544967185757, "grad_norm": 0.2398345663502306, "learning_rate": 3.5275999282197047e-06, "loss": 0.057108306884765626, "step": 102635 }, { "epoch": 0.887497730240119, "grad_norm": 22.79399033503597, "learning_rate": 3.5273993692178688e-06, "loss": 0.21614990234375, "step": 102640 }, { "epoch": 0.8875409637616622, "grad_norm": 12.135369414808757, "learning_rate": 3.5271988077837137e-06, "loss": 0.086474609375, "step": 102645 }, { "epoch": 0.8875841972832055, "grad_norm": 1.5847707728602405, "learning_rate": 3.5269982439181646e-06, "loss": 0.1749034881591797, "step": 102650 }, { "epoch": 0.8876274308047488, "grad_norm": 3.5886913180991735, "learning_rate": 3.5267976776221455e-06, "loss": 0.08815879821777343, "step": 102655 }, { "epoch": 0.887670664326292, "grad_norm": 9.125550591358367, "learning_rate": 3.5265971088965826e-06, "loss": 0.2313201904296875, "step": 102660 }, { "epoch": 0.8877138978478353, "grad_norm": 3.8915581012237443, "learning_rate": 3.526396537742401e-06, "loss": 0.1871723175048828, "step": 102665 }, { "epoch": 0.8877571313693786, "grad_norm": 0.19982524396993342, "learning_rate": 3.5261959641605233e-06, "loss": 0.14723434448242187, "step": 102670 }, { "epoch": 0.8878003648909218, "grad_norm": 50.86487425195838, "learning_rate": 3.5259953881518772e-06, "loss": 0.2942935943603516, "step": 102675 }, { "epoch": 0.8878435984124651, "grad_norm": 2.9071648539705084, "learning_rate": 3.5257948097173877e-06, "loss": 0.3270111083984375, "step": 102680 }, { "epoch": 0.8878868319340083, "grad_norm": 8.403996214858283, "learning_rate": 3.5255942288579787e-06, "loss": 0.20135650634765626, "step": 102685 }, { "epoch": 0.8879300654555516, "grad_norm": 3.261428087289502, "learning_rate": 3.5253936455745745e-06, "loss": 0.15764312744140624, "step": 102690 }, { "epoch": 0.8879732989770949, "grad_norm": 0.7678605636849024, "learning_rate": 3.5251930598681025e-06, "loss": 0.2157318115234375, "step": 102695 }, { "epoch": 0.8880165324986381, "grad_norm": 0.7757647771361511, "learning_rate": 3.524992471739485e-06, "loss": 0.3965057373046875, "step": 102700 }, { "epoch": 0.8880597660201814, "grad_norm": 9.782521535129515, "learning_rate": 3.5247918811896496e-06, "loss": 0.19683837890625, "step": 102705 }, { "epoch": 0.8881029995417247, "grad_norm": 5.332537202703479, "learning_rate": 3.5245912882195207e-06, "loss": 0.7533462524414063, "step": 102710 }, { "epoch": 0.8881462330632679, "grad_norm": 10.086431753488553, "learning_rate": 3.524390692830023e-06, "loss": 0.05200042724609375, "step": 102715 }, { "epoch": 0.8881894665848112, "grad_norm": 3.987021728230105, "learning_rate": 3.524190095022081e-06, "loss": 0.08584365844726563, "step": 102720 }, { "epoch": 0.8882327001063545, "grad_norm": 6.31477865873, "learning_rate": 3.5239894947966204e-06, "loss": 0.09575214385986328, "step": 102725 }, { "epoch": 0.8882759336278977, "grad_norm": 0.6343219273752511, "learning_rate": 3.5237888921545674e-06, "loss": 0.22900962829589844, "step": 102730 }, { "epoch": 0.888319167149441, "grad_norm": 0.9201433342101918, "learning_rate": 3.5235882870968464e-06, "loss": 0.40165252685546876, "step": 102735 }, { "epoch": 0.8883624006709843, "grad_norm": 12.287010536123066, "learning_rate": 3.5233876796243826e-06, "loss": 0.13325347900390624, "step": 102740 }, { "epoch": 0.8884056341925275, "grad_norm": 3.861371037777479, "learning_rate": 3.523187069738101e-06, "loss": 0.13587646484375, "step": 102745 }, { "epoch": 0.8884488677140708, "grad_norm": 0.5171639802431136, "learning_rate": 3.522986457438926e-06, "loss": 0.039057159423828126, "step": 102750 }, { "epoch": 0.8884921012356141, "grad_norm": 0.8470786379828267, "learning_rate": 3.522785842727784e-06, "loss": 0.05233001708984375, "step": 102755 }, { "epoch": 0.8885353347571573, "grad_norm": 3.4913534714478986, "learning_rate": 3.5225852256056006e-06, "loss": 0.08708343505859376, "step": 102760 }, { "epoch": 0.8885785682787005, "grad_norm": 0.11251677491390712, "learning_rate": 3.5223846060733e-06, "loss": 0.1130340576171875, "step": 102765 }, { "epoch": 0.8886218018002439, "grad_norm": 32.29069611263616, "learning_rate": 3.5221839841318075e-06, "loss": 0.1142059326171875, "step": 102770 }, { "epoch": 0.8886650353217871, "grad_norm": 0.3049625647645763, "learning_rate": 3.521983359782049e-06, "loss": 0.022177886962890626, "step": 102775 }, { "epoch": 0.8887082688433303, "grad_norm": 0.14127787346681447, "learning_rate": 3.5217827330249488e-06, "loss": 0.2148406982421875, "step": 102780 }, { "epoch": 0.8887515023648737, "grad_norm": 4.522934611452241, "learning_rate": 3.521582103861433e-06, "loss": 0.09207687377929688, "step": 102785 }, { "epoch": 0.8887947358864169, "grad_norm": 1.0442632486319163, "learning_rate": 3.5213814722924263e-06, "loss": 0.5361557006835938, "step": 102790 }, { "epoch": 0.8888379694079601, "grad_norm": 7.319441529374449, "learning_rate": 3.5211808383188554e-06, "loss": 0.07345809936523437, "step": 102795 }, { "epoch": 0.8888812029295035, "grad_norm": 6.475479711465377, "learning_rate": 3.5209802019416435e-06, "loss": 0.0681549072265625, "step": 102800 }, { "epoch": 0.8889244364510467, "grad_norm": 3.4210977212502236, "learning_rate": 3.520779563161717e-06, "loss": 0.1384002685546875, "step": 102805 }, { "epoch": 0.8889676699725899, "grad_norm": 3.476745388745459, "learning_rate": 3.5205789219800013e-06, "loss": 0.06737060546875, "step": 102810 }, { "epoch": 0.8890109034941333, "grad_norm": 17.331035780498375, "learning_rate": 3.5203782783974215e-06, "loss": 0.1008941650390625, "step": 102815 }, { "epoch": 0.8890541370156765, "grad_norm": 6.936020580323903, "learning_rate": 3.520177632414904e-06, "loss": 0.045733642578125, "step": 102820 }, { "epoch": 0.8890973705372197, "grad_norm": 7.991035123754767, "learning_rate": 3.5199769840333724e-06, "loss": 0.08838462829589844, "step": 102825 }, { "epoch": 0.889140604058763, "grad_norm": 7.771115367778047, "learning_rate": 3.5197763332537526e-06, "loss": 0.041742134094238284, "step": 102830 }, { "epoch": 0.8891838375803063, "grad_norm": 6.4562947630367535, "learning_rate": 3.5195756800769716e-06, "loss": 0.187384033203125, "step": 102835 }, { "epoch": 0.8892270711018495, "grad_norm": 13.279570622338408, "learning_rate": 3.5193750245039517e-06, "loss": 0.10601348876953125, "step": 102840 }, { "epoch": 0.8892703046233928, "grad_norm": 2.8075138429828383, "learning_rate": 3.5191743665356205e-06, "loss": 0.04993133544921875, "step": 102845 }, { "epoch": 0.8893135381449361, "grad_norm": 1.6617139039378745, "learning_rate": 3.518973706172904e-06, "loss": 0.05054931640625, "step": 102850 }, { "epoch": 0.8893567716664793, "grad_norm": 0.825085927399086, "learning_rate": 3.518773043416726e-06, "loss": 0.47245330810546876, "step": 102855 }, { "epoch": 0.8894000051880225, "grad_norm": 15.894594954642638, "learning_rate": 3.518572378268013e-06, "loss": 0.050237274169921874, "step": 102860 }, { "epoch": 0.8894432387095659, "grad_norm": 1.1125872346548251, "learning_rate": 3.5183717107276895e-06, "loss": 0.28780364990234375, "step": 102865 }, { "epoch": 0.8894864722311091, "grad_norm": 55.79578828605443, "learning_rate": 3.518171040796681e-06, "loss": 0.46532669067382815, "step": 102870 }, { "epoch": 0.8895297057526523, "grad_norm": 2.5137990341794723, "learning_rate": 3.5179703684759145e-06, "loss": 0.0580780029296875, "step": 102875 }, { "epoch": 0.8895729392741957, "grad_norm": 0.5554233072804444, "learning_rate": 3.5177696937663138e-06, "loss": 0.11977462768554688, "step": 102880 }, { "epoch": 0.8896161727957389, "grad_norm": 6.287140412181198, "learning_rate": 3.5175690166688056e-06, "loss": 0.25574684143066406, "step": 102885 }, { "epoch": 0.8896594063172821, "grad_norm": 25.888068304072622, "learning_rate": 3.517368337184315e-06, "loss": 0.2691051483154297, "step": 102890 }, { "epoch": 0.8897026398388255, "grad_norm": 0.8586455201273858, "learning_rate": 3.5171676553137665e-06, "loss": 0.0501861572265625, "step": 102895 }, { "epoch": 0.8897458733603687, "grad_norm": 19.812164656656773, "learning_rate": 3.5169669710580862e-06, "loss": 0.4310333251953125, "step": 102900 }, { "epoch": 0.8897891068819119, "grad_norm": 13.478263898719366, "learning_rate": 3.5167662844182005e-06, "loss": 0.14791011810302734, "step": 102905 }, { "epoch": 0.8898323404034553, "grad_norm": 5.463716025325904, "learning_rate": 3.5165655953950343e-06, "loss": 0.1911041259765625, "step": 102910 }, { "epoch": 0.8898755739249985, "grad_norm": 2.1790489904377504, "learning_rate": 3.5163649039895134e-06, "loss": 0.1827301025390625, "step": 102915 }, { "epoch": 0.8899188074465417, "grad_norm": 1.8024470140924587, "learning_rate": 3.5161642102025636e-06, "loss": 0.2368896484375, "step": 102920 }, { "epoch": 0.8899620409680851, "grad_norm": 2.1456692681158747, "learning_rate": 3.5159635140351085e-06, "loss": 0.1175628662109375, "step": 102925 }, { "epoch": 0.8900052744896283, "grad_norm": 12.17001018038693, "learning_rate": 3.5157628154880762e-06, "loss": 0.0899169921875, "step": 102930 }, { "epoch": 0.8900485080111715, "grad_norm": 2.118901460001233, "learning_rate": 3.515562114562392e-06, "loss": 0.149200439453125, "step": 102935 }, { "epoch": 0.8900917415327148, "grad_norm": 10.747852286558443, "learning_rate": 3.515361411258981e-06, "loss": 0.11504974365234374, "step": 102940 }, { "epoch": 0.8901349750542581, "grad_norm": 0.39108906199418847, "learning_rate": 3.515160705578767e-06, "loss": 0.14014129638671874, "step": 102945 }, { "epoch": 0.8901782085758013, "grad_norm": 13.53581748243925, "learning_rate": 3.514959997522678e-06, "loss": 0.06708984375, "step": 102950 }, { "epoch": 0.8902214420973446, "grad_norm": 9.395742205588624, "learning_rate": 3.51475928709164e-06, "loss": 0.2467184066772461, "step": 102955 }, { "epoch": 0.8902646756188879, "grad_norm": 0.1832922412345889, "learning_rate": 3.5145585742865763e-06, "loss": 0.019428062438964843, "step": 102960 }, { "epoch": 0.8903079091404311, "grad_norm": 0.03804310460769525, "learning_rate": 3.514357859108415e-06, "loss": 0.20803146362304686, "step": 102965 }, { "epoch": 0.8903511426619743, "grad_norm": 12.566493593796364, "learning_rate": 3.514157141558081e-06, "loss": 0.11069221496582031, "step": 102970 }, { "epoch": 0.8903943761835177, "grad_norm": 0.5067666400437985, "learning_rate": 3.513956421636498e-06, "loss": 0.12983245849609376, "step": 102975 }, { "epoch": 0.8904376097050609, "grad_norm": 11.842062086568239, "learning_rate": 3.5137556993445947e-06, "loss": 0.04672927856445312, "step": 102980 }, { "epoch": 0.8904808432266041, "grad_norm": 0.5494337092692592, "learning_rate": 3.5135549746832956e-06, "loss": 0.13285179138183595, "step": 102985 }, { "epoch": 0.8905240767481475, "grad_norm": 20.757653342410812, "learning_rate": 3.5133542476535256e-06, "loss": 0.12749786376953126, "step": 102990 }, { "epoch": 0.8905673102696907, "grad_norm": 10.341034112043456, "learning_rate": 3.513153518256212e-06, "loss": 0.1538116455078125, "step": 102995 }, { "epoch": 0.8906105437912339, "grad_norm": 5.684493642683359, "learning_rate": 3.5129527864922796e-06, "loss": 0.074932861328125, "step": 103000 }, { "epoch": 0.8906537773127773, "grad_norm": 55.99339937563329, "learning_rate": 3.5127520523626535e-06, "loss": 0.29107208251953126, "step": 103005 }, { "epoch": 0.8906970108343205, "grad_norm": 0.21873455665153296, "learning_rate": 3.51255131586826e-06, "loss": 0.03445510864257813, "step": 103010 }, { "epoch": 0.8907402443558637, "grad_norm": 4.447629948290647, "learning_rate": 3.5123505770100265e-06, "loss": 0.15279464721679686, "step": 103015 }, { "epoch": 0.8907834778774071, "grad_norm": 0.4148464160794674, "learning_rate": 3.5121498357888768e-06, "loss": 0.282720947265625, "step": 103020 }, { "epoch": 0.8908267113989503, "grad_norm": 1.4038115475091684, "learning_rate": 3.511949092205737e-06, "loss": 0.036565399169921874, "step": 103025 }, { "epoch": 0.8908699449204935, "grad_norm": 0.9224107561612216, "learning_rate": 3.5117483462615337e-06, "loss": 0.2690155029296875, "step": 103030 }, { "epoch": 0.8909131784420368, "grad_norm": 0.9481660623910134, "learning_rate": 3.511547597957192e-06, "loss": 0.12056427001953125, "step": 103035 }, { "epoch": 0.8909564119635801, "grad_norm": 16.309776519842426, "learning_rate": 3.5113468472936375e-06, "loss": 0.266998291015625, "step": 103040 }, { "epoch": 0.8909996454851233, "grad_norm": 3.1976005359814668, "learning_rate": 3.511146094271797e-06, "loss": 0.0829864501953125, "step": 103045 }, { "epoch": 0.8910428790066666, "grad_norm": 33.81535580780105, "learning_rate": 3.5109453388925966e-06, "loss": 0.2684186935424805, "step": 103050 }, { "epoch": 0.8910861125282099, "grad_norm": 40.04377278961801, "learning_rate": 3.5107445811569607e-06, "loss": 0.22532272338867188, "step": 103055 }, { "epoch": 0.8911293460497531, "grad_norm": 29.218707132536842, "learning_rate": 3.510543821065816e-06, "loss": 0.10371932983398438, "step": 103060 }, { "epoch": 0.8911725795712964, "grad_norm": 16.395406356653737, "learning_rate": 3.510343058620088e-06, "loss": 0.23482208251953124, "step": 103065 }, { "epoch": 0.8912158130928397, "grad_norm": 52.894634659349656, "learning_rate": 3.510142293820703e-06, "loss": 0.3586452484130859, "step": 103070 }, { "epoch": 0.8912590466143829, "grad_norm": 27.102982349121515, "learning_rate": 3.509941526668587e-06, "loss": 0.24171600341796876, "step": 103075 }, { "epoch": 0.8913022801359262, "grad_norm": 4.5770460634817836, "learning_rate": 3.5097407571646657e-06, "loss": 0.10423355102539063, "step": 103080 }, { "epoch": 0.8913455136574695, "grad_norm": 11.336788793956234, "learning_rate": 3.509539985309865e-06, "loss": 0.15217132568359376, "step": 103085 }, { "epoch": 0.8913887471790127, "grad_norm": 2.1986701922133025, "learning_rate": 3.5093392111051113e-06, "loss": 0.05160255432128906, "step": 103090 }, { "epoch": 0.891431980700556, "grad_norm": 3.3866792913660704, "learning_rate": 3.5091384345513294e-06, "loss": 0.4348121643066406, "step": 103095 }, { "epoch": 0.8914752142220993, "grad_norm": 6.100229549231265, "learning_rate": 3.508937655649446e-06, "loss": 0.29596900939941406, "step": 103100 }, { "epoch": 0.8915184477436425, "grad_norm": 19.673344103989113, "learning_rate": 3.5087368744003883e-06, "loss": 0.24117050170898438, "step": 103105 }, { "epoch": 0.8915616812651858, "grad_norm": 1.5214107134594725, "learning_rate": 3.5085360908050805e-06, "loss": 0.095654296875, "step": 103110 }, { "epoch": 0.891604914786729, "grad_norm": 6.219862477620611, "learning_rate": 3.5083353048644493e-06, "loss": 0.08723678588867187, "step": 103115 }, { "epoch": 0.8916481483082723, "grad_norm": 3.9926692915956834, "learning_rate": 3.5081345165794203e-06, "loss": 0.02552337646484375, "step": 103120 }, { "epoch": 0.8916913818298156, "grad_norm": 25.89370258992231, "learning_rate": 3.5079337259509196e-06, "loss": 0.098406982421875, "step": 103125 }, { "epoch": 0.8917346153513588, "grad_norm": 3.1630918278251183, "learning_rate": 3.507732932979874e-06, "loss": 0.029919815063476563, "step": 103130 }, { "epoch": 0.8917778488729021, "grad_norm": 14.714852437043222, "learning_rate": 3.507532137667208e-06, "loss": 0.0694793701171875, "step": 103135 }, { "epoch": 0.8918210823944454, "grad_norm": 5.513725387681501, "learning_rate": 3.5073313400138495e-06, "loss": 0.03540802001953125, "step": 103140 }, { "epoch": 0.8918643159159886, "grad_norm": 3.7357399444600037, "learning_rate": 3.507130540020724e-06, "loss": 0.16532440185546876, "step": 103145 }, { "epoch": 0.8919075494375319, "grad_norm": 19.182973218099356, "learning_rate": 3.5069297376887555e-06, "loss": 0.21095771789550782, "step": 103150 }, { "epoch": 0.8919507829590752, "grad_norm": 1.5643842509557575, "learning_rate": 3.5067289330188736e-06, "loss": 0.16482086181640626, "step": 103155 }, { "epoch": 0.8919940164806184, "grad_norm": 7.168849230485674, "learning_rate": 3.506528126012003e-06, "loss": 0.09579315185546874, "step": 103160 }, { "epoch": 0.8920372500021617, "grad_norm": 8.964155420604268, "learning_rate": 3.5063273166690677e-06, "loss": 0.04667816162109375, "step": 103165 }, { "epoch": 0.892080483523705, "grad_norm": 0.22024825718949634, "learning_rate": 3.506126504990997e-06, "loss": 0.0526458740234375, "step": 103170 }, { "epoch": 0.8921237170452482, "grad_norm": 15.508852765091776, "learning_rate": 3.505925690978715e-06, "loss": 0.4494926452636719, "step": 103175 }, { "epoch": 0.8921669505667915, "grad_norm": 1.2968516050335173, "learning_rate": 3.5057248746331478e-06, "loss": 0.04616546630859375, "step": 103180 }, { "epoch": 0.8922101840883347, "grad_norm": 11.873363163582393, "learning_rate": 3.505524055955223e-06, "loss": 0.09429702758789063, "step": 103185 }, { "epoch": 0.892253417609878, "grad_norm": 37.13833716003552, "learning_rate": 3.5053232349458664e-06, "loss": 0.19507598876953125, "step": 103190 }, { "epoch": 0.8922966511314212, "grad_norm": 10.87875224587134, "learning_rate": 3.5051224116060027e-06, "loss": 0.13218841552734376, "step": 103195 }, { "epoch": 0.8923398846529645, "grad_norm": 2.898216154428331, "learning_rate": 3.504921585936559e-06, "loss": 0.12173919677734375, "step": 103200 }, { "epoch": 0.8923831181745078, "grad_norm": 0.10257961365435277, "learning_rate": 3.504720757938462e-06, "loss": 0.2576385498046875, "step": 103205 }, { "epoch": 0.892426351696051, "grad_norm": 27.77183357204441, "learning_rate": 3.504519927612638e-06, "loss": 0.17277679443359376, "step": 103210 }, { "epoch": 0.8924695852175943, "grad_norm": 1.5290721296905487, "learning_rate": 3.5043190949600115e-06, "loss": 0.0535980224609375, "step": 103215 }, { "epoch": 0.8925128187391376, "grad_norm": 2.980459471522609, "learning_rate": 3.5041182599815108e-06, "loss": 0.12919921875, "step": 103220 }, { "epoch": 0.8925560522606808, "grad_norm": 18.50295646228552, "learning_rate": 3.5039174226780616e-06, "loss": 0.118963623046875, "step": 103225 }, { "epoch": 0.8925992857822241, "grad_norm": 3.203733807745206, "learning_rate": 3.5037165830505888e-06, "loss": 0.063372802734375, "step": 103230 }, { "epoch": 0.8926425193037674, "grad_norm": 0.7676239609322921, "learning_rate": 3.5035157411000196e-06, "loss": 0.12580642700195313, "step": 103235 }, { "epoch": 0.8926857528253106, "grad_norm": 1.2641649419819367, "learning_rate": 3.5033148968272804e-06, "loss": 0.14253997802734375, "step": 103240 }, { "epoch": 0.8927289863468539, "grad_norm": 9.16958444125858, "learning_rate": 3.5031140502332973e-06, "loss": 0.23739681243896485, "step": 103245 }, { "epoch": 0.8927722198683972, "grad_norm": 2.8874984056319057, "learning_rate": 3.502913201318997e-06, "loss": 0.10773296356201172, "step": 103250 }, { "epoch": 0.8928154533899404, "grad_norm": 1.2795972951873786, "learning_rate": 3.5027123500853056e-06, "loss": 0.0350677490234375, "step": 103255 }, { "epoch": 0.8928586869114837, "grad_norm": 1.52691327081589, "learning_rate": 3.502511496533149e-06, "loss": 0.1845001220703125, "step": 103260 }, { "epoch": 0.892901920433027, "grad_norm": 6.03168258411888, "learning_rate": 3.5023106406634538e-06, "loss": 0.15419845581054686, "step": 103265 }, { "epoch": 0.8929451539545702, "grad_norm": 9.788024916589556, "learning_rate": 3.5021097824771464e-06, "loss": 0.0599151611328125, "step": 103270 }, { "epoch": 0.8929883874761135, "grad_norm": 4.789115188355143, "learning_rate": 3.501908921975153e-06, "loss": 0.09192848205566406, "step": 103275 }, { "epoch": 0.8930316209976568, "grad_norm": 0.10110543420432963, "learning_rate": 3.5017080591584002e-06, "loss": 0.13875808715820312, "step": 103280 }, { "epoch": 0.8930748545192, "grad_norm": 4.036614130426427, "learning_rate": 3.5015071940278135e-06, "loss": 0.1575927734375, "step": 103285 }, { "epoch": 0.8931180880407432, "grad_norm": 4.530091460958457, "learning_rate": 3.5013063265843207e-06, "loss": 0.3307762145996094, "step": 103290 }, { "epoch": 0.8931613215622866, "grad_norm": 8.09377073296585, "learning_rate": 3.5011054568288464e-06, "loss": 0.17898521423339844, "step": 103295 }, { "epoch": 0.8932045550838298, "grad_norm": 3.4077731473175668, "learning_rate": 3.5009045847623194e-06, "loss": 0.24094810485839843, "step": 103300 }, { "epoch": 0.893247788605373, "grad_norm": 13.090162001146348, "learning_rate": 3.500703710385664e-06, "loss": 0.1344991683959961, "step": 103305 }, { "epoch": 0.8932910221269164, "grad_norm": 4.806663873786169, "learning_rate": 3.500502833699807e-06, "loss": 0.24295120239257811, "step": 103310 }, { "epoch": 0.8933342556484596, "grad_norm": 30.436628502816166, "learning_rate": 3.500301954705676e-06, "loss": 0.09773635864257812, "step": 103315 }, { "epoch": 0.8933774891700028, "grad_norm": 2.856511863509687, "learning_rate": 3.500101073404196e-06, "loss": 0.047336578369140625, "step": 103320 }, { "epoch": 0.8934207226915462, "grad_norm": 24.76330029084567, "learning_rate": 3.499900189796294e-06, "loss": 0.22479248046875, "step": 103325 }, { "epoch": 0.8934639562130894, "grad_norm": 11.257402649208586, "learning_rate": 3.4996993038828966e-06, "loss": 0.1484954833984375, "step": 103330 }, { "epoch": 0.8935071897346326, "grad_norm": 4.037894785571029, "learning_rate": 3.4994984156649306e-06, "loss": 0.11052684783935547, "step": 103335 }, { "epoch": 0.893550423256176, "grad_norm": 52.699796535605806, "learning_rate": 3.499297525143321e-06, "loss": 0.39630279541015623, "step": 103340 }, { "epoch": 0.8935936567777192, "grad_norm": 9.121827698687301, "learning_rate": 3.4990966323189957e-06, "loss": 0.074755859375, "step": 103345 }, { "epoch": 0.8936368902992624, "grad_norm": 3.2926302248500723, "learning_rate": 3.498895737192881e-06, "loss": 0.17292556762695313, "step": 103350 }, { "epoch": 0.8936801238208057, "grad_norm": 16.061470086983363, "learning_rate": 3.498694839765904e-06, "loss": 0.5939521789550781, "step": 103355 }, { "epoch": 0.893723357342349, "grad_norm": 17.353726584232835, "learning_rate": 3.4984939400389894e-06, "loss": 0.09475231170654297, "step": 103360 }, { "epoch": 0.8937665908638922, "grad_norm": 0.6616017686408779, "learning_rate": 3.4982930380130655e-06, "loss": 0.13200454711914061, "step": 103365 }, { "epoch": 0.8938098243854354, "grad_norm": 4.00423546755181, "learning_rate": 3.498092133689058e-06, "loss": 0.08490657806396484, "step": 103370 }, { "epoch": 0.8938530579069788, "grad_norm": 6.0698438539318875, "learning_rate": 3.497891227067893e-06, "loss": 0.24834442138671875, "step": 103375 }, { "epoch": 0.893896291428522, "grad_norm": 0.3423014262511932, "learning_rate": 3.4976903181504985e-06, "loss": 0.37170867919921874, "step": 103380 }, { "epoch": 0.8939395249500652, "grad_norm": 0.41344572036576877, "learning_rate": 3.4974894069378e-06, "loss": 0.06688423156738281, "step": 103385 }, { "epoch": 0.8939827584716086, "grad_norm": 2.0440071753104383, "learning_rate": 3.497288493430724e-06, "loss": 0.016162109375, "step": 103390 }, { "epoch": 0.8940259919931518, "grad_norm": 11.505047236298758, "learning_rate": 3.4970875776301977e-06, "loss": 0.1080596923828125, "step": 103395 }, { "epoch": 0.894069225514695, "grad_norm": 0.4050152813603518, "learning_rate": 3.496886659537147e-06, "loss": 0.1591033935546875, "step": 103400 }, { "epoch": 0.8941124590362384, "grad_norm": 4.864586545028007, "learning_rate": 3.496685739152499e-06, "loss": 0.0794107437133789, "step": 103405 }, { "epoch": 0.8941556925577816, "grad_norm": 0.525696652461726, "learning_rate": 3.49648481647718e-06, "loss": 0.07129440307617188, "step": 103410 }, { "epoch": 0.8941989260793248, "grad_norm": 3.919550145889767, "learning_rate": 3.4962838915121187e-06, "loss": 0.4815185546875, "step": 103415 }, { "epoch": 0.8942421596008682, "grad_norm": 14.098894031683024, "learning_rate": 3.4960829642582382e-06, "loss": 0.28897247314453123, "step": 103420 }, { "epoch": 0.8942853931224114, "grad_norm": 2.6233198280389556, "learning_rate": 3.495882034716467e-06, "loss": 0.21072845458984374, "step": 103425 }, { "epoch": 0.8943286266439546, "grad_norm": 0.5477038333321547, "learning_rate": 3.4956811028877317e-06, "loss": 0.10326690673828125, "step": 103430 }, { "epoch": 0.894371860165498, "grad_norm": 7.035154213198692, "learning_rate": 3.495480168772959e-06, "loss": 0.05743331909179687, "step": 103435 }, { "epoch": 0.8944150936870412, "grad_norm": 1.4434378066330993, "learning_rate": 3.495279232373076e-06, "loss": 0.15582046508789063, "step": 103440 }, { "epoch": 0.8944583272085844, "grad_norm": 34.93092702808316, "learning_rate": 3.4950782936890094e-06, "loss": 0.37218246459960935, "step": 103445 }, { "epoch": 0.8945015607301278, "grad_norm": 7.183526370460374, "learning_rate": 3.494877352721684e-06, "loss": 0.085443115234375, "step": 103450 }, { "epoch": 0.894544794251671, "grad_norm": 2.901425999527272, "learning_rate": 3.494676409472029e-06, "loss": 0.16406707763671874, "step": 103455 }, { "epoch": 0.8945880277732142, "grad_norm": 8.087423303739714, "learning_rate": 3.4944754639409695e-06, "loss": 0.17590713500976562, "step": 103460 }, { "epoch": 0.8946312612947575, "grad_norm": 3.7718761495560558, "learning_rate": 3.494274516129434e-06, "loss": 0.0509490966796875, "step": 103465 }, { "epoch": 0.8946744948163008, "grad_norm": 47.32176085942549, "learning_rate": 3.4940735660383466e-06, "loss": 0.21419677734375, "step": 103470 }, { "epoch": 0.894717728337844, "grad_norm": 1.8533136225376667, "learning_rate": 3.493872613668637e-06, "loss": 0.31121673583984377, "step": 103475 }, { "epoch": 0.8947609618593872, "grad_norm": 3.2714827620737585, "learning_rate": 3.49367165902123e-06, "loss": 0.21199722290039064, "step": 103480 }, { "epoch": 0.8948041953809306, "grad_norm": 0.41850660986569094, "learning_rate": 3.4934707020970525e-06, "loss": 0.05123443603515625, "step": 103485 }, { "epoch": 0.8948474289024738, "grad_norm": 51.19184316166783, "learning_rate": 3.4932697428970314e-06, "loss": 0.47150726318359376, "step": 103490 }, { "epoch": 0.894890662424017, "grad_norm": 3.770844370864399, "learning_rate": 3.493068781422095e-06, "loss": 0.47213134765625, "step": 103495 }, { "epoch": 0.8949338959455604, "grad_norm": 4.00843591144031, "learning_rate": 3.4928678176731685e-06, "loss": 0.11311874389648438, "step": 103500 }, { "epoch": 0.8949771294671036, "grad_norm": 0.20500904981137313, "learning_rate": 3.4926668516511788e-06, "loss": 0.08944549560546874, "step": 103505 }, { "epoch": 0.8950203629886468, "grad_norm": 14.72228357523145, "learning_rate": 3.4924658833570536e-06, "loss": 0.14821701049804686, "step": 103510 }, { "epoch": 0.8950635965101902, "grad_norm": 1.2211728359639107, "learning_rate": 3.492264912791719e-06, "loss": 0.07129058837890626, "step": 103515 }, { "epoch": 0.8951068300317334, "grad_norm": 0.23687491853519352, "learning_rate": 3.4920639399561017e-06, "loss": 0.01675758361816406, "step": 103520 }, { "epoch": 0.8951500635532766, "grad_norm": 5.946692458732817, "learning_rate": 3.4918629648511295e-06, "loss": 0.012497329711914062, "step": 103525 }, { "epoch": 0.89519329707482, "grad_norm": 35.4525247155794, "learning_rate": 3.4916619874777285e-06, "loss": 0.441455078125, "step": 103530 }, { "epoch": 0.8952365305963632, "grad_norm": 4.819218873988789, "learning_rate": 3.4914610078368263e-06, "loss": 0.13603668212890624, "step": 103535 }, { "epoch": 0.8952797641179064, "grad_norm": 18.060005393446275, "learning_rate": 3.49126002592935e-06, "loss": 0.11382007598876953, "step": 103540 }, { "epoch": 0.8953229976394497, "grad_norm": 22.59330583530386, "learning_rate": 3.491059041756224e-06, "loss": 0.258807373046875, "step": 103545 }, { "epoch": 0.895366231160993, "grad_norm": 1.1453248170436425, "learning_rate": 3.490858055318378e-06, "loss": 0.08352813720703126, "step": 103550 }, { "epoch": 0.8954094646825362, "grad_norm": 3.464668275022272, "learning_rate": 3.490657066616738e-06, "loss": 0.06953125, "step": 103555 }, { "epoch": 0.8954526982040795, "grad_norm": 11.784947310309981, "learning_rate": 3.4904560756522318e-06, "loss": 0.0480708122253418, "step": 103560 }, { "epoch": 0.8954959317256228, "grad_norm": 1.4044459746751259, "learning_rate": 3.490255082425784e-06, "loss": 0.1874217987060547, "step": 103565 }, { "epoch": 0.895539165247166, "grad_norm": 22.467074084179252, "learning_rate": 3.4900540869383236e-06, "loss": 0.1608642578125, "step": 103570 }, { "epoch": 0.8955823987687093, "grad_norm": 3.6567984315851296, "learning_rate": 3.4898530891907775e-06, "loss": 0.165740966796875, "step": 103575 }, { "epoch": 0.8956256322902526, "grad_norm": 49.612190248301204, "learning_rate": 3.489652089184072e-06, "loss": 0.1035247802734375, "step": 103580 }, { "epoch": 0.8956688658117958, "grad_norm": 0.6177425959593527, "learning_rate": 3.4894510869191345e-06, "loss": 0.2250753402709961, "step": 103585 }, { "epoch": 0.8957120993333391, "grad_norm": 0.6405553150606731, "learning_rate": 3.4892500823968923e-06, "loss": 0.1411041259765625, "step": 103590 }, { "epoch": 0.8957553328548824, "grad_norm": 0.2560831598506516, "learning_rate": 3.489049075618271e-06, "loss": 0.14966926574707032, "step": 103595 }, { "epoch": 0.8957985663764256, "grad_norm": 16.94572679005635, "learning_rate": 3.4888480665841982e-06, "loss": 0.4137073516845703, "step": 103600 }, { "epoch": 0.8958417998979689, "grad_norm": 2.751135301739487, "learning_rate": 3.4886470552956027e-06, "loss": 0.0272003173828125, "step": 103605 }, { "epoch": 0.8958850334195122, "grad_norm": 13.77522769886376, "learning_rate": 3.4884460417534095e-06, "loss": 0.04090194702148438, "step": 103610 }, { "epoch": 0.8959282669410554, "grad_norm": 1.181298367636766, "learning_rate": 3.4882450259585466e-06, "loss": 0.007702255249023437, "step": 103615 }, { "epoch": 0.8959715004625987, "grad_norm": 1.5442008441059007, "learning_rate": 3.488044007911941e-06, "loss": 0.08826332092285157, "step": 103620 }, { "epoch": 0.896014733984142, "grad_norm": 27.55300819748928, "learning_rate": 3.4878429876145182e-06, "loss": 0.214208984375, "step": 103625 }, { "epoch": 0.8960579675056852, "grad_norm": 3.3118630573835794, "learning_rate": 3.4876419650672078e-06, "loss": 0.20397186279296875, "step": 103630 }, { "epoch": 0.8961012010272285, "grad_norm": 0.14731039921198055, "learning_rate": 3.487440940270935e-06, "loss": 0.3369232177734375, "step": 103635 }, { "epoch": 0.8961444345487717, "grad_norm": 2.3527530659670166, "learning_rate": 3.487239913226629e-06, "loss": 0.050031280517578124, "step": 103640 }, { "epoch": 0.896187668070315, "grad_norm": 18.665145550264214, "learning_rate": 3.487038883935214e-06, "loss": 0.46771240234375, "step": 103645 }, { "epoch": 0.8962309015918583, "grad_norm": 1.377376292003318, "learning_rate": 3.48683785239762e-06, "loss": 0.042940521240234376, "step": 103650 }, { "epoch": 0.8962741351134015, "grad_norm": 1.8090207186813647, "learning_rate": 3.4866368186147716e-06, "loss": 0.042799758911132815, "step": 103655 }, { "epoch": 0.8963173686349448, "grad_norm": 2.0156912334113026, "learning_rate": 3.4864357825875975e-06, "loss": 0.057234668731689455, "step": 103660 }, { "epoch": 0.896360602156488, "grad_norm": 2.949504273781094, "learning_rate": 3.4862347443170254e-06, "loss": 0.21556472778320312, "step": 103665 }, { "epoch": 0.8964038356780313, "grad_norm": 5.177242357219483, "learning_rate": 3.486033703803981e-06, "loss": 0.1433135986328125, "step": 103670 }, { "epoch": 0.8964470691995746, "grad_norm": 0.7364715730601366, "learning_rate": 3.485832661049392e-06, "loss": 0.1980438232421875, "step": 103675 }, { "epoch": 0.8964903027211178, "grad_norm": 40.01650039590277, "learning_rate": 3.4856316160541864e-06, "loss": 0.05094833374023437, "step": 103680 }, { "epoch": 0.8965335362426611, "grad_norm": 1.175738404307269, "learning_rate": 3.4854305688192897e-06, "loss": 0.33814849853515627, "step": 103685 }, { "epoch": 0.8965767697642044, "grad_norm": 0.8318773730151505, "learning_rate": 3.4852295193456307e-06, "loss": 0.10349960327148437, "step": 103690 }, { "epoch": 0.8966200032857476, "grad_norm": 4.895184856173584, "learning_rate": 3.485028467634136e-06, "loss": 0.13482666015625, "step": 103695 }, { "epoch": 0.8966632368072909, "grad_norm": 1.452581621541031, "learning_rate": 3.4848274136857328e-06, "loss": 0.11764984130859375, "step": 103700 }, { "epoch": 0.8967064703288342, "grad_norm": 2.223377619845525, "learning_rate": 3.484626357501348e-06, "loss": 0.0842315673828125, "step": 103705 }, { "epoch": 0.8967497038503774, "grad_norm": 12.51445393825231, "learning_rate": 3.4844252990819102e-06, "loss": 0.12413406372070312, "step": 103710 }, { "epoch": 0.8967929373719207, "grad_norm": 27.1833945362479, "learning_rate": 3.484224238428345e-06, "loss": 0.2737335205078125, "step": 103715 }, { "epoch": 0.8968361708934639, "grad_norm": 0.3141201582694486, "learning_rate": 3.4840231755415805e-06, "loss": 0.2541511535644531, "step": 103720 }, { "epoch": 0.8968794044150072, "grad_norm": 1.638708375888913, "learning_rate": 3.4838221104225445e-06, "loss": 0.1275177001953125, "step": 103725 }, { "epoch": 0.8969226379365505, "grad_norm": 47.611135198370725, "learning_rate": 3.4836210430721627e-06, "loss": 0.20169448852539062, "step": 103730 }, { "epoch": 0.8969658714580937, "grad_norm": 2.821281365853871, "learning_rate": 3.483419973491364e-06, "loss": 0.108734130859375, "step": 103735 }, { "epoch": 0.897009104979637, "grad_norm": 25.55800069021372, "learning_rate": 3.4832189016810753e-06, "loss": 0.2087890625, "step": 103740 }, { "epoch": 0.8970523385011803, "grad_norm": 33.43449916603235, "learning_rate": 3.483017827642223e-06, "loss": 0.097613525390625, "step": 103745 }, { "epoch": 0.8970955720227235, "grad_norm": 1.8947851897602528, "learning_rate": 3.4828167513757356e-06, "loss": 0.2750244140625, "step": 103750 }, { "epoch": 0.8971388055442668, "grad_norm": 4.572564443423334, "learning_rate": 3.4826156728825393e-06, "loss": 0.09649810791015626, "step": 103755 }, { "epoch": 0.8971820390658101, "grad_norm": 19.532363222935043, "learning_rate": 3.4824145921635636e-06, "loss": 0.5132080078125, "step": 103760 }, { "epoch": 0.8972252725873533, "grad_norm": 1.3594899197796713, "learning_rate": 3.482213509219734e-06, "loss": 0.11216278076171875, "step": 103765 }, { "epoch": 0.8972685061088966, "grad_norm": 10.692346291293571, "learning_rate": 3.482012424051977e-06, "loss": 0.1183868408203125, "step": 103770 }, { "epoch": 0.8973117396304399, "grad_norm": 4.090976162492265, "learning_rate": 3.481811336661222e-06, "loss": 0.18288536071777345, "step": 103775 }, { "epoch": 0.8973549731519831, "grad_norm": 36.97445580292768, "learning_rate": 3.4816102470483968e-06, "loss": 0.11964645385742187, "step": 103780 }, { "epoch": 0.8973982066735264, "grad_norm": 16.152824032394484, "learning_rate": 3.481409155214426e-06, "loss": 0.3655364990234375, "step": 103785 }, { "epoch": 0.8974414401950697, "grad_norm": 6.003543512279005, "learning_rate": 3.4812080611602397e-06, "loss": 0.1309659004211426, "step": 103790 }, { "epoch": 0.8974846737166129, "grad_norm": 0.6115713477975221, "learning_rate": 3.481006964886764e-06, "loss": 0.07870769500732422, "step": 103795 }, { "epoch": 0.8975279072381562, "grad_norm": 1.8153642443700648, "learning_rate": 3.4808058663949262e-06, "loss": 0.0469573974609375, "step": 103800 }, { "epoch": 0.8975711407596995, "grad_norm": 3.010386503257527, "learning_rate": 3.480604765685656e-06, "loss": 0.624072265625, "step": 103805 }, { "epoch": 0.8976143742812427, "grad_norm": 7.263049578955417, "learning_rate": 3.480403662759878e-06, "loss": 0.15974655151367187, "step": 103810 }, { "epoch": 0.8976576078027859, "grad_norm": 4.7973270148519935, "learning_rate": 3.480202557618521e-06, "loss": 0.16694049835205077, "step": 103815 }, { "epoch": 0.8977008413243293, "grad_norm": 4.586854970184069, "learning_rate": 3.4800014502625113e-06, "loss": 0.09929275512695312, "step": 103820 }, { "epoch": 0.8977440748458725, "grad_norm": 51.40652914818979, "learning_rate": 3.4798003406927777e-06, "loss": 0.3766014099121094, "step": 103825 }, { "epoch": 0.8977873083674157, "grad_norm": 3.606477320132878, "learning_rate": 3.4795992289102477e-06, "loss": 0.4503753662109375, "step": 103830 }, { "epoch": 0.897830541888959, "grad_norm": 0.11925612264252969, "learning_rate": 3.4793981149158483e-06, "loss": 0.10757942199707031, "step": 103835 }, { "epoch": 0.8978737754105023, "grad_norm": 1.6878350397473727, "learning_rate": 3.4791969987105077e-06, "loss": 0.0372894287109375, "step": 103840 }, { "epoch": 0.8979170089320455, "grad_norm": 0.28808510507914376, "learning_rate": 3.478995880295153e-06, "loss": 0.1904937744140625, "step": 103845 }, { "epoch": 0.8979602424535889, "grad_norm": 10.803660132716006, "learning_rate": 3.47879475967071e-06, "loss": 0.1674407958984375, "step": 103850 }, { "epoch": 0.8980034759751321, "grad_norm": 0.21615179579350433, "learning_rate": 3.4785936368381084e-06, "loss": 0.23646774291992187, "step": 103855 }, { "epoch": 0.8980467094966753, "grad_norm": 2.874275529348038, "learning_rate": 3.478392511798276e-06, "loss": 0.055487060546875, "step": 103860 }, { "epoch": 0.8980899430182187, "grad_norm": 1.6870554205175947, "learning_rate": 3.478191384552139e-06, "loss": 0.15437088012695313, "step": 103865 }, { "epoch": 0.8981331765397619, "grad_norm": 5.473822703530698, "learning_rate": 3.4779902551006266e-06, "loss": 0.1915752410888672, "step": 103870 }, { "epoch": 0.8981764100613051, "grad_norm": 8.406794727411082, "learning_rate": 3.477789123444665e-06, "loss": 0.269049072265625, "step": 103875 }, { "epoch": 0.8982196435828484, "grad_norm": 2.1440077246396836, "learning_rate": 3.4775879895851815e-06, "loss": 0.030515289306640624, "step": 103880 }, { "epoch": 0.8982628771043917, "grad_norm": 2.1480778899619324, "learning_rate": 3.4773868535231043e-06, "loss": 0.2548057556152344, "step": 103885 }, { "epoch": 0.8983061106259349, "grad_norm": 23.828136950067574, "learning_rate": 3.477185715259362e-06, "loss": 0.342694091796875, "step": 103890 }, { "epoch": 0.8983493441474781, "grad_norm": 22.956846930344977, "learning_rate": 3.4769845747948808e-06, "loss": 0.19602432250976562, "step": 103895 }, { "epoch": 0.8983925776690215, "grad_norm": 12.089485083519165, "learning_rate": 3.476783432130589e-06, "loss": 0.12049942016601563, "step": 103900 }, { "epoch": 0.8984358111905647, "grad_norm": 0.4260290337945546, "learning_rate": 3.4765822872674143e-06, "loss": 0.12990226745605468, "step": 103905 }, { "epoch": 0.8984790447121079, "grad_norm": 7.115996112299003, "learning_rate": 3.4763811402062835e-06, "loss": 0.05236968994140625, "step": 103910 }, { "epoch": 0.8985222782336513, "grad_norm": 15.263080198460337, "learning_rate": 3.4761799909481254e-06, "loss": 0.10960540771484376, "step": 103915 }, { "epoch": 0.8985655117551945, "grad_norm": 0.5529597282824495, "learning_rate": 3.4759788394938675e-06, "loss": 0.013987922668457031, "step": 103920 }, { "epoch": 0.8986087452767377, "grad_norm": 7.939497008166319, "learning_rate": 3.4757776858444377e-06, "loss": 0.1364501953125, "step": 103925 }, { "epoch": 0.8986519787982811, "grad_norm": 2.1352317887870593, "learning_rate": 3.4755765300007623e-06, "loss": 0.23687515258789063, "step": 103930 }, { "epoch": 0.8986952123198243, "grad_norm": 6.879062480589737, "learning_rate": 3.4753753719637705e-06, "loss": 0.06307373046875, "step": 103935 }, { "epoch": 0.8987384458413675, "grad_norm": 8.206331334073424, "learning_rate": 3.4751742117343894e-06, "loss": 0.344677734375, "step": 103940 }, { "epoch": 0.8987816793629109, "grad_norm": 1.1813973245750369, "learning_rate": 3.4749730493135463e-06, "loss": 0.04578399658203125, "step": 103945 }, { "epoch": 0.8988249128844541, "grad_norm": 8.749572687630135, "learning_rate": 3.47477188470217e-06, "loss": 0.045542144775390626, "step": 103950 }, { "epoch": 0.8988681464059973, "grad_norm": 1.5238416084278987, "learning_rate": 3.474570717901188e-06, "loss": 0.1097076416015625, "step": 103955 }, { "epoch": 0.8989113799275407, "grad_norm": 0.1847737220103169, "learning_rate": 3.474369548911527e-06, "loss": 0.142584228515625, "step": 103960 }, { "epoch": 0.8989546134490839, "grad_norm": 9.044837243562542, "learning_rate": 3.4741683777341168e-06, "loss": 0.03407211303710937, "step": 103965 }, { "epoch": 0.8989978469706271, "grad_norm": 0.03251270454432755, "learning_rate": 3.473967204369882e-06, "loss": 0.2637022018432617, "step": 103970 }, { "epoch": 0.8990410804921705, "grad_norm": 2.2670192499188566, "learning_rate": 3.4737660288197535e-06, "loss": 0.09003715515136719, "step": 103975 }, { "epoch": 0.8990843140137137, "grad_norm": 3.5976238525088795, "learning_rate": 3.473564851084658e-06, "loss": 0.0798675537109375, "step": 103980 }, { "epoch": 0.8991275475352569, "grad_norm": 16.591025552698415, "learning_rate": 3.473363671165524e-06, "loss": 0.10345802307128907, "step": 103985 }, { "epoch": 0.8991707810568002, "grad_norm": 3.6089101343782715, "learning_rate": 3.473162489063278e-06, "loss": 0.0293365478515625, "step": 103990 }, { "epoch": 0.8992140145783435, "grad_norm": 0.6898470111018261, "learning_rate": 3.4729613047788475e-06, "loss": 0.09939727783203126, "step": 103995 }, { "epoch": 0.8992572480998867, "grad_norm": 2.451728160211166, "learning_rate": 3.4727601183131616e-06, "loss": 0.1275970458984375, "step": 104000 }, { "epoch": 0.89930048162143, "grad_norm": 26.449485360850453, "learning_rate": 3.4725589296671486e-06, "loss": 0.43281917572021483, "step": 104005 }, { "epoch": 0.8993437151429733, "grad_norm": 6.686735887077024, "learning_rate": 3.4723577388417345e-06, "loss": 0.23984909057617188, "step": 104010 }, { "epoch": 0.8993869486645165, "grad_norm": 3.0637724010427787, "learning_rate": 3.4721565458378496e-06, "loss": 0.08150787353515625, "step": 104015 }, { "epoch": 0.8994301821860597, "grad_norm": 1.90445962902067, "learning_rate": 3.471955350656419e-06, "loss": 0.20167236328125, "step": 104020 }, { "epoch": 0.8994734157076031, "grad_norm": 10.38738726498998, "learning_rate": 3.4717541532983724e-06, "loss": 0.0369140625, "step": 104025 }, { "epoch": 0.8995166492291463, "grad_norm": 1.4042883786757594, "learning_rate": 3.471552953764638e-06, "loss": 0.061658477783203124, "step": 104030 }, { "epoch": 0.8995598827506895, "grad_norm": 6.861751723919351, "learning_rate": 3.471351752056143e-06, "loss": 0.15644683837890624, "step": 104035 }, { "epoch": 0.8996031162722329, "grad_norm": 0.6264622992976642, "learning_rate": 3.471150548173814e-06, "loss": 0.05292778015136719, "step": 104040 }, { "epoch": 0.8996463497937761, "grad_norm": 0.07101469287611252, "learning_rate": 3.470949342118582e-06, "loss": 0.08651542663574219, "step": 104045 }, { "epoch": 0.8996895833153193, "grad_norm": 1.097810959014648, "learning_rate": 3.4707481338913722e-06, "loss": 0.20378494262695312, "step": 104050 }, { "epoch": 0.8997328168368627, "grad_norm": 26.804179260145098, "learning_rate": 3.470546923493114e-06, "loss": 0.18716773986816407, "step": 104055 }, { "epoch": 0.8997760503584059, "grad_norm": 5.769543330090067, "learning_rate": 3.470345710924735e-06, "loss": 0.035171890258789064, "step": 104060 }, { "epoch": 0.8998192838799491, "grad_norm": 0.887557561767532, "learning_rate": 3.470144496187163e-06, "loss": 0.15785751342773438, "step": 104065 }, { "epoch": 0.8998625174014924, "grad_norm": 4.765571543677817, "learning_rate": 3.469943279281327e-06, "loss": 0.0606689453125, "step": 104070 }, { "epoch": 0.8999057509230357, "grad_norm": 0.3221648839184494, "learning_rate": 3.4697420602081525e-06, "loss": 0.11696701049804688, "step": 104075 }, { "epoch": 0.8999489844445789, "grad_norm": 2.8515724938614317, "learning_rate": 3.46954083896857e-06, "loss": 0.059228134155273435, "step": 104080 }, { "epoch": 0.8999922179661222, "grad_norm": 28.703915605320816, "learning_rate": 3.4693396155635067e-06, "loss": 0.114691162109375, "step": 104085 }, { "epoch": 0.9000354514876655, "grad_norm": 0.42189667796432256, "learning_rate": 3.46913838999389e-06, "loss": 0.247802734375, "step": 104090 }, { "epoch": 0.9000786850092087, "grad_norm": 5.041273737855924, "learning_rate": 3.4689371622606497e-06, "loss": 0.25765018463134765, "step": 104095 }, { "epoch": 0.900121918530752, "grad_norm": 46.0131171700872, "learning_rate": 3.468735932364712e-06, "loss": 0.36341209411621095, "step": 104100 }, { "epoch": 0.9001651520522953, "grad_norm": 9.254338877734659, "learning_rate": 3.468534700307005e-06, "loss": 0.08892860412597656, "step": 104105 }, { "epoch": 0.9002083855738385, "grad_norm": 18.406939639614095, "learning_rate": 3.4683334660884577e-06, "loss": 0.13761062622070314, "step": 104110 }, { "epoch": 0.9002516190953818, "grad_norm": 8.03109620724176, "learning_rate": 3.468132229709998e-06, "loss": 0.3822177886962891, "step": 104115 }, { "epoch": 0.9002948526169251, "grad_norm": 2.2656178631011796, "learning_rate": 3.4679309911725535e-06, "loss": 0.064825439453125, "step": 104120 }, { "epoch": 0.9003380861384683, "grad_norm": 15.483290268366321, "learning_rate": 3.4677297504770536e-06, "loss": 0.042813873291015624, "step": 104125 }, { "epoch": 0.9003813196600116, "grad_norm": 2.8684732014146332, "learning_rate": 3.467528507624425e-06, "loss": 0.0476043701171875, "step": 104130 }, { "epoch": 0.9004245531815549, "grad_norm": 1.3903632319536814, "learning_rate": 3.467327262615596e-06, "loss": 0.10119285583496093, "step": 104135 }, { "epoch": 0.9004677867030981, "grad_norm": 7.368376950258173, "learning_rate": 3.4671260154514942e-06, "loss": 0.09224395751953125, "step": 104140 }, { "epoch": 0.9005110202246414, "grad_norm": 2.170092101259056, "learning_rate": 3.4669247661330494e-06, "loss": 0.13197174072265624, "step": 104145 }, { "epoch": 0.9005542537461847, "grad_norm": 1.2012787732721273, "learning_rate": 3.466723514661189e-06, "loss": 0.41121711730957033, "step": 104150 }, { "epoch": 0.9005974872677279, "grad_norm": 29.55607298956508, "learning_rate": 3.4665222610368404e-06, "loss": 0.1542278289794922, "step": 104155 }, { "epoch": 0.9006407207892712, "grad_norm": 0.06156239055973473, "learning_rate": 3.466321005260933e-06, "loss": 0.05218048095703125, "step": 104160 }, { "epoch": 0.9006839543108144, "grad_norm": 0.17299649576696274, "learning_rate": 3.4661197473343937e-06, "loss": 0.052294921875, "step": 104165 }, { "epoch": 0.9007271878323577, "grad_norm": 3.850422781971357, "learning_rate": 3.465918487258151e-06, "loss": 0.06082763671875, "step": 104170 }, { "epoch": 0.900770421353901, "grad_norm": 0.14559505425871366, "learning_rate": 3.4657172250331345e-06, "loss": 0.24199209213256836, "step": 104175 }, { "epoch": 0.9008136548754442, "grad_norm": 0.19561663374670554, "learning_rate": 3.465515960660271e-06, "loss": 0.17946968078613282, "step": 104180 }, { "epoch": 0.9008568883969875, "grad_norm": 53.591814052209415, "learning_rate": 3.4653146941404887e-06, "loss": 0.28709716796875, "step": 104185 }, { "epoch": 0.9009001219185307, "grad_norm": 31.78493788028496, "learning_rate": 3.465113425474716e-06, "loss": 0.16409454345703126, "step": 104190 }, { "epoch": 0.900943355440074, "grad_norm": 3.515862787387612, "learning_rate": 3.4649121546638814e-06, "loss": 0.28395843505859375, "step": 104195 }, { "epoch": 0.9009865889616173, "grad_norm": 14.597839962512698, "learning_rate": 3.464710881708913e-06, "loss": 0.12530670166015626, "step": 104200 }, { "epoch": 0.9010298224831605, "grad_norm": 25.336133993037464, "learning_rate": 3.464509606610739e-06, "loss": 0.16867599487304688, "step": 104205 }, { "epoch": 0.9010730560047038, "grad_norm": 1.878469638439174, "learning_rate": 3.4643083293702886e-06, "loss": 0.05891075134277344, "step": 104210 }, { "epoch": 0.9011162895262471, "grad_norm": 3.3621834846568714, "learning_rate": 3.4641070499884888e-06, "loss": 0.08643035888671875, "step": 104215 }, { "epoch": 0.9011595230477903, "grad_norm": 35.63321170846054, "learning_rate": 3.4639057684662677e-06, "loss": 0.26235160827636717, "step": 104220 }, { "epoch": 0.9012027565693336, "grad_norm": 0.9583760280569846, "learning_rate": 3.463704484804555e-06, "loss": 0.05033416748046875, "step": 104225 }, { "epoch": 0.9012459900908769, "grad_norm": 16.16173023159185, "learning_rate": 3.4635031990042765e-06, "loss": 0.2623077392578125, "step": 104230 }, { "epoch": 0.9012892236124201, "grad_norm": 4.521866777670958, "learning_rate": 3.4633019110663644e-06, "loss": 0.11402587890625, "step": 104235 }, { "epoch": 0.9013324571339634, "grad_norm": 6.221092667258027, "learning_rate": 3.463100620991744e-06, "loss": 0.2307891845703125, "step": 104240 }, { "epoch": 0.9013756906555066, "grad_norm": 15.301019627570922, "learning_rate": 3.462899328781344e-06, "loss": 0.13306961059570313, "step": 104245 }, { "epoch": 0.9014189241770499, "grad_norm": 0.6813746813021511, "learning_rate": 3.462698034436093e-06, "loss": 0.0404449462890625, "step": 104250 }, { "epoch": 0.9014621576985932, "grad_norm": 10.978699206708312, "learning_rate": 3.462496737956921e-06, "loss": 0.17384281158447265, "step": 104255 }, { "epoch": 0.9015053912201364, "grad_norm": 3.3702084040608136, "learning_rate": 3.462295439344754e-06, "loss": 0.13664016723632813, "step": 104260 }, { "epoch": 0.9015486247416797, "grad_norm": 1.4807623437253423, "learning_rate": 3.4620941386005205e-06, "loss": 0.27919349670410154, "step": 104265 }, { "epoch": 0.901591858263223, "grad_norm": 0.5983929607784854, "learning_rate": 3.461892835725151e-06, "loss": 0.04026527404785156, "step": 104270 }, { "epoch": 0.9016350917847662, "grad_norm": 28.137705667326955, "learning_rate": 3.4616915307195717e-06, "loss": 0.30230045318603516, "step": 104275 }, { "epoch": 0.9016783253063095, "grad_norm": 4.634550189690072, "learning_rate": 3.461490223584712e-06, "loss": 0.11920051574707032, "step": 104280 }, { "epoch": 0.9017215588278528, "grad_norm": 0.47149530450964733, "learning_rate": 3.4612889143215003e-06, "loss": 0.01572113037109375, "step": 104285 }, { "epoch": 0.901764792349396, "grad_norm": 1.7225879140546518, "learning_rate": 3.4610876029308654e-06, "loss": 0.08541259765625, "step": 104290 }, { "epoch": 0.9018080258709393, "grad_norm": 75.89298086151125, "learning_rate": 3.4608862894137344e-06, "loss": 0.4042484283447266, "step": 104295 }, { "epoch": 0.9018512593924826, "grad_norm": 27.41098999093479, "learning_rate": 3.460684973771037e-06, "loss": 0.21476287841796876, "step": 104300 }, { "epoch": 0.9018944929140258, "grad_norm": 0.559935553271372, "learning_rate": 3.460483656003701e-06, "loss": 0.2084259033203125, "step": 104305 }, { "epoch": 0.9019377264355691, "grad_norm": 12.51574031530131, "learning_rate": 3.4602823361126546e-06, "loss": 0.07419891357421875, "step": 104310 }, { "epoch": 0.9019809599571124, "grad_norm": 11.344867282364437, "learning_rate": 3.4600810140988276e-06, "loss": 0.2612476348876953, "step": 104315 }, { "epoch": 0.9020241934786556, "grad_norm": 2.0173505798842446, "learning_rate": 3.4598796899631476e-06, "loss": 0.06015968322753906, "step": 104320 }, { "epoch": 0.9020674270001989, "grad_norm": 15.41141763949605, "learning_rate": 3.4596783637065424e-06, "loss": 0.1333831787109375, "step": 104325 }, { "epoch": 0.9021106605217422, "grad_norm": 4.559748134844184, "learning_rate": 3.4594770353299417e-06, "loss": 0.03323974609375, "step": 104330 }, { "epoch": 0.9021538940432854, "grad_norm": 45.00413093969009, "learning_rate": 3.4592757048342737e-06, "loss": 0.31325569152832033, "step": 104335 }, { "epoch": 0.9021971275648286, "grad_norm": 18.651545334501865, "learning_rate": 3.4590743722204664e-06, "loss": 0.2441089630126953, "step": 104340 }, { "epoch": 0.902240361086372, "grad_norm": 3.384532680478749, "learning_rate": 3.4588730374894486e-06, "loss": 0.03728065490722656, "step": 104345 }, { "epoch": 0.9022835946079152, "grad_norm": 0.12971251652472685, "learning_rate": 3.458671700642149e-06, "loss": 0.015087127685546875, "step": 104350 }, { "epoch": 0.9023268281294584, "grad_norm": 14.443159847808243, "learning_rate": 3.4584703616794966e-06, "loss": 0.05201263427734375, "step": 104355 }, { "epoch": 0.9023700616510018, "grad_norm": 28.062279352716782, "learning_rate": 3.458269020602418e-06, "loss": 0.21886749267578126, "step": 104360 }, { "epoch": 0.902413295172545, "grad_norm": 2.6402588285719077, "learning_rate": 3.4580676774118443e-06, "loss": 0.1769012451171875, "step": 104365 }, { "epoch": 0.9024565286940882, "grad_norm": 0.8206165127909654, "learning_rate": 3.457866332108703e-06, "loss": 0.066131591796875, "step": 104370 }, { "epoch": 0.9024997622156316, "grad_norm": 1.1946551350540677, "learning_rate": 3.4576649846939216e-06, "loss": 0.29190216064453123, "step": 104375 }, { "epoch": 0.9025429957371748, "grad_norm": 5.204836061230079, "learning_rate": 3.457463635168431e-06, "loss": 0.30193405151367186, "step": 104380 }, { "epoch": 0.902586229258718, "grad_norm": 0.5558059713871898, "learning_rate": 3.457262283533158e-06, "loss": 0.0277069091796875, "step": 104385 }, { "epoch": 0.9026294627802613, "grad_norm": 3.47200775237443, "learning_rate": 3.457060929789032e-06, "loss": 0.16081771850585938, "step": 104390 }, { "epoch": 0.9026726963018046, "grad_norm": 14.39351194781821, "learning_rate": 3.4568595739369807e-06, "loss": 0.26703720092773436, "step": 104395 }, { "epoch": 0.9027159298233478, "grad_norm": 4.259108893217989, "learning_rate": 3.4566582159779343e-06, "loss": 0.0673583984375, "step": 104400 }, { "epoch": 0.9027591633448911, "grad_norm": 2.292492850157927, "learning_rate": 3.4564568559128196e-06, "loss": 0.21649322509765626, "step": 104405 }, { "epoch": 0.9028023968664344, "grad_norm": 1.461104357550842, "learning_rate": 3.456255493742567e-06, "loss": 0.155718994140625, "step": 104410 }, { "epoch": 0.9028456303879776, "grad_norm": 7.077948031135885, "learning_rate": 3.4560541294681036e-06, "loss": 0.14272651672363282, "step": 104415 }, { "epoch": 0.9028888639095208, "grad_norm": 2.048293309379691, "learning_rate": 3.4558527630903594e-06, "loss": 0.509808349609375, "step": 104420 }, { "epoch": 0.9029320974310642, "grad_norm": 16.76696817513201, "learning_rate": 3.455651394610262e-06, "loss": 0.19185333251953124, "step": 104425 }, { "epoch": 0.9029753309526074, "grad_norm": 2.5476661507987726, "learning_rate": 3.4554500240287413e-06, "loss": 0.23417434692382813, "step": 104430 }, { "epoch": 0.9030185644741506, "grad_norm": 0.9407343213059856, "learning_rate": 3.4552486513467257e-06, "loss": 0.02281646728515625, "step": 104435 }, { "epoch": 0.903061797995694, "grad_norm": 1.5474253992666525, "learning_rate": 3.4550472765651424e-06, "loss": 0.03500633239746094, "step": 104440 }, { "epoch": 0.9031050315172372, "grad_norm": 4.763762648979937, "learning_rate": 3.4548458996849215e-06, "loss": 0.03906402587890625, "step": 104445 }, { "epoch": 0.9031482650387804, "grad_norm": 0.04137159503291364, "learning_rate": 3.454644520706992e-06, "loss": 0.1464252471923828, "step": 104450 }, { "epoch": 0.9031914985603238, "grad_norm": 1.0620652204206422, "learning_rate": 3.454443139632282e-06, "loss": 0.01495208740234375, "step": 104455 }, { "epoch": 0.903234732081867, "grad_norm": 19.316988489170292, "learning_rate": 3.4542417564617212e-06, "loss": 0.09264965057373047, "step": 104460 }, { "epoch": 0.9032779656034102, "grad_norm": 6.5602820891916265, "learning_rate": 3.4540403711962367e-06, "loss": 0.05631999969482422, "step": 104465 }, { "epoch": 0.9033211991249536, "grad_norm": 1.2680574139287109, "learning_rate": 3.4538389838367575e-06, "loss": 0.104302978515625, "step": 104470 }, { "epoch": 0.9033644326464968, "grad_norm": 20.197075329092957, "learning_rate": 3.4536375943842138e-06, "loss": 0.07909660339355469, "step": 104475 }, { "epoch": 0.90340766616804, "grad_norm": 17.973961592021382, "learning_rate": 3.453436202839534e-06, "loss": 0.1205322265625, "step": 104480 }, { "epoch": 0.9034508996895834, "grad_norm": 10.363392974834047, "learning_rate": 3.453234809203645e-06, "loss": 0.278216552734375, "step": 104485 }, { "epoch": 0.9034941332111266, "grad_norm": 0.03279908671903103, "learning_rate": 3.453033413477478e-06, "loss": 0.22412281036376952, "step": 104490 }, { "epoch": 0.9035373667326698, "grad_norm": 0.3573328530935297, "learning_rate": 3.4528320156619617e-06, "loss": 0.02917022705078125, "step": 104495 }, { "epoch": 0.9035806002542132, "grad_norm": 8.36369604279291, "learning_rate": 3.4526306157580237e-06, "loss": 0.0415130615234375, "step": 104500 }, { "epoch": 0.9036238337757564, "grad_norm": 18.904485759886775, "learning_rate": 3.452429213766592e-06, "loss": 0.12343368530273438, "step": 104505 }, { "epoch": 0.9036670672972996, "grad_norm": 6.275605414529274, "learning_rate": 3.452227809688598e-06, "loss": 0.21057357788085937, "step": 104510 }, { "epoch": 0.9037103008188428, "grad_norm": 0.07929170025165758, "learning_rate": 3.45202640352497e-06, "loss": 0.28183326721191404, "step": 104515 }, { "epoch": 0.9037535343403862, "grad_norm": 4.481604108177341, "learning_rate": 3.451824995276635e-06, "loss": 0.192437744140625, "step": 104520 }, { "epoch": 0.9037967678619294, "grad_norm": 27.930084556263374, "learning_rate": 3.4516235849445236e-06, "loss": 0.21934814453125, "step": 104525 }, { "epoch": 0.9038400013834726, "grad_norm": 1.5834496413369927, "learning_rate": 3.4514221725295634e-06, "loss": 0.059679412841796876, "step": 104530 }, { "epoch": 0.903883234905016, "grad_norm": 57.16750728181045, "learning_rate": 3.4512207580326843e-06, "loss": 0.3047187805175781, "step": 104535 }, { "epoch": 0.9039264684265592, "grad_norm": 7.131233311167843, "learning_rate": 3.4510193414548154e-06, "loss": 0.1490020751953125, "step": 104540 }, { "epoch": 0.9039697019481024, "grad_norm": 12.841401769737159, "learning_rate": 3.450817922796885e-06, "loss": 0.26633453369140625, "step": 104545 }, { "epoch": 0.9040129354696458, "grad_norm": 12.494715426007842, "learning_rate": 3.4506165020598217e-06, "loss": 0.1315408706665039, "step": 104550 }, { "epoch": 0.904056168991189, "grad_norm": 0.85075827984187, "learning_rate": 3.4504150792445556e-06, "loss": 0.17982177734375, "step": 104555 }, { "epoch": 0.9040994025127322, "grad_norm": 12.600134256479425, "learning_rate": 3.4502136543520144e-06, "loss": 0.055192184448242185, "step": 104560 }, { "epoch": 0.9041426360342756, "grad_norm": 5.75518149334325, "learning_rate": 3.450012227383127e-06, "loss": 0.1322784423828125, "step": 104565 }, { "epoch": 0.9041858695558188, "grad_norm": 23.914601131875816, "learning_rate": 3.449810798338824e-06, "loss": 0.27361373901367186, "step": 104570 }, { "epoch": 0.904229103077362, "grad_norm": 2.112472274279052, "learning_rate": 3.4496093672200343e-06, "loss": 0.2230560302734375, "step": 104575 }, { "epoch": 0.9042723365989054, "grad_norm": 11.024843970599264, "learning_rate": 3.449407934027684e-06, "loss": 0.0894134521484375, "step": 104580 }, { "epoch": 0.9043155701204486, "grad_norm": 2.814405792333337, "learning_rate": 3.4492064987627053e-06, "loss": 0.01772003173828125, "step": 104585 }, { "epoch": 0.9043588036419918, "grad_norm": 15.359942397996818, "learning_rate": 3.449005061426025e-06, "loss": 0.20720138549804687, "step": 104590 }, { "epoch": 0.9044020371635351, "grad_norm": 2.362875885391953, "learning_rate": 3.448803622018574e-06, "loss": 0.169464111328125, "step": 104595 }, { "epoch": 0.9044452706850784, "grad_norm": 17.79156329004523, "learning_rate": 3.448602180541279e-06, "loss": 0.312353515625, "step": 104600 }, { "epoch": 0.9044885042066216, "grad_norm": 41.10139364830875, "learning_rate": 3.4484007369950714e-06, "loss": 0.20072174072265625, "step": 104605 }, { "epoch": 0.9045317377281649, "grad_norm": 31.04519333967451, "learning_rate": 3.4481992913808793e-06, "loss": 0.20811767578125, "step": 104610 }, { "epoch": 0.9045749712497082, "grad_norm": 1.1922264988871234, "learning_rate": 3.4479978436996303e-06, "loss": 0.1811356544494629, "step": 104615 }, { "epoch": 0.9046182047712514, "grad_norm": 6.507860737922265, "learning_rate": 3.4477963939522562e-06, "loss": 0.04930686950683594, "step": 104620 }, { "epoch": 0.9046614382927947, "grad_norm": 21.425036434075352, "learning_rate": 3.447594942139685e-06, "loss": 0.13798904418945312, "step": 104625 }, { "epoch": 0.904704671814338, "grad_norm": 7.604539498019669, "learning_rate": 3.447393488262845e-06, "loss": 0.4070770263671875, "step": 104630 }, { "epoch": 0.9047479053358812, "grad_norm": 2.0990211290138276, "learning_rate": 3.4471920323226655e-06, "loss": 0.129888916015625, "step": 104635 }, { "epoch": 0.9047911388574245, "grad_norm": 6.770125298319245, "learning_rate": 3.4469905743200756e-06, "loss": 0.19438705444335938, "step": 104640 }, { "epoch": 0.9048343723789678, "grad_norm": 2.2478296704145504, "learning_rate": 3.4467891142560046e-06, "loss": 0.1140380859375, "step": 104645 }, { "epoch": 0.904877605900511, "grad_norm": 35.46417749301363, "learning_rate": 3.4465876521313827e-06, "loss": 0.14818267822265624, "step": 104650 }, { "epoch": 0.9049208394220543, "grad_norm": 15.407077195945881, "learning_rate": 3.4463861879471375e-06, "loss": 0.19990615844726561, "step": 104655 }, { "epoch": 0.9049640729435976, "grad_norm": 5.12762740433358, "learning_rate": 3.4461847217041983e-06, "loss": 0.07407379150390625, "step": 104660 }, { "epoch": 0.9050073064651408, "grad_norm": 0.7782221771857767, "learning_rate": 3.4459832534034946e-06, "loss": 0.07113304138183593, "step": 104665 }, { "epoch": 0.905050539986684, "grad_norm": 1.1143359037422171, "learning_rate": 3.4457817830459556e-06, "loss": 0.4330940246582031, "step": 104670 }, { "epoch": 0.9050937735082273, "grad_norm": 1.7952207002807838, "learning_rate": 3.4455803106325113e-06, "loss": 0.17471466064453126, "step": 104675 }, { "epoch": 0.9051370070297706, "grad_norm": 0.24679226775531055, "learning_rate": 3.4453788361640893e-06, "loss": 0.0379608154296875, "step": 104680 }, { "epoch": 0.9051802405513139, "grad_norm": 13.177484688715504, "learning_rate": 3.4451773596416196e-06, "loss": 0.12576522827148437, "step": 104685 }, { "epoch": 0.9052234740728571, "grad_norm": 30.08524505627902, "learning_rate": 3.444975881066032e-06, "loss": 0.2363372802734375, "step": 104690 }, { "epoch": 0.9052667075944004, "grad_norm": 1.3736036403173335, "learning_rate": 3.4447744004382535e-06, "loss": 0.053794479370117186, "step": 104695 }, { "epoch": 0.9053099411159437, "grad_norm": 4.52817334393793, "learning_rate": 3.4445729177592146e-06, "loss": 0.26718902587890625, "step": 104700 }, { "epoch": 0.9053531746374869, "grad_norm": 1.112257981262415, "learning_rate": 3.4443714330298457e-06, "loss": 0.042066001892089845, "step": 104705 }, { "epoch": 0.9053964081590302, "grad_norm": 0.15400702547746095, "learning_rate": 3.444169946251074e-06, "loss": 0.061215972900390624, "step": 104710 }, { "epoch": 0.9054396416805734, "grad_norm": 3.427537066781378, "learning_rate": 3.4439684574238314e-06, "loss": 0.06654586791992187, "step": 104715 }, { "epoch": 0.9054828752021167, "grad_norm": 22.64134860565184, "learning_rate": 3.4437669665490444e-06, "loss": 0.3724372863769531, "step": 104720 }, { "epoch": 0.90552610872366, "grad_norm": 3.940175995396679, "learning_rate": 3.4435654736276433e-06, "loss": 0.062200927734375, "step": 104725 }, { "epoch": 0.9055693422452032, "grad_norm": 2.6362487670146626, "learning_rate": 3.4433639786605573e-06, "loss": 0.0898406982421875, "step": 104730 }, { "epoch": 0.9056125757667465, "grad_norm": 4.35293506410459, "learning_rate": 3.4431624816487168e-06, "loss": 0.050055313110351565, "step": 104735 }, { "epoch": 0.9056558092882898, "grad_norm": 38.630439906920685, "learning_rate": 3.4429609825930493e-06, "loss": 0.2893280029296875, "step": 104740 }, { "epoch": 0.905699042809833, "grad_norm": 5.0299000961158455, "learning_rate": 3.4427594814944853e-06, "loss": 0.08162345886230468, "step": 104745 }, { "epoch": 0.9057422763313763, "grad_norm": 18.170653811492855, "learning_rate": 3.442557978353954e-06, "loss": 0.1120361328125, "step": 104750 }, { "epoch": 0.9057855098529196, "grad_norm": 8.374369325835465, "learning_rate": 3.4423564731723833e-06, "loss": 0.06364021301269532, "step": 104755 }, { "epoch": 0.9058287433744628, "grad_norm": 1.7647584631892346, "learning_rate": 3.4421549659507045e-06, "loss": 0.08468360900878906, "step": 104760 }, { "epoch": 0.9058719768960061, "grad_norm": 16.383069199946796, "learning_rate": 3.441953456689845e-06, "loss": 0.3978607177734375, "step": 104765 }, { "epoch": 0.9059152104175493, "grad_norm": 24.62076986060565, "learning_rate": 3.441751945390737e-06, "loss": 0.13904266357421874, "step": 104770 }, { "epoch": 0.9059584439390926, "grad_norm": 24.7560036220183, "learning_rate": 3.4415504320543067e-06, "loss": 0.29883270263671874, "step": 104775 }, { "epoch": 0.9060016774606359, "grad_norm": 3.870874078754747, "learning_rate": 3.4413489166814854e-06, "loss": 0.12839622497558595, "step": 104780 }, { "epoch": 0.9060449109821791, "grad_norm": 0.6839470582843388, "learning_rate": 3.4411473992732012e-06, "loss": 0.19197120666503906, "step": 104785 }, { "epoch": 0.9060881445037224, "grad_norm": 0.9597487115385818, "learning_rate": 3.440945879830384e-06, "loss": 0.20811920166015624, "step": 104790 }, { "epoch": 0.9061313780252657, "grad_norm": 18.054388833350636, "learning_rate": 3.440744358353965e-06, "loss": 0.1541473388671875, "step": 104795 }, { "epoch": 0.9061746115468089, "grad_norm": 1.0752290899309034, "learning_rate": 3.4405428348448714e-06, "loss": 0.2964679718017578, "step": 104800 }, { "epoch": 0.9062178450683522, "grad_norm": 5.786043748201559, "learning_rate": 3.4403413093040325e-06, "loss": 0.13329544067382812, "step": 104805 }, { "epoch": 0.9062610785898955, "grad_norm": 52.459456161757004, "learning_rate": 3.4401397817323787e-06, "loss": 0.3004669189453125, "step": 104810 }, { "epoch": 0.9063043121114387, "grad_norm": 10.51813435098923, "learning_rate": 3.4399382521308384e-06, "loss": 0.057696533203125, "step": 104815 }, { "epoch": 0.906347545632982, "grad_norm": 0.2764059405141139, "learning_rate": 3.439736720500342e-06, "loss": 0.033489227294921875, "step": 104820 }, { "epoch": 0.9063907791545253, "grad_norm": 1.9440152673464104, "learning_rate": 3.4395351868418197e-06, "loss": 0.2530517578125, "step": 104825 }, { "epoch": 0.9064340126760685, "grad_norm": 12.501323657168012, "learning_rate": 3.4393336511562e-06, "loss": 0.1110748291015625, "step": 104830 }, { "epoch": 0.9064772461976118, "grad_norm": 15.642229311775226, "learning_rate": 3.439132113444411e-06, "loss": 0.12618179321289064, "step": 104835 }, { "epoch": 0.9065204797191551, "grad_norm": 3.0608110180358445, "learning_rate": 3.4389305737073844e-06, "loss": 0.14139461517333984, "step": 104840 }, { "epoch": 0.9065637132406983, "grad_norm": 17.567686887188245, "learning_rate": 3.4387290319460483e-06, "loss": 0.46901702880859375, "step": 104845 }, { "epoch": 0.9066069467622415, "grad_norm": 0.9641976939870768, "learning_rate": 3.4385274881613335e-06, "loss": 0.2251556396484375, "step": 104850 }, { "epoch": 0.9066501802837849, "grad_norm": 48.61310554770724, "learning_rate": 3.438325942354168e-06, "loss": 0.2699981689453125, "step": 104855 }, { "epoch": 0.9066934138053281, "grad_norm": 6.287573759449751, "learning_rate": 3.4381243945254822e-06, "loss": 0.22280426025390626, "step": 104860 }, { "epoch": 0.9067366473268713, "grad_norm": 4.916677316412489, "learning_rate": 3.4379228446762046e-06, "loss": 0.21364765167236327, "step": 104865 }, { "epoch": 0.9067798808484147, "grad_norm": 0.6134316804491171, "learning_rate": 3.437721292807266e-06, "loss": 0.052190399169921874, "step": 104870 }, { "epoch": 0.9068231143699579, "grad_norm": 15.72229081449664, "learning_rate": 3.437519738919596e-06, "loss": 0.2949371337890625, "step": 104875 }, { "epoch": 0.9068663478915011, "grad_norm": 3.284499148401227, "learning_rate": 3.4373181830141235e-06, "loss": 0.03210601806640625, "step": 104880 }, { "epoch": 0.9069095814130445, "grad_norm": 8.020428052083902, "learning_rate": 3.437116625091778e-06, "loss": 0.3363525390625, "step": 104885 }, { "epoch": 0.9069528149345877, "grad_norm": 1.3704054561907983, "learning_rate": 3.436915065153489e-06, "loss": 0.13927459716796875, "step": 104890 }, { "epoch": 0.9069960484561309, "grad_norm": 0.14210610061452061, "learning_rate": 3.4367135032001865e-06, "loss": 0.16347274780273438, "step": 104895 }, { "epoch": 0.9070392819776742, "grad_norm": 22.329504152833795, "learning_rate": 3.4365119392328005e-06, "loss": 0.14951324462890625, "step": 104900 }, { "epoch": 0.9070825154992175, "grad_norm": 2.3105464318942097, "learning_rate": 3.436310373252259e-06, "loss": 0.1821258544921875, "step": 104905 }, { "epoch": 0.9071257490207607, "grad_norm": 18.887745993223096, "learning_rate": 3.4361088052594937e-06, "loss": 0.24576034545898437, "step": 104910 }, { "epoch": 0.907168982542304, "grad_norm": 15.216490718921852, "learning_rate": 3.435907235255432e-06, "loss": 0.45731353759765625, "step": 104915 }, { "epoch": 0.9072122160638473, "grad_norm": 43.462987300338575, "learning_rate": 3.4357056632410057e-06, "loss": 0.15295944213867188, "step": 104920 }, { "epoch": 0.9072554495853905, "grad_norm": 19.072976199722493, "learning_rate": 3.435504089217143e-06, "loss": 0.12332611083984375, "step": 104925 }, { "epoch": 0.9072986831069338, "grad_norm": 10.009022881940211, "learning_rate": 3.435302513184773e-06, "loss": 0.08905410766601562, "step": 104930 }, { "epoch": 0.9073419166284771, "grad_norm": 1.3410181339398055, "learning_rate": 3.435100935144828e-06, "loss": 0.240252685546875, "step": 104935 }, { "epoch": 0.9073851501500203, "grad_norm": 5.36153863459923, "learning_rate": 3.434899355098235e-06, "loss": 0.08445243835449219, "step": 104940 }, { "epoch": 0.9074283836715635, "grad_norm": 0.1303075681094601, "learning_rate": 3.434697773045925e-06, "loss": 0.05196866989135742, "step": 104945 }, { "epoch": 0.9074716171931069, "grad_norm": 34.69888680399642, "learning_rate": 3.4344961889888263e-06, "loss": 0.20124359130859376, "step": 104950 }, { "epoch": 0.9075148507146501, "grad_norm": 7.677790401362035, "learning_rate": 3.4342946029278695e-06, "loss": 0.14776153564453126, "step": 104955 }, { "epoch": 0.9075580842361933, "grad_norm": 9.371806213367835, "learning_rate": 3.434093014863986e-06, "loss": 0.08755340576171874, "step": 104960 }, { "epoch": 0.9076013177577367, "grad_norm": 2.0111028750817668, "learning_rate": 3.4338914247981025e-06, "loss": 0.14471435546875, "step": 104965 }, { "epoch": 0.9076445512792799, "grad_norm": 0.9965027189792273, "learning_rate": 3.4336898327311506e-06, "loss": 0.15671539306640625, "step": 104970 }, { "epoch": 0.9076877848008231, "grad_norm": 18.149356187724973, "learning_rate": 3.43348823866406e-06, "loss": 0.7049720764160157, "step": 104975 }, { "epoch": 0.9077310183223665, "grad_norm": 28.567303100649184, "learning_rate": 3.4332866425977586e-06, "loss": 0.2187713623046875, "step": 104980 }, { "epoch": 0.9077742518439097, "grad_norm": 3.956800380902225, "learning_rate": 3.433085044533178e-06, "loss": 0.12683868408203125, "step": 104985 }, { "epoch": 0.9078174853654529, "grad_norm": 0.4607574721178984, "learning_rate": 3.432883444471248e-06, "loss": 0.11170501708984375, "step": 104990 }, { "epoch": 0.9078607188869963, "grad_norm": 4.185203476712933, "learning_rate": 3.432681842412897e-06, "loss": 0.22493362426757812, "step": 104995 }, { "epoch": 0.9079039524085395, "grad_norm": 15.843538745469383, "learning_rate": 3.4324802383590567e-06, "loss": 0.47379817962646487, "step": 105000 }, { "epoch": 0.9079471859300827, "grad_norm": 4.5439895142347595, "learning_rate": 3.432278632310655e-06, "loss": 0.1254608154296875, "step": 105005 }, { "epoch": 0.9079904194516261, "grad_norm": 36.18571535705982, "learning_rate": 3.4320770242686223e-06, "loss": 0.1509246826171875, "step": 105010 }, { "epoch": 0.9080336529731693, "grad_norm": 7.894850107839203, "learning_rate": 3.4318754142338888e-06, "loss": 0.096484375, "step": 105015 }, { "epoch": 0.9080768864947125, "grad_norm": 0.15417933390818034, "learning_rate": 3.4316738022073847e-06, "loss": 0.058134841918945315, "step": 105020 }, { "epoch": 0.9081201200162557, "grad_norm": 5.779491428170528, "learning_rate": 3.431472188190038e-06, "loss": 0.23214950561523437, "step": 105025 }, { "epoch": 0.9081633535377991, "grad_norm": 10.183803199845762, "learning_rate": 3.43127057218278e-06, "loss": 0.12466201782226563, "step": 105030 }, { "epoch": 0.9082065870593423, "grad_norm": 13.199149515099412, "learning_rate": 3.431068954186541e-06, "loss": 0.5957006454467774, "step": 105035 }, { "epoch": 0.9082498205808855, "grad_norm": 1.1536952301586982, "learning_rate": 3.4308673342022494e-06, "loss": 0.19283828735351563, "step": 105040 }, { "epoch": 0.9082930541024289, "grad_norm": 6.151821390531237, "learning_rate": 3.4306657122308353e-06, "loss": 0.09636650085449219, "step": 105045 }, { "epoch": 0.9083362876239721, "grad_norm": 0.10108260030502188, "learning_rate": 3.4304640882732297e-06, "loss": 0.05543594360351563, "step": 105050 }, { "epoch": 0.9083795211455153, "grad_norm": 8.183506908008837, "learning_rate": 3.430262462330362e-06, "loss": 0.06621551513671875, "step": 105055 }, { "epoch": 0.9084227546670587, "grad_norm": 23.290690522468935, "learning_rate": 3.430060834403161e-06, "loss": 0.1333221435546875, "step": 105060 }, { "epoch": 0.9084659881886019, "grad_norm": 7.806795292397929, "learning_rate": 3.429859204492558e-06, "loss": 0.08345947265625, "step": 105065 }, { "epoch": 0.9085092217101451, "grad_norm": 6.0112570658625675, "learning_rate": 3.429657572599482e-06, "loss": 0.073175048828125, "step": 105070 }, { "epoch": 0.9085524552316885, "grad_norm": 30.19689471855563, "learning_rate": 3.4294559387248637e-06, "loss": 0.14581642150878907, "step": 105075 }, { "epoch": 0.9085956887532317, "grad_norm": 5.380386706474991, "learning_rate": 3.429254302869632e-06, "loss": 0.1125732421875, "step": 105080 }, { "epoch": 0.9086389222747749, "grad_norm": 9.009142231356797, "learning_rate": 3.4290526650347182e-06, "loss": 0.18890113830566407, "step": 105085 }, { "epoch": 0.9086821557963183, "grad_norm": 0.3782444337209358, "learning_rate": 3.4288510252210506e-06, "loss": 0.028456878662109376, "step": 105090 }, { "epoch": 0.9087253893178615, "grad_norm": 10.568872719244165, "learning_rate": 3.42864938342956e-06, "loss": 0.23539352416992188, "step": 105095 }, { "epoch": 0.9087686228394047, "grad_norm": 21.124608953420655, "learning_rate": 3.428447739661177e-06, "loss": 0.12888565063476562, "step": 105100 }, { "epoch": 0.9088118563609481, "grad_norm": 19.677845932533348, "learning_rate": 3.4282460939168296e-06, "loss": 0.11546630859375, "step": 105105 }, { "epoch": 0.9088550898824913, "grad_norm": 0.48043661689631745, "learning_rate": 3.4280444461974507e-06, "loss": 0.07560272216796875, "step": 105110 }, { "epoch": 0.9088983234040345, "grad_norm": 0.4362381020091963, "learning_rate": 3.4278427965039684e-06, "loss": 0.11307220458984375, "step": 105115 }, { "epoch": 0.9089415569255778, "grad_norm": 30.470393352659148, "learning_rate": 3.4276411448373113e-06, "loss": 0.075042724609375, "step": 105120 }, { "epoch": 0.9089847904471211, "grad_norm": 8.352651419869582, "learning_rate": 3.427439491198412e-06, "loss": 0.1949798583984375, "step": 105125 }, { "epoch": 0.9090280239686643, "grad_norm": 1.145554749667822, "learning_rate": 3.4272378355882e-06, "loss": 0.008653640747070312, "step": 105130 }, { "epoch": 0.9090712574902076, "grad_norm": 54.854069455503854, "learning_rate": 3.427036178007605e-06, "loss": 0.4649017333984375, "step": 105135 }, { "epoch": 0.9091144910117509, "grad_norm": 19.40974569632974, "learning_rate": 3.426834518457556e-06, "loss": 0.177838134765625, "step": 105140 }, { "epoch": 0.9091577245332941, "grad_norm": 26.968005592036835, "learning_rate": 3.426632856938985e-06, "loss": 0.2188018798828125, "step": 105145 }, { "epoch": 0.9092009580548374, "grad_norm": 6.048252580624694, "learning_rate": 3.42643119345282e-06, "loss": 0.17626724243164063, "step": 105150 }, { "epoch": 0.9092441915763807, "grad_norm": 16.352843315974127, "learning_rate": 3.4262295279999914e-06, "loss": 0.23447113037109374, "step": 105155 }, { "epoch": 0.9092874250979239, "grad_norm": 14.839528606225135, "learning_rate": 3.426027860581431e-06, "loss": 0.27647247314453127, "step": 105160 }, { "epoch": 0.9093306586194672, "grad_norm": 28.588246093589394, "learning_rate": 3.425826191198068e-06, "loss": 0.14067459106445312, "step": 105165 }, { "epoch": 0.9093738921410105, "grad_norm": 10.286801830993014, "learning_rate": 3.4256245198508314e-06, "loss": 0.38521728515625, "step": 105170 }, { "epoch": 0.9094171256625537, "grad_norm": 0.037772423086561174, "learning_rate": 3.425422846540653e-06, "loss": 0.15859146118164064, "step": 105175 }, { "epoch": 0.909460359184097, "grad_norm": 1.5555786505581284, "learning_rate": 3.425221171268461e-06, "loss": 0.10137901306152344, "step": 105180 }, { "epoch": 0.9095035927056403, "grad_norm": 8.517487113930805, "learning_rate": 3.4250194940351866e-06, "loss": 0.207342529296875, "step": 105185 }, { "epoch": 0.9095468262271835, "grad_norm": 8.377628619908764, "learning_rate": 3.4248178148417604e-06, "loss": 0.082879638671875, "step": 105190 }, { "epoch": 0.9095900597487268, "grad_norm": 4.6904785391981125, "learning_rate": 3.4246161336891127e-06, "loss": 0.1101287841796875, "step": 105195 }, { "epoch": 0.90963329327027, "grad_norm": 6.019291453372076, "learning_rate": 3.424414450578172e-06, "loss": 0.08108673095703126, "step": 105200 }, { "epoch": 0.9096765267918133, "grad_norm": 23.914607784014567, "learning_rate": 3.424212765509869e-06, "loss": 0.25603408813476564, "step": 105205 }, { "epoch": 0.9097197603133566, "grad_norm": 9.351303073324253, "learning_rate": 3.4240110784851346e-06, "loss": 0.35728912353515624, "step": 105210 }, { "epoch": 0.9097629938348998, "grad_norm": 16.218214279411143, "learning_rate": 3.4238093895048988e-06, "loss": 0.1494354248046875, "step": 105215 }, { "epoch": 0.9098062273564431, "grad_norm": 9.419608581102356, "learning_rate": 3.4236076985700908e-06, "loss": 0.4780845642089844, "step": 105220 }, { "epoch": 0.9098494608779863, "grad_norm": 1.7120285318887563, "learning_rate": 3.4234060056816428e-06, "loss": 0.30965213775634765, "step": 105225 }, { "epoch": 0.9098926943995296, "grad_norm": 6.5369379342908855, "learning_rate": 3.4232043108404835e-06, "loss": 0.170928955078125, "step": 105230 }, { "epoch": 0.9099359279210729, "grad_norm": 14.071018070779763, "learning_rate": 3.423002614047542e-06, "loss": 0.16793212890625, "step": 105235 }, { "epoch": 0.9099791614426161, "grad_norm": 0.22533409930179918, "learning_rate": 3.4228009153037504e-06, "loss": 0.3070281982421875, "step": 105240 }, { "epoch": 0.9100223949641594, "grad_norm": 10.897348730554224, "learning_rate": 3.422599214610039e-06, "loss": 0.19766387939453126, "step": 105245 }, { "epoch": 0.9100656284857027, "grad_norm": 7.395552565994681, "learning_rate": 3.422397511967336e-06, "loss": 0.07647247314453125, "step": 105250 }, { "epoch": 0.910108862007246, "grad_norm": 2.7831945568656886, "learning_rate": 3.4221958073765745e-06, "loss": 0.19010982513427735, "step": 105255 }, { "epoch": 0.9101520955287892, "grad_norm": 10.345348283051363, "learning_rate": 3.4219941008386835e-06, "loss": 0.679803466796875, "step": 105260 }, { "epoch": 0.9101953290503325, "grad_norm": 28.449811051809395, "learning_rate": 3.421792392354591e-06, "loss": 0.10259628295898438, "step": 105265 }, { "epoch": 0.9102385625718757, "grad_norm": 6.457496362614111, "learning_rate": 3.4215906819252306e-06, "loss": 0.0665283203125, "step": 105270 }, { "epoch": 0.910281796093419, "grad_norm": 0.4929156961251263, "learning_rate": 3.421388969551531e-06, "loss": 0.13720207214355468, "step": 105275 }, { "epoch": 0.9103250296149623, "grad_norm": 2.043476154867059, "learning_rate": 3.4211872552344227e-06, "loss": 0.06793403625488281, "step": 105280 }, { "epoch": 0.9103682631365055, "grad_norm": 0.2358133487568337, "learning_rate": 3.4209855389748365e-06, "loss": 0.12038459777832031, "step": 105285 }, { "epoch": 0.9104114966580488, "grad_norm": 3.3670085530966296, "learning_rate": 3.420783820773701e-06, "loss": 0.09789810180664063, "step": 105290 }, { "epoch": 0.910454730179592, "grad_norm": 0.09694024596771783, "learning_rate": 3.4205821006319486e-06, "loss": 0.27141571044921875, "step": 105295 }, { "epoch": 0.9104979637011353, "grad_norm": 10.342524570277956, "learning_rate": 3.4203803785505084e-06, "loss": 0.054071044921875, "step": 105300 }, { "epoch": 0.9105411972226786, "grad_norm": 6.9282688918809425, "learning_rate": 3.4201786545303114e-06, "loss": 0.2546875, "step": 105305 }, { "epoch": 0.9105844307442218, "grad_norm": 20.300209629200914, "learning_rate": 3.419976928572288e-06, "loss": 0.377276611328125, "step": 105310 }, { "epoch": 0.9106276642657651, "grad_norm": 1.1835167622677192, "learning_rate": 3.419775200677367e-06, "loss": 0.11983261108398438, "step": 105315 }, { "epoch": 0.9106708977873084, "grad_norm": 26.72874783229066, "learning_rate": 3.4195734708464798e-06, "loss": 0.42366790771484375, "step": 105320 }, { "epoch": 0.9107141313088516, "grad_norm": 1.9040952014469046, "learning_rate": 3.419371739080558e-06, "loss": 0.03680896759033203, "step": 105325 }, { "epoch": 0.9107573648303949, "grad_norm": 37.8194644739554, "learning_rate": 3.4191700053805297e-06, "loss": 0.3395416259765625, "step": 105330 }, { "epoch": 0.9108005983519382, "grad_norm": 0.16915350652128308, "learning_rate": 3.418968269747327e-06, "loss": 0.08136138916015626, "step": 105335 }, { "epoch": 0.9108438318734814, "grad_norm": 34.648072527618325, "learning_rate": 3.41876653218188e-06, "loss": 0.337359619140625, "step": 105340 }, { "epoch": 0.9108870653950247, "grad_norm": 2.2276301510272165, "learning_rate": 3.418564792685118e-06, "loss": 0.107867431640625, "step": 105345 }, { "epoch": 0.910930298916568, "grad_norm": 12.526666157965867, "learning_rate": 3.418363051257972e-06, "loss": 0.1657695770263672, "step": 105350 }, { "epoch": 0.9109735324381112, "grad_norm": 3.611407453129624, "learning_rate": 3.4181613079013736e-06, "loss": 0.10118408203125, "step": 105355 }, { "epoch": 0.9110167659596545, "grad_norm": 5.501185406559246, "learning_rate": 3.4179595626162514e-06, "loss": 0.07253446578979492, "step": 105360 }, { "epoch": 0.9110599994811978, "grad_norm": 1.6353127967907977, "learning_rate": 3.4177578154035368e-06, "loss": 0.25460128784179686, "step": 105365 }, { "epoch": 0.911103233002741, "grad_norm": 1.8690699194362455, "learning_rate": 3.41755606626416e-06, "loss": 0.196795654296875, "step": 105370 }, { "epoch": 0.9111464665242842, "grad_norm": 2.509716558775389, "learning_rate": 3.417354315199052e-06, "loss": 0.132275390625, "step": 105375 }, { "epoch": 0.9111897000458276, "grad_norm": 56.41322472569521, "learning_rate": 3.4171525622091415e-06, "loss": 0.3612548828125, "step": 105380 }, { "epoch": 0.9112329335673708, "grad_norm": 0.7232996903800851, "learning_rate": 3.4169508072953617e-06, "loss": 0.12350921630859375, "step": 105385 }, { "epoch": 0.911276167088914, "grad_norm": 5.640881649278038, "learning_rate": 3.4167490504586418e-06, "loss": 0.036480712890625, "step": 105390 }, { "epoch": 0.9113194006104574, "grad_norm": 13.993740984752478, "learning_rate": 3.4165472916999105e-06, "loss": 0.06616096496582032, "step": 105395 }, { "epoch": 0.9113626341320006, "grad_norm": 2.126369863067045, "learning_rate": 3.4163455310201013e-06, "loss": 0.3805229187011719, "step": 105400 }, { "epoch": 0.9114058676535438, "grad_norm": 19.760430127955356, "learning_rate": 3.4161437684201416e-06, "loss": 0.3061168670654297, "step": 105405 }, { "epoch": 0.9114491011750872, "grad_norm": 4.7868989271020075, "learning_rate": 3.415942003900965e-06, "loss": 0.067901611328125, "step": 105410 }, { "epoch": 0.9114923346966304, "grad_norm": 18.976381178266042, "learning_rate": 3.4157402374635006e-06, "loss": 0.07011299133300782, "step": 105415 }, { "epoch": 0.9115355682181736, "grad_norm": 1.6826160458072088, "learning_rate": 3.415538469108679e-06, "loss": 0.07848968505859374, "step": 105420 }, { "epoch": 0.911578801739717, "grad_norm": 23.30174531512152, "learning_rate": 3.4153366988374298e-06, "loss": 0.31485443115234374, "step": 105425 }, { "epoch": 0.9116220352612602, "grad_norm": 0.12226219725961283, "learning_rate": 3.415134926650686e-06, "loss": 0.262451171875, "step": 105430 }, { "epoch": 0.9116652687828034, "grad_norm": 4.771217251679379, "learning_rate": 3.4149331525493755e-06, "loss": 0.05964508056640625, "step": 105435 }, { "epoch": 0.9117085023043467, "grad_norm": 53.0822377678019, "learning_rate": 3.4147313765344295e-06, "loss": 0.23848419189453124, "step": 105440 }, { "epoch": 0.91175173582589, "grad_norm": 25.762832340894473, "learning_rate": 3.4145295986067798e-06, "loss": 0.372607421875, "step": 105445 }, { "epoch": 0.9117949693474332, "grad_norm": 13.37897827627763, "learning_rate": 3.4143278187673568e-06, "loss": 0.12548828125, "step": 105450 }, { "epoch": 0.9118382028689765, "grad_norm": 6.100078824480115, "learning_rate": 3.4141260370170893e-06, "loss": 0.10978240966796875, "step": 105455 }, { "epoch": 0.9118814363905198, "grad_norm": 12.176807572891903, "learning_rate": 3.4139242533569094e-06, "loss": 0.15255241394042968, "step": 105460 }, { "epoch": 0.911924669912063, "grad_norm": 0.9019056463998824, "learning_rate": 3.413722467787748e-06, "loss": 0.2080596923828125, "step": 105465 }, { "epoch": 0.9119679034336062, "grad_norm": 0.663498922925047, "learning_rate": 3.413520680310535e-06, "loss": 0.05290641784667969, "step": 105470 }, { "epoch": 0.9120111369551496, "grad_norm": 2.3056923246627843, "learning_rate": 3.4133188909262006e-06, "loss": 0.6527420043945312, "step": 105475 }, { "epoch": 0.9120543704766928, "grad_norm": 16.547534317600565, "learning_rate": 3.4131170996356765e-06, "loss": 0.13162078857421874, "step": 105480 }, { "epoch": 0.912097603998236, "grad_norm": 0.5889143959875667, "learning_rate": 3.412915306439892e-06, "loss": 0.05446014404296875, "step": 105485 }, { "epoch": 0.9121408375197794, "grad_norm": 55.97447155299211, "learning_rate": 3.4127135113397796e-06, "loss": 0.60966796875, "step": 105490 }, { "epoch": 0.9121840710413226, "grad_norm": 1.640522551855013, "learning_rate": 3.412511714336269e-06, "loss": 0.16770973205566406, "step": 105495 }, { "epoch": 0.9122273045628658, "grad_norm": 2.06548024664772, "learning_rate": 3.4123099154302906e-06, "loss": 0.2136260986328125, "step": 105500 }, { "epoch": 0.9122705380844092, "grad_norm": 2.1108399428167477, "learning_rate": 3.4121081146227745e-06, "loss": 0.08660659790039063, "step": 105505 }, { "epoch": 0.9123137716059524, "grad_norm": 1.516910468187372, "learning_rate": 3.4119063119146537e-06, "loss": 0.0885009765625, "step": 105510 }, { "epoch": 0.9123570051274956, "grad_norm": 5.325697902618711, "learning_rate": 3.4117045073068557e-06, "loss": 0.1329803466796875, "step": 105515 }, { "epoch": 0.912400238649039, "grad_norm": 8.038475974753148, "learning_rate": 3.4115027008003135e-06, "loss": 0.397613525390625, "step": 105520 }, { "epoch": 0.9124434721705822, "grad_norm": 49.147151175614525, "learning_rate": 3.411300892395958e-06, "loss": 0.48489532470703123, "step": 105525 }, { "epoch": 0.9124867056921254, "grad_norm": 3.953263772356033, "learning_rate": 3.4110990820947183e-06, "loss": 0.1781402587890625, "step": 105530 }, { "epoch": 0.9125299392136688, "grad_norm": 5.7210266061804695, "learning_rate": 3.4108972698975262e-06, "loss": 0.2513092041015625, "step": 105535 }, { "epoch": 0.912573172735212, "grad_norm": 7.035224808546646, "learning_rate": 3.4106954558053123e-06, "loss": 0.108587646484375, "step": 105540 }, { "epoch": 0.9126164062567552, "grad_norm": 19.07537951286964, "learning_rate": 3.410493639819007e-06, "loss": 0.12242279052734376, "step": 105545 }, { "epoch": 0.9126596397782984, "grad_norm": 17.772991916545784, "learning_rate": 3.410291821939542e-06, "loss": 0.2414865493774414, "step": 105550 }, { "epoch": 0.9127028732998418, "grad_norm": 2.336478594157576, "learning_rate": 3.410090002167847e-06, "loss": 0.1771453857421875, "step": 105555 }, { "epoch": 0.912746106821385, "grad_norm": 0.5587817881828301, "learning_rate": 3.409888180504853e-06, "loss": 0.23260498046875, "step": 105560 }, { "epoch": 0.9127893403429282, "grad_norm": 2.441728304324736, "learning_rate": 3.4096863569514905e-06, "loss": 0.1061126708984375, "step": 105565 }, { "epoch": 0.9128325738644716, "grad_norm": 4.982636992476136, "learning_rate": 3.409484531508691e-06, "loss": 0.24449310302734376, "step": 105570 }, { "epoch": 0.9128758073860148, "grad_norm": 0.7568292207210007, "learning_rate": 3.409282704177385e-06, "loss": 0.012031745910644532, "step": 105575 }, { "epoch": 0.912919040907558, "grad_norm": 0.6318988680998905, "learning_rate": 3.4090808749585037e-06, "loss": 0.15601272583007814, "step": 105580 }, { "epoch": 0.9129622744291014, "grad_norm": 28.206958990561258, "learning_rate": 3.4088790438529774e-06, "loss": 0.13055572509765626, "step": 105585 }, { "epoch": 0.9130055079506446, "grad_norm": 13.694640458280102, "learning_rate": 3.4086772108617372e-06, "loss": 0.12767810821533204, "step": 105590 }, { "epoch": 0.9130487414721878, "grad_norm": 5.610886242189032, "learning_rate": 3.4084753759857136e-06, "loss": 0.0503204345703125, "step": 105595 }, { "epoch": 0.9130919749937312, "grad_norm": 0.6351974186299906, "learning_rate": 3.4082735392258375e-06, "loss": 0.1312957763671875, "step": 105600 }, { "epoch": 0.9131352085152744, "grad_norm": 6.225112748676752, "learning_rate": 3.40807170058304e-06, "loss": 0.025681686401367188, "step": 105605 }, { "epoch": 0.9131784420368176, "grad_norm": 1.0379448640796372, "learning_rate": 3.407869860058252e-06, "loss": 0.13017807006835938, "step": 105610 }, { "epoch": 0.913221675558361, "grad_norm": 21.96982254565921, "learning_rate": 3.407668017652404e-06, "loss": 0.4426750183105469, "step": 105615 }, { "epoch": 0.9132649090799042, "grad_norm": 2.1203346202139195, "learning_rate": 3.4074661733664277e-06, "loss": 0.4654022216796875, "step": 105620 }, { "epoch": 0.9133081426014474, "grad_norm": 40.88714814278635, "learning_rate": 3.4072643272012536e-06, "loss": 0.22188644409179686, "step": 105625 }, { "epoch": 0.9133513761229908, "grad_norm": 1.536605113901793, "learning_rate": 3.4070624791578115e-06, "loss": 0.08362045288085937, "step": 105630 }, { "epoch": 0.913394609644534, "grad_norm": 2.1434922374402654, "learning_rate": 3.4068606292370326e-06, "loss": 0.10768604278564453, "step": 105635 }, { "epoch": 0.9134378431660772, "grad_norm": 6.529278273858602, "learning_rate": 3.40665877743985e-06, "loss": 0.184234619140625, "step": 105640 }, { "epoch": 0.9134810766876205, "grad_norm": 1.4378872443107598, "learning_rate": 3.4064569237671933e-06, "loss": 0.2811851501464844, "step": 105645 }, { "epoch": 0.9135243102091638, "grad_norm": 8.096833303095135, "learning_rate": 3.4062550682199914e-06, "loss": 0.10963935852050781, "step": 105650 }, { "epoch": 0.913567543730707, "grad_norm": 15.622249614160365, "learning_rate": 3.4060532107991783e-06, "loss": 0.3155967712402344, "step": 105655 }, { "epoch": 0.9136107772522503, "grad_norm": 40.56953821368832, "learning_rate": 3.405851351505683e-06, "loss": 0.3200469970703125, "step": 105660 }, { "epoch": 0.9136540107737936, "grad_norm": 5.121018987205918, "learning_rate": 3.405649490340437e-06, "loss": 0.08448333740234375, "step": 105665 }, { "epoch": 0.9136972442953368, "grad_norm": 26.21835714118052, "learning_rate": 3.4054476273043725e-06, "loss": 0.19437332153320314, "step": 105670 }, { "epoch": 0.9137404778168801, "grad_norm": 1.6356923493600024, "learning_rate": 3.4052457623984195e-06, "loss": 0.10609283447265624, "step": 105675 }, { "epoch": 0.9137837113384234, "grad_norm": 15.652835799067253, "learning_rate": 3.4050438956235074e-06, "loss": 0.12231111526489258, "step": 105680 }, { "epoch": 0.9138269448599666, "grad_norm": 38.53860835118127, "learning_rate": 3.404842026980569e-06, "loss": 0.19092903137207032, "step": 105685 }, { "epoch": 0.9138701783815099, "grad_norm": 2.5505665016920225, "learning_rate": 3.4046401564705357e-06, "loss": 0.143438720703125, "step": 105690 }, { "epoch": 0.9139134119030532, "grad_norm": 2.81143302602765, "learning_rate": 3.404438284094337e-06, "loss": 0.2658718109130859, "step": 105695 }, { "epoch": 0.9139566454245964, "grad_norm": 25.405670358213076, "learning_rate": 3.4042364098529056e-06, "loss": 0.14552001953125, "step": 105700 }, { "epoch": 0.9139998789461397, "grad_norm": 6.929367703988879, "learning_rate": 3.404034533747171e-06, "loss": 0.08223419189453125, "step": 105705 }, { "epoch": 0.914043112467683, "grad_norm": 1.5695110902674143, "learning_rate": 3.4038326557780645e-06, "loss": 0.030419921875, "step": 105710 }, { "epoch": 0.9140863459892262, "grad_norm": 16.323580250821177, "learning_rate": 3.4036307759465177e-06, "loss": 0.3905914306640625, "step": 105715 }, { "epoch": 0.9141295795107695, "grad_norm": 54.292052055433416, "learning_rate": 3.4034288942534614e-06, "loss": 0.17145614624023436, "step": 105720 }, { "epoch": 0.9141728130323127, "grad_norm": 0.12090477959902435, "learning_rate": 3.4032270106998273e-06, "loss": 0.08408126831054688, "step": 105725 }, { "epoch": 0.914216046553856, "grad_norm": 0.6785138015052304, "learning_rate": 3.403025125286545e-06, "loss": 0.22098770141601562, "step": 105730 }, { "epoch": 0.9142592800753992, "grad_norm": 10.805164013432655, "learning_rate": 3.402823238014547e-06, "loss": 0.05694160461425781, "step": 105735 }, { "epoch": 0.9143025135969425, "grad_norm": 3.4364228872745124, "learning_rate": 3.402621348884763e-06, "loss": 0.061630630493164064, "step": 105740 }, { "epoch": 0.9143457471184858, "grad_norm": 6.48055699165536, "learning_rate": 3.402419457898126e-06, "loss": 0.2620674133300781, "step": 105745 }, { "epoch": 0.914388980640029, "grad_norm": 3.674498930332634, "learning_rate": 3.4022175650555652e-06, "loss": 0.0837677001953125, "step": 105750 }, { "epoch": 0.9144322141615723, "grad_norm": 7.538521206817681, "learning_rate": 3.402015670358014e-06, "loss": 0.257672119140625, "step": 105755 }, { "epoch": 0.9144754476831156, "grad_norm": 3.313047120580138, "learning_rate": 3.4018137738064e-06, "loss": 0.010843276977539062, "step": 105760 }, { "epoch": 0.9145186812046588, "grad_norm": 0.06540323364564574, "learning_rate": 3.4016118754016577e-06, "loss": 0.07532081604003907, "step": 105765 }, { "epoch": 0.9145619147262021, "grad_norm": 16.94536890405463, "learning_rate": 3.401409975144716e-06, "loss": 0.12950592041015624, "step": 105770 }, { "epoch": 0.9146051482477454, "grad_norm": 4.919160311615514, "learning_rate": 3.4012080730365072e-06, "loss": 0.11902618408203125, "step": 105775 }, { "epoch": 0.9146483817692886, "grad_norm": 0.7340592260778943, "learning_rate": 3.401006169077963e-06, "loss": 0.05404510498046875, "step": 105780 }, { "epoch": 0.9146916152908319, "grad_norm": 14.898310297639378, "learning_rate": 3.400804263270014e-06, "loss": 0.091815185546875, "step": 105785 }, { "epoch": 0.9147348488123752, "grad_norm": 0.6668296710238234, "learning_rate": 3.4006023556135893e-06, "loss": 0.0867279052734375, "step": 105790 }, { "epoch": 0.9147780823339184, "grad_norm": 6.765782484033862, "learning_rate": 3.4004004461096237e-06, "loss": 0.0381103515625, "step": 105795 }, { "epoch": 0.9148213158554617, "grad_norm": 7.169533832892295, "learning_rate": 3.400198534759046e-06, "loss": 0.199737548828125, "step": 105800 }, { "epoch": 0.914864549377005, "grad_norm": 0.8046174107577402, "learning_rate": 3.3999966215627873e-06, "loss": 0.07628669738769531, "step": 105805 }, { "epoch": 0.9149077828985482, "grad_norm": 52.388556231274656, "learning_rate": 3.399794706521781e-06, "loss": 0.244451904296875, "step": 105810 }, { "epoch": 0.9149510164200915, "grad_norm": 19.258272094140455, "learning_rate": 3.399592789636956e-06, "loss": 0.13884315490722657, "step": 105815 }, { "epoch": 0.9149942499416347, "grad_norm": 9.374972205240113, "learning_rate": 3.3993908709092447e-06, "loss": 0.1354736328125, "step": 105820 }, { "epoch": 0.915037483463178, "grad_norm": 5.61533438024466, "learning_rate": 3.3991889503395776e-06, "loss": 0.191644287109375, "step": 105825 }, { "epoch": 0.9150807169847213, "grad_norm": 1.8000871550897781, "learning_rate": 3.398987027928886e-06, "loss": 0.08214111328125, "step": 105830 }, { "epoch": 0.9151239505062645, "grad_norm": 0.1586329361357811, "learning_rate": 3.3987851036781026e-06, "loss": 0.07913379669189453, "step": 105835 }, { "epoch": 0.9151671840278078, "grad_norm": 17.592685323831947, "learning_rate": 3.3985831775881566e-06, "loss": 0.09443359375, "step": 105840 }, { "epoch": 0.9152104175493511, "grad_norm": 23.742317755267905, "learning_rate": 3.398381249659981e-06, "loss": 0.13983001708984374, "step": 105845 }, { "epoch": 0.9152536510708943, "grad_norm": 11.14912657160473, "learning_rate": 3.3981793198945053e-06, "loss": 0.21781768798828124, "step": 105850 }, { "epoch": 0.9152968845924376, "grad_norm": 5.145905674064877, "learning_rate": 3.397977388292662e-06, "loss": 0.1590810775756836, "step": 105855 }, { "epoch": 0.9153401181139809, "grad_norm": 1.5308696239347308, "learning_rate": 3.3977754548553826e-06, "loss": 0.13677520751953126, "step": 105860 }, { "epoch": 0.9153833516355241, "grad_norm": 7.624970689127544, "learning_rate": 3.397573519583598e-06, "loss": 0.15055389404296876, "step": 105865 }, { "epoch": 0.9154265851570674, "grad_norm": 6.94082182503984, "learning_rate": 3.3973715824782394e-06, "loss": 0.04661407470703125, "step": 105870 }, { "epoch": 0.9154698186786107, "grad_norm": 5.307948741961355, "learning_rate": 3.3971696435402376e-06, "loss": 0.13646621704101564, "step": 105875 }, { "epoch": 0.9155130522001539, "grad_norm": 0.8565605656209262, "learning_rate": 3.3969677027705253e-06, "loss": 0.10251083374023437, "step": 105880 }, { "epoch": 0.9155562857216972, "grad_norm": 12.012751085166785, "learning_rate": 3.396765760170032e-06, "loss": 0.1697967529296875, "step": 105885 }, { "epoch": 0.9155995192432405, "grad_norm": 12.53367967693737, "learning_rate": 3.396563815739691e-06, "loss": 0.175811767578125, "step": 105890 }, { "epoch": 0.9156427527647837, "grad_norm": 6.955916816075134, "learning_rate": 3.3963618694804327e-06, "loss": 0.06670265197753907, "step": 105895 }, { "epoch": 0.9156859862863269, "grad_norm": 7.087950827195141, "learning_rate": 3.3961599213931884e-06, "loss": 0.08565387725830079, "step": 105900 }, { "epoch": 0.9157292198078703, "grad_norm": 9.881852236902795, "learning_rate": 3.395957971478889e-06, "loss": 0.2639984130859375, "step": 105905 }, { "epoch": 0.9157724533294135, "grad_norm": 5.113621542969155, "learning_rate": 3.3957560197384666e-06, "loss": 0.30358352661132815, "step": 105910 }, { "epoch": 0.9158156868509567, "grad_norm": 26.576817766654294, "learning_rate": 3.3955540661728525e-06, "loss": 0.186602783203125, "step": 105915 }, { "epoch": 0.9158589203725, "grad_norm": 3.0228399012020533, "learning_rate": 3.395352110782978e-06, "loss": 0.06550922393798828, "step": 105920 }, { "epoch": 0.9159021538940433, "grad_norm": 6.311507066720635, "learning_rate": 3.3951501535697745e-06, "loss": 0.17220458984375, "step": 105925 }, { "epoch": 0.9159453874155865, "grad_norm": 13.592957187585279, "learning_rate": 3.394948194534174e-06, "loss": 0.21042461395263673, "step": 105930 }, { "epoch": 0.9159886209371298, "grad_norm": 38.10070820143708, "learning_rate": 3.394746233677107e-06, "loss": 0.30018310546875, "step": 105935 }, { "epoch": 0.9160318544586731, "grad_norm": 0.49851102081939636, "learning_rate": 3.394544270999504e-06, "loss": 0.1517852783203125, "step": 105940 }, { "epoch": 0.9160750879802163, "grad_norm": 11.789078950041336, "learning_rate": 3.3943423065022992e-06, "loss": 0.34845542907714844, "step": 105945 }, { "epoch": 0.9161183215017596, "grad_norm": 29.847734717901464, "learning_rate": 3.394140340186422e-06, "loss": 0.1431488037109375, "step": 105950 }, { "epoch": 0.9161615550233029, "grad_norm": 1.1161469346876935, "learning_rate": 3.3939383720528045e-06, "loss": 0.09831562042236328, "step": 105955 }, { "epoch": 0.9162047885448461, "grad_norm": 0.166771861483655, "learning_rate": 3.3937364021023783e-06, "loss": 0.06774673461914063, "step": 105960 }, { "epoch": 0.9162480220663894, "grad_norm": 0.10088152733010476, "learning_rate": 3.3935344303360734e-06, "loss": 0.2623687744140625, "step": 105965 }, { "epoch": 0.9162912555879327, "grad_norm": 14.790753467040815, "learning_rate": 3.393332456754824e-06, "loss": 0.06979141235351563, "step": 105970 }, { "epoch": 0.9163344891094759, "grad_norm": 38.56591275760824, "learning_rate": 3.3931304813595592e-06, "loss": 0.550628662109375, "step": 105975 }, { "epoch": 0.9163777226310192, "grad_norm": 1.4741775072929988, "learning_rate": 3.3929285041512113e-06, "loss": 0.16771621704101564, "step": 105980 }, { "epoch": 0.9164209561525625, "grad_norm": 2.2774548059729116, "learning_rate": 3.3927265251307126e-06, "loss": 0.2570518493652344, "step": 105985 }, { "epoch": 0.9164641896741057, "grad_norm": 52.04207215475518, "learning_rate": 3.3925245442989935e-06, "loss": 0.1923166275024414, "step": 105990 }, { "epoch": 0.9165074231956489, "grad_norm": 6.264100109415104, "learning_rate": 3.3923225616569855e-06, "loss": 0.09266357421875, "step": 105995 }, { "epoch": 0.9165506567171923, "grad_norm": 10.841532635691191, "learning_rate": 3.3921205772056203e-06, "loss": 0.1289642333984375, "step": 106000 }, { "epoch": 0.9165938902387355, "grad_norm": 1.4569238405267726, "learning_rate": 3.391918590945831e-06, "loss": 0.06827774047851562, "step": 106005 }, { "epoch": 0.9166371237602787, "grad_norm": 1.0493014692968652, "learning_rate": 3.3917166028785468e-06, "loss": 0.042032623291015626, "step": 106010 }, { "epoch": 0.9166803572818221, "grad_norm": 5.8543873576519605, "learning_rate": 3.3915146130046998e-06, "loss": 0.05904998779296875, "step": 106015 }, { "epoch": 0.9167235908033653, "grad_norm": 5.7634239455570535, "learning_rate": 3.3913126213252234e-06, "loss": 0.15178070068359376, "step": 106020 }, { "epoch": 0.9167668243249085, "grad_norm": 0.29931484525377716, "learning_rate": 3.3911106278410463e-06, "loss": 0.07260475158691407, "step": 106025 }, { "epoch": 0.9168100578464519, "grad_norm": 15.19458675593947, "learning_rate": 3.390908632553102e-06, "loss": 0.23577880859375, "step": 106030 }, { "epoch": 0.9168532913679951, "grad_norm": 16.27173944923791, "learning_rate": 3.390706635462322e-06, "loss": 0.2693115234375, "step": 106035 }, { "epoch": 0.9168965248895383, "grad_norm": 4.889073213464894, "learning_rate": 3.390504636569638e-06, "loss": 0.2100830078125, "step": 106040 }, { "epoch": 0.9169397584110817, "grad_norm": 8.328304816208426, "learning_rate": 3.3903026358759798e-06, "loss": 0.3207870006561279, "step": 106045 }, { "epoch": 0.9169829919326249, "grad_norm": 0.2529776715270526, "learning_rate": 3.3901006333822817e-06, "loss": 0.12840728759765624, "step": 106050 }, { "epoch": 0.9170262254541681, "grad_norm": 1.546689873641719, "learning_rate": 3.389898629089473e-06, "loss": 0.11267433166503907, "step": 106055 }, { "epoch": 0.9170694589757115, "grad_norm": 11.533741003558145, "learning_rate": 3.389696622998486e-06, "loss": 0.20989341735839845, "step": 106060 }, { "epoch": 0.9171126924972547, "grad_norm": 15.018619669991894, "learning_rate": 3.389494615110253e-06, "loss": 0.12695579528808593, "step": 106065 }, { "epoch": 0.9171559260187979, "grad_norm": 0.8148647571267644, "learning_rate": 3.389292605425706e-06, "loss": 0.10212783813476563, "step": 106070 }, { "epoch": 0.9171991595403411, "grad_norm": 1.5739191001282757, "learning_rate": 3.3890905939457758e-06, "loss": 0.05042724609375, "step": 106075 }, { "epoch": 0.9172423930618845, "grad_norm": 1.676469268133225, "learning_rate": 3.388888580671393e-06, "loss": 0.04675369262695313, "step": 106080 }, { "epoch": 0.9172856265834277, "grad_norm": 0.6888044165717725, "learning_rate": 3.3886865656034908e-06, "loss": 0.16668777465820311, "step": 106085 }, { "epoch": 0.917328860104971, "grad_norm": 3.8140516067758026, "learning_rate": 3.3884845487430013e-06, "loss": 0.23295135498046876, "step": 106090 }, { "epoch": 0.9173720936265143, "grad_norm": 4.814582657824117, "learning_rate": 3.3882825300908546e-06, "loss": 0.19855270385742188, "step": 106095 }, { "epoch": 0.9174153271480575, "grad_norm": 5.390254930043287, "learning_rate": 3.3880805096479837e-06, "loss": 0.029378890991210938, "step": 106100 }, { "epoch": 0.9174585606696007, "grad_norm": 7.157679938451265, "learning_rate": 3.3878784874153195e-06, "loss": 0.0646392822265625, "step": 106105 }, { "epoch": 0.9175017941911441, "grad_norm": 16.256993038080296, "learning_rate": 3.3876764633937933e-06, "loss": 0.076397705078125, "step": 106110 }, { "epoch": 0.9175450277126873, "grad_norm": 2.9954230836237854, "learning_rate": 3.3874744375843385e-06, "loss": 0.34357461929321287, "step": 106115 }, { "epoch": 0.9175882612342305, "grad_norm": 98.47086448222039, "learning_rate": 3.387272409987886e-06, "loss": 0.1233438491821289, "step": 106120 }, { "epoch": 0.9176314947557739, "grad_norm": 12.284075536593825, "learning_rate": 3.3870703806053663e-06, "loss": 0.161090087890625, "step": 106125 }, { "epoch": 0.9176747282773171, "grad_norm": 9.574650230729068, "learning_rate": 3.386868349437713e-06, "loss": 0.45557861328125, "step": 106130 }, { "epoch": 0.9177179617988603, "grad_norm": 0.9905268876036777, "learning_rate": 3.3866663164858567e-06, "loss": 0.008509063720703125, "step": 106135 }, { "epoch": 0.9177611953204037, "grad_norm": 20.253347711230738, "learning_rate": 3.386464281750729e-06, "loss": 0.11795806884765625, "step": 106140 }, { "epoch": 0.9178044288419469, "grad_norm": 12.334167798319983, "learning_rate": 3.386262245233264e-06, "loss": 0.19301414489746094, "step": 106145 }, { "epoch": 0.9178476623634901, "grad_norm": 0.7705068456388122, "learning_rate": 3.3860602069343903e-06, "loss": 0.0778879165649414, "step": 106150 }, { "epoch": 0.9178908958850334, "grad_norm": 5.559889937021929, "learning_rate": 3.3858581668550414e-06, "loss": 0.1044647216796875, "step": 106155 }, { "epoch": 0.9179341294065767, "grad_norm": 12.988564298037252, "learning_rate": 3.385656124996149e-06, "loss": 0.09178924560546875, "step": 106160 }, { "epoch": 0.9179773629281199, "grad_norm": 2.6450924021705475, "learning_rate": 3.3854540813586437e-06, "loss": 0.08992919921875, "step": 106165 }, { "epoch": 0.9180205964496632, "grad_norm": 15.823228552671932, "learning_rate": 3.385252035943459e-06, "loss": 0.056623077392578124, "step": 106170 }, { "epoch": 0.9180638299712065, "grad_norm": 47.801143061206055, "learning_rate": 3.385049988751525e-06, "loss": 0.2664939880371094, "step": 106175 }, { "epoch": 0.9181070634927497, "grad_norm": 3.4006986128481573, "learning_rate": 3.384847939783776e-06, "loss": 0.136029052734375, "step": 106180 }, { "epoch": 0.918150297014293, "grad_norm": 3.0073459799376243, "learning_rate": 3.3846458890411413e-06, "loss": 0.10673236846923828, "step": 106185 }, { "epoch": 0.9181935305358363, "grad_norm": 3.5111963008160805, "learning_rate": 3.384443836524554e-06, "loss": 0.056461334228515625, "step": 106190 }, { "epoch": 0.9182367640573795, "grad_norm": 1.018986029172459, "learning_rate": 3.384241782234945e-06, "loss": 0.03379669189453125, "step": 106195 }, { "epoch": 0.9182799975789228, "grad_norm": 19.983909066092156, "learning_rate": 3.384039726173248e-06, "loss": 0.14506988525390624, "step": 106200 }, { "epoch": 0.9183232311004661, "grad_norm": 2.9058254997651107, "learning_rate": 3.383837668340393e-06, "loss": 0.39206771850585936, "step": 106205 }, { "epoch": 0.9183664646220093, "grad_norm": 1.406563775368662, "learning_rate": 3.3836356087373126e-06, "loss": 0.029215240478515626, "step": 106210 }, { "epoch": 0.9184096981435526, "grad_norm": 3.155318583776259, "learning_rate": 3.38343354736494e-06, "loss": 0.17525177001953124, "step": 106215 }, { "epoch": 0.9184529316650959, "grad_norm": 6.39901402315111, "learning_rate": 3.3832314842242036e-06, "loss": 0.05419921875, "step": 106220 }, { "epoch": 0.9184961651866391, "grad_norm": 0.74908523741808, "learning_rate": 3.3830294193160383e-06, "loss": 0.016218948364257812, "step": 106225 }, { "epoch": 0.9185393987081824, "grad_norm": 31.43034268807685, "learning_rate": 3.3828273526413756e-06, "loss": 0.1904937744140625, "step": 106230 }, { "epoch": 0.9185826322297257, "grad_norm": 0.7209853908064666, "learning_rate": 3.382625284201147e-06, "loss": 0.06602325439453124, "step": 106235 }, { "epoch": 0.9186258657512689, "grad_norm": 1.000955821534805, "learning_rate": 3.382423213996284e-06, "loss": 0.2112579345703125, "step": 106240 }, { "epoch": 0.9186690992728122, "grad_norm": 5.029190944270782, "learning_rate": 3.382221142027719e-06, "loss": 0.10139274597167969, "step": 106245 }, { "epoch": 0.9187123327943554, "grad_norm": 34.496114172027255, "learning_rate": 3.3820190682963837e-06, "loss": 0.1413036346435547, "step": 106250 }, { "epoch": 0.9187555663158987, "grad_norm": 2.5505830901658286, "learning_rate": 3.3818169928032106e-06, "loss": 0.060027694702148436, "step": 106255 }, { "epoch": 0.918798799837442, "grad_norm": 8.535868062333005, "learning_rate": 3.381614915549131e-06, "loss": 0.05097980499267578, "step": 106260 }, { "epoch": 0.9188420333589852, "grad_norm": 0.9872791900209116, "learning_rate": 3.3814128365350774e-06, "loss": 0.19600830078125, "step": 106265 }, { "epoch": 0.9188852668805285, "grad_norm": 36.960808268117475, "learning_rate": 3.381210755761981e-06, "loss": 0.3394500732421875, "step": 106270 }, { "epoch": 0.9189285004020717, "grad_norm": 7.448369138639257, "learning_rate": 3.381008673230775e-06, "loss": 0.09896240234375, "step": 106275 }, { "epoch": 0.918971733923615, "grad_norm": 18.526090298497653, "learning_rate": 3.3808065889423903e-06, "loss": 0.16208133697509766, "step": 106280 }, { "epoch": 0.9190149674451583, "grad_norm": 6.674785359151043, "learning_rate": 3.3806045028977587e-06, "loss": 0.09519424438476562, "step": 106285 }, { "epoch": 0.9190582009667015, "grad_norm": 24.873954990014806, "learning_rate": 3.380402415097814e-06, "loss": 0.15899658203125, "step": 106290 }, { "epoch": 0.9191014344882448, "grad_norm": 32.91146661873785, "learning_rate": 3.3802003255434865e-06, "loss": 0.19812088012695311, "step": 106295 }, { "epoch": 0.9191446680097881, "grad_norm": 6.355507932707047, "learning_rate": 3.379998234235708e-06, "loss": 0.22122955322265625, "step": 106300 }, { "epoch": 0.9191879015313313, "grad_norm": 31.718883401009062, "learning_rate": 3.3797961411754117e-06, "loss": 0.30628128051757814, "step": 106305 }, { "epoch": 0.9192311350528746, "grad_norm": 0.7219270535152378, "learning_rate": 3.379594046363529e-06, "loss": 0.0447357177734375, "step": 106310 }, { "epoch": 0.9192743685744179, "grad_norm": 33.19308573619236, "learning_rate": 3.379391949800992e-06, "loss": 0.1135894775390625, "step": 106315 }, { "epoch": 0.9193176020959611, "grad_norm": 10.926850324362622, "learning_rate": 3.3791898514887337e-06, "loss": 0.3530719757080078, "step": 106320 }, { "epoch": 0.9193608356175044, "grad_norm": 0.12739488221338285, "learning_rate": 3.3789877514276846e-06, "loss": 0.09336090087890625, "step": 106325 }, { "epoch": 0.9194040691390476, "grad_norm": 0.6354080865496079, "learning_rate": 3.3787856496187776e-06, "loss": 0.2117828369140625, "step": 106330 }, { "epoch": 0.9194473026605909, "grad_norm": 10.351282771615834, "learning_rate": 3.3785835460629443e-06, "loss": 0.08013992309570313, "step": 106335 }, { "epoch": 0.9194905361821342, "grad_norm": 22.266464623461253, "learning_rate": 3.3783814407611173e-06, "loss": 0.20561599731445312, "step": 106340 }, { "epoch": 0.9195337697036774, "grad_norm": 24.291280969243168, "learning_rate": 3.378179333714229e-06, "loss": 0.20460968017578124, "step": 106345 }, { "epoch": 0.9195770032252207, "grad_norm": 13.655185314027555, "learning_rate": 3.3779772249232105e-06, "loss": 0.07589454650878906, "step": 106350 }, { "epoch": 0.919620236746764, "grad_norm": 0.8225678094520907, "learning_rate": 3.377775114388995e-06, "loss": 0.222393798828125, "step": 106355 }, { "epoch": 0.9196634702683072, "grad_norm": 2.228708604429135, "learning_rate": 3.3775730021125137e-06, "loss": 0.08650722503662109, "step": 106360 }, { "epoch": 0.9197067037898505, "grad_norm": 0.5451348101362901, "learning_rate": 3.3773708880946986e-06, "loss": 0.05003814697265625, "step": 106365 }, { "epoch": 0.9197499373113938, "grad_norm": 0.8752978768283931, "learning_rate": 3.3771687723364837e-06, "loss": 0.04582366943359375, "step": 106370 }, { "epoch": 0.919793170832937, "grad_norm": 20.048557640728266, "learning_rate": 3.3769666548387985e-06, "loss": 0.164154052734375, "step": 106375 }, { "epoch": 0.9198364043544803, "grad_norm": 0.9531675650692913, "learning_rate": 3.3767645356025765e-06, "loss": 0.028498458862304687, "step": 106380 }, { "epoch": 0.9198796378760236, "grad_norm": 0.18050923662750024, "learning_rate": 3.3765624146287507e-06, "loss": 0.2723842620849609, "step": 106385 }, { "epoch": 0.9199228713975668, "grad_norm": 1.7631811380438054, "learning_rate": 3.376360291918251e-06, "loss": 0.017425537109375, "step": 106390 }, { "epoch": 0.9199661049191101, "grad_norm": 5.436557244396602, "learning_rate": 3.376158167472011e-06, "loss": 0.0817108154296875, "step": 106395 }, { "epoch": 0.9200093384406534, "grad_norm": 48.00259443180208, "learning_rate": 3.375956041290964e-06, "loss": 0.10057525634765625, "step": 106400 }, { "epoch": 0.9200525719621966, "grad_norm": 4.82021143211363, "learning_rate": 3.3757539133760402e-06, "loss": 0.272607421875, "step": 106405 }, { "epoch": 0.9200958054837399, "grad_norm": 9.39207663473748, "learning_rate": 3.3755517837281722e-06, "loss": 0.1945209503173828, "step": 106410 }, { "epoch": 0.9201390390052832, "grad_norm": 5.502904440083629, "learning_rate": 3.375349652348294e-06, "loss": 0.17153053283691405, "step": 106415 }, { "epoch": 0.9201822725268264, "grad_norm": 0.2851842713714626, "learning_rate": 3.3751475192373346e-06, "loss": 0.3018951416015625, "step": 106420 }, { "epoch": 0.9202255060483696, "grad_norm": 4.3709602532764915, "learning_rate": 3.3749453843962284e-06, "loss": 0.17541275024414063, "step": 106425 }, { "epoch": 0.920268739569913, "grad_norm": 0.14141655646919432, "learning_rate": 3.374743247825908e-06, "loss": 0.018495941162109376, "step": 106430 }, { "epoch": 0.9203119730914562, "grad_norm": 40.943518049119284, "learning_rate": 3.3745411095273043e-06, "loss": 0.5451629638671875, "step": 106435 }, { "epoch": 0.9203552066129994, "grad_norm": 0.6509063313192744, "learning_rate": 3.3743389695013506e-06, "loss": 0.1542449951171875, "step": 106440 }, { "epoch": 0.9203984401345428, "grad_norm": 0.5649379079945214, "learning_rate": 3.3741368277489778e-06, "loss": 0.019347381591796876, "step": 106445 }, { "epoch": 0.920441673656086, "grad_norm": 4.833755318487319, "learning_rate": 3.373934684271119e-06, "loss": 0.08656539916992187, "step": 106450 }, { "epoch": 0.9204849071776292, "grad_norm": 1.3331860888792506, "learning_rate": 3.373732539068708e-06, "loss": 0.0477020263671875, "step": 106455 }, { "epoch": 0.9205281406991725, "grad_norm": 46.90680833241083, "learning_rate": 3.3735303921426735e-06, "loss": 0.6853965759277344, "step": 106460 }, { "epoch": 0.9205713742207158, "grad_norm": 29.592922148878536, "learning_rate": 3.373328243493951e-06, "loss": 0.3127758026123047, "step": 106465 }, { "epoch": 0.920614607742259, "grad_norm": 0.7709218728690064, "learning_rate": 3.373126093123472e-06, "loss": 0.015027618408203125, "step": 106470 }, { "epoch": 0.9206578412638023, "grad_norm": 43.391403597025125, "learning_rate": 3.3729239410321676e-06, "loss": 0.33983097076416013, "step": 106475 }, { "epoch": 0.9207010747853456, "grad_norm": 4.070944028691771, "learning_rate": 3.372721787220971e-06, "loss": 0.34038543701171875, "step": 106480 }, { "epoch": 0.9207443083068888, "grad_norm": 16.878768136819993, "learning_rate": 3.3725196316908147e-06, "loss": 0.12689056396484374, "step": 106485 }, { "epoch": 0.9207875418284321, "grad_norm": 19.313139265892957, "learning_rate": 3.3723174744426306e-06, "loss": 0.08548860549926758, "step": 106490 }, { "epoch": 0.9208307753499754, "grad_norm": 74.0690307245934, "learning_rate": 3.3721153154773514e-06, "loss": 0.10216751098632812, "step": 106495 }, { "epoch": 0.9208740088715186, "grad_norm": 1.1684866401689695, "learning_rate": 3.3719131547959098e-06, "loss": 0.187542724609375, "step": 106500 }, { "epoch": 0.9209172423930618, "grad_norm": 6.1342224342623135, "learning_rate": 3.3717109923992366e-06, "loss": 0.29857940673828126, "step": 106505 }, { "epoch": 0.9209604759146052, "grad_norm": 16.52807520973658, "learning_rate": 3.371508828288265e-06, "loss": 0.11440582275390625, "step": 106510 }, { "epoch": 0.9210037094361484, "grad_norm": 4.290601626906495, "learning_rate": 3.3713066624639285e-06, "loss": 0.054864501953125, "step": 106515 }, { "epoch": 0.9210469429576916, "grad_norm": 30.222391561507983, "learning_rate": 3.3711044949271582e-06, "loss": 0.11351585388183594, "step": 106520 }, { "epoch": 0.921090176479235, "grad_norm": 9.059958126041195, "learning_rate": 3.370902325678886e-06, "loss": 0.16309051513671874, "step": 106525 }, { "epoch": 0.9211334100007782, "grad_norm": 39.24835440860065, "learning_rate": 3.3707001547200455e-06, "loss": 0.199432373046875, "step": 106530 }, { "epoch": 0.9211766435223214, "grad_norm": 0.3024167234237827, "learning_rate": 3.370497982051569e-06, "loss": 0.02482757568359375, "step": 106535 }, { "epoch": 0.9212198770438648, "grad_norm": 24.720711163071016, "learning_rate": 3.370295807674388e-06, "loss": 0.27612991333007814, "step": 106540 }, { "epoch": 0.921263110565408, "grad_norm": 23.17226948536282, "learning_rate": 3.3700936315894363e-06, "loss": 0.08492355346679688, "step": 106545 }, { "epoch": 0.9213063440869512, "grad_norm": 13.425706524435558, "learning_rate": 3.3698914537976454e-06, "loss": 0.15981884002685548, "step": 106550 }, { "epoch": 0.9213495776084946, "grad_norm": 11.62564790578213, "learning_rate": 3.3696892742999466e-06, "loss": 0.1448822021484375, "step": 106555 }, { "epoch": 0.9213928111300378, "grad_norm": 12.21526578614958, "learning_rate": 3.3694870930972743e-06, "loss": 0.1349273681640625, "step": 106560 }, { "epoch": 0.921436044651581, "grad_norm": 1.9565211187718485, "learning_rate": 3.36928491019056e-06, "loss": 0.04227581024169922, "step": 106565 }, { "epoch": 0.9214792781731244, "grad_norm": 45.827105873205525, "learning_rate": 3.3690827255807364e-06, "loss": 0.43996734619140626, "step": 106570 }, { "epoch": 0.9215225116946676, "grad_norm": 17.681065910791332, "learning_rate": 3.3688805392687363e-06, "loss": 0.034946441650390625, "step": 106575 }, { "epoch": 0.9215657452162108, "grad_norm": 14.506679822576755, "learning_rate": 3.3686783512554918e-06, "loss": 0.17444305419921874, "step": 106580 }, { "epoch": 0.9216089787377542, "grad_norm": 0.07522214321892383, "learning_rate": 3.3684761615419345e-06, "loss": 0.03621406555175781, "step": 106585 }, { "epoch": 0.9216522122592974, "grad_norm": 5.339836956734435, "learning_rate": 3.3682739701289974e-06, "loss": 0.10560455322265624, "step": 106590 }, { "epoch": 0.9216954457808406, "grad_norm": 0.2230742535217041, "learning_rate": 3.368071777017615e-06, "loss": 0.08022117614746094, "step": 106595 }, { "epoch": 0.9217386793023838, "grad_norm": 0.6751669875910655, "learning_rate": 3.367869582208717e-06, "loss": 0.15668182373046874, "step": 106600 }, { "epoch": 0.9217819128239272, "grad_norm": 7.357655385641153, "learning_rate": 3.367667385703237e-06, "loss": 0.10226402282714844, "step": 106605 }, { "epoch": 0.9218251463454704, "grad_norm": 4.0744183735296895, "learning_rate": 3.367465187502108e-06, "loss": 0.30927581787109376, "step": 106610 }, { "epoch": 0.9218683798670136, "grad_norm": 1.1377103752046767, "learning_rate": 3.367262987606261e-06, "loss": 0.11388206481933594, "step": 106615 }, { "epoch": 0.921911613388557, "grad_norm": 5.665249740513407, "learning_rate": 3.36706078601663e-06, "loss": 0.0994659423828125, "step": 106620 }, { "epoch": 0.9219548469101002, "grad_norm": 3.5796670633044454, "learning_rate": 3.3668585827341474e-06, "loss": 0.050574111938476565, "step": 106625 }, { "epoch": 0.9219980804316434, "grad_norm": 2.449992696548071, "learning_rate": 3.3666563777597457e-06, "loss": 0.3144866943359375, "step": 106630 }, { "epoch": 0.9220413139531868, "grad_norm": 0.49255275319329567, "learning_rate": 3.3664541710943567e-06, "loss": 0.06649246215820312, "step": 106635 }, { "epoch": 0.92208454747473, "grad_norm": 0.1819038624168325, "learning_rate": 3.3662519627389133e-06, "loss": 0.14930877685546876, "step": 106640 }, { "epoch": 0.9221277809962732, "grad_norm": 1.3038738013384947, "learning_rate": 3.3660497526943486e-06, "loss": 0.12586135864257814, "step": 106645 }, { "epoch": 0.9221710145178166, "grad_norm": 4.289258042989628, "learning_rate": 3.3658475409615947e-06, "loss": 0.022379302978515626, "step": 106650 }, { "epoch": 0.9222142480393598, "grad_norm": 26.017276658355826, "learning_rate": 3.3656453275415838e-06, "loss": 0.42218017578125, "step": 106655 }, { "epoch": 0.922257481560903, "grad_norm": 5.753791611095678, "learning_rate": 3.3654431124352497e-06, "loss": 0.047591781616210936, "step": 106660 }, { "epoch": 0.9223007150824464, "grad_norm": 3.8947284408117167, "learning_rate": 3.3652408956435233e-06, "loss": 0.0742532730102539, "step": 106665 }, { "epoch": 0.9223439486039896, "grad_norm": 0.3755956661720871, "learning_rate": 3.3650386771673394e-06, "loss": 0.059619140625, "step": 106670 }, { "epoch": 0.9223871821255328, "grad_norm": 27.589935552555414, "learning_rate": 3.3648364570076288e-06, "loss": 0.116094970703125, "step": 106675 }, { "epoch": 0.9224304156470761, "grad_norm": 1.4838222691706853, "learning_rate": 3.3646342351653245e-06, "loss": 0.038100814819335936, "step": 106680 }, { "epoch": 0.9224736491686194, "grad_norm": 21.149574355787664, "learning_rate": 3.3644320116413595e-06, "loss": 0.0931121826171875, "step": 106685 }, { "epoch": 0.9225168826901626, "grad_norm": 2.7963613572968327, "learning_rate": 3.3642297864366664e-06, "loss": 0.184228515625, "step": 106690 }, { "epoch": 0.9225601162117059, "grad_norm": 1.1553458024644807, "learning_rate": 3.364027559552178e-06, "loss": 0.38658962249755857, "step": 106695 }, { "epoch": 0.9226033497332492, "grad_norm": 10.751068913975532, "learning_rate": 3.363825330988826e-06, "loss": 0.4778289794921875, "step": 106700 }, { "epoch": 0.9226465832547924, "grad_norm": 1.576848764889923, "learning_rate": 3.3636231007475436e-06, "loss": 0.0334503173828125, "step": 106705 }, { "epoch": 0.9226898167763357, "grad_norm": 0.3182704602181321, "learning_rate": 3.363420868829264e-06, "loss": 0.0362091064453125, "step": 106710 }, { "epoch": 0.922733050297879, "grad_norm": 9.307050964103647, "learning_rate": 3.3632186352349193e-06, "loss": 0.08947744369506835, "step": 106715 }, { "epoch": 0.9227762838194222, "grad_norm": 33.12229379808931, "learning_rate": 3.3630163999654423e-06, "loss": 0.39900970458984375, "step": 106720 }, { "epoch": 0.9228195173409655, "grad_norm": 24.825295309753336, "learning_rate": 3.3628141630217662e-06, "loss": 0.301617431640625, "step": 106725 }, { "epoch": 0.9228627508625088, "grad_norm": 0.53430497899059, "learning_rate": 3.3626119244048223e-06, "loss": 0.20172119140625, "step": 106730 }, { "epoch": 0.922905984384052, "grad_norm": 1.2981165799163277, "learning_rate": 3.3624096841155448e-06, "loss": 0.01843547821044922, "step": 106735 }, { "epoch": 0.9229492179055953, "grad_norm": 5.76482687023903, "learning_rate": 3.3622074421548667e-06, "loss": 0.08892669677734374, "step": 106740 }, { "epoch": 0.9229924514271386, "grad_norm": 2.4419691381099295, "learning_rate": 3.3620051985237185e-06, "loss": 0.22296562194824218, "step": 106745 }, { "epoch": 0.9230356849486818, "grad_norm": 4.330584114574907, "learning_rate": 3.361802953223035e-06, "loss": 0.24581222534179686, "step": 106750 }, { "epoch": 0.923078918470225, "grad_norm": 6.194482722148359, "learning_rate": 3.3616007062537477e-06, "loss": 0.20977935791015626, "step": 106755 }, { "epoch": 0.9231221519917684, "grad_norm": 13.842407454377904, "learning_rate": 3.3613984576167903e-06, "loss": 0.22724876403808594, "step": 106760 }, { "epoch": 0.9231653855133116, "grad_norm": 19.921390440019742, "learning_rate": 3.3611962073130956e-06, "loss": 0.233074951171875, "step": 106765 }, { "epoch": 0.9232086190348548, "grad_norm": 0.23195181480805963, "learning_rate": 3.3609939553435954e-06, "loss": 0.056043243408203124, "step": 106770 }, { "epoch": 0.9232518525563981, "grad_norm": 3.495464858245596, "learning_rate": 3.3607917017092236e-06, "loss": 0.07469406127929687, "step": 106775 }, { "epoch": 0.9232950860779414, "grad_norm": 6.768063782047886, "learning_rate": 3.360589446410912e-06, "loss": 0.09925079345703125, "step": 106780 }, { "epoch": 0.9233383195994846, "grad_norm": 0.08032875922730111, "learning_rate": 3.3603871894495933e-06, "loss": 0.12543563842773436, "step": 106785 }, { "epoch": 0.9233815531210279, "grad_norm": 12.02141810203963, "learning_rate": 3.360184930826201e-06, "loss": 0.09012069702148437, "step": 106790 }, { "epoch": 0.9234247866425712, "grad_norm": 27.239857295006775, "learning_rate": 3.3599826705416675e-06, "loss": 0.1930694580078125, "step": 106795 }, { "epoch": 0.9234680201641144, "grad_norm": 1.6215733934403491, "learning_rate": 3.359780408596926e-06, "loss": 0.14012641906738282, "step": 106800 }, { "epoch": 0.9235112536856577, "grad_norm": 6.6156227603359286, "learning_rate": 3.359578144992909e-06, "loss": 0.22992324829101562, "step": 106805 }, { "epoch": 0.923554487207201, "grad_norm": 1.7856285255710242, "learning_rate": 3.359375879730549e-06, "loss": 0.17357635498046875, "step": 106810 }, { "epoch": 0.9235977207287442, "grad_norm": 29.707391667195274, "learning_rate": 3.359173612810779e-06, "loss": 0.09918670654296875, "step": 106815 }, { "epoch": 0.9236409542502875, "grad_norm": 8.691169099561888, "learning_rate": 3.3589713442345326e-06, "loss": 0.31416854858398435, "step": 106820 }, { "epoch": 0.9236841877718308, "grad_norm": 1.920533238910717, "learning_rate": 3.358769074002742e-06, "loss": 0.347137451171875, "step": 106825 }, { "epoch": 0.923727421293374, "grad_norm": 4.844461239061457, "learning_rate": 3.3585668021163404e-06, "loss": 0.18105621337890626, "step": 106830 }, { "epoch": 0.9237706548149173, "grad_norm": 2.405542997043811, "learning_rate": 3.35836452857626e-06, "loss": 0.052652740478515626, "step": 106835 }, { "epoch": 0.9238138883364606, "grad_norm": 0.08178182540572343, "learning_rate": 3.358162253383434e-06, "loss": 0.24133720397949218, "step": 106840 }, { "epoch": 0.9238571218580038, "grad_norm": 21.63852150679662, "learning_rate": 3.3579599765387955e-06, "loss": 0.18701629638671874, "step": 106845 }, { "epoch": 0.9239003553795471, "grad_norm": 0.8534978650417517, "learning_rate": 3.357757698043278e-06, "loss": 0.11537628173828125, "step": 106850 }, { "epoch": 0.9239435889010903, "grad_norm": 0.7543188941807545, "learning_rate": 3.3575554178978128e-06, "loss": 0.144580078125, "step": 106855 }, { "epoch": 0.9239868224226336, "grad_norm": 5.604336187141335, "learning_rate": 3.3573531361033334e-06, "loss": 0.10519676208496094, "step": 106860 }, { "epoch": 0.9240300559441769, "grad_norm": 1.2706552218034814, "learning_rate": 3.357150852660773e-06, "loss": 0.06363201141357422, "step": 106865 }, { "epoch": 0.9240732894657201, "grad_norm": 16.64006301418791, "learning_rate": 3.356948567571064e-06, "loss": 0.06329574584960937, "step": 106870 }, { "epoch": 0.9241165229872634, "grad_norm": 6.277612558642778, "learning_rate": 3.3567462808351403e-06, "loss": 0.25362720489501955, "step": 106875 }, { "epoch": 0.9241597565088067, "grad_norm": 66.28418221735615, "learning_rate": 3.3565439924539346e-06, "loss": 0.3389373779296875, "step": 106880 }, { "epoch": 0.9242029900303499, "grad_norm": 4.214889896872108, "learning_rate": 3.3563417024283795e-06, "loss": 0.18184814453125, "step": 106885 }, { "epoch": 0.9242462235518932, "grad_norm": 1.885449903109106, "learning_rate": 3.3561394107594073e-06, "loss": 0.2330322265625, "step": 106890 }, { "epoch": 0.9242894570734365, "grad_norm": 0.179380265845261, "learning_rate": 3.3559371174479527e-06, "loss": 0.06194915771484375, "step": 106895 }, { "epoch": 0.9243326905949797, "grad_norm": 1.792514767277828, "learning_rate": 3.3557348224949466e-06, "loss": 0.1193878173828125, "step": 106900 }, { "epoch": 0.924375924116523, "grad_norm": 13.974445000769082, "learning_rate": 3.3555325259013227e-06, "loss": 0.21485214233398436, "step": 106905 }, { "epoch": 0.9244191576380663, "grad_norm": 5.155107447158649, "learning_rate": 3.355330227668015e-06, "loss": 0.18851318359375, "step": 106910 }, { "epoch": 0.9244623911596095, "grad_norm": 33.308976535101465, "learning_rate": 3.3551279277959556e-06, "loss": 0.32047595977783205, "step": 106915 }, { "epoch": 0.9245056246811528, "grad_norm": 2.4319993281384558, "learning_rate": 3.3549256262860767e-06, "loss": 0.2963226318359375, "step": 106920 }, { "epoch": 0.9245488582026961, "grad_norm": 51.29560395510108, "learning_rate": 3.3547233231393135e-06, "loss": 0.15996246337890624, "step": 106925 }, { "epoch": 0.9245920917242393, "grad_norm": 4.468402822717809, "learning_rate": 3.3545210183565962e-06, "loss": 0.42558975219726564, "step": 106930 }, { "epoch": 0.9246353252457826, "grad_norm": 43.98465095781009, "learning_rate": 3.3543187119388602e-06, "loss": 0.3604438781738281, "step": 106935 }, { "epoch": 0.9246785587673259, "grad_norm": 2.357125544014361, "learning_rate": 3.3541164038870376e-06, "loss": 0.13243560791015624, "step": 106940 }, { "epoch": 0.9247217922888691, "grad_norm": 0.4819164917963191, "learning_rate": 3.3539140942020617e-06, "loss": 0.15643081665039063, "step": 106945 }, { "epoch": 0.9247650258104123, "grad_norm": 0.10372478823482031, "learning_rate": 3.3537117828848648e-06, "loss": 0.01753692626953125, "step": 106950 }, { "epoch": 0.9248082593319557, "grad_norm": 14.044697846169942, "learning_rate": 3.3535094699363794e-06, "loss": 0.1300588607788086, "step": 106955 }, { "epoch": 0.9248514928534989, "grad_norm": 12.804384109589712, "learning_rate": 3.3533071553575415e-06, "loss": 0.5838336944580078, "step": 106960 }, { "epoch": 0.9248947263750421, "grad_norm": 18.633698043180907, "learning_rate": 3.353104839149282e-06, "loss": 0.10918731689453125, "step": 106965 }, { "epoch": 0.9249379598965854, "grad_norm": 16.50016185607239, "learning_rate": 3.3529025213125326e-06, "loss": 0.115374755859375, "step": 106970 }, { "epoch": 0.9249811934181287, "grad_norm": 0.40841302766595566, "learning_rate": 3.3527002018482293e-06, "loss": 0.04918975830078125, "step": 106975 }, { "epoch": 0.9250244269396719, "grad_norm": 2.7014145135587984, "learning_rate": 3.3524978807573034e-06, "loss": 0.1014434814453125, "step": 106980 }, { "epoch": 0.9250676604612152, "grad_norm": 4.118044754599754, "learning_rate": 3.3522955580406883e-06, "loss": 0.19900741577148437, "step": 106985 }, { "epoch": 0.9251108939827585, "grad_norm": 0.6461226030833119, "learning_rate": 3.3520932336993176e-06, "loss": 0.07050971984863282, "step": 106990 }, { "epoch": 0.9251541275043017, "grad_norm": 0.8841620240359543, "learning_rate": 3.3518909077341237e-06, "loss": 0.240875244140625, "step": 106995 }, { "epoch": 0.925197361025845, "grad_norm": 0.04638824907526806, "learning_rate": 3.35168858014604e-06, "loss": 0.24391326904296876, "step": 107000 }, { "epoch": 0.9252405945473883, "grad_norm": 6.740850849522922, "learning_rate": 3.351486250936e-06, "loss": 0.06783905029296874, "step": 107005 }, { "epoch": 0.9252838280689315, "grad_norm": 44.69507197387724, "learning_rate": 3.351283920104936e-06, "loss": 0.11315078735351562, "step": 107010 }, { "epoch": 0.9253270615904748, "grad_norm": 6.174894739070488, "learning_rate": 3.3510815876537817e-06, "loss": 0.08253021240234375, "step": 107015 }, { "epoch": 0.9253702951120181, "grad_norm": 6.944469173189599, "learning_rate": 3.3508792535834706e-06, "loss": 0.15269775390625, "step": 107020 }, { "epoch": 0.9254135286335613, "grad_norm": 20.8625279755209, "learning_rate": 3.3506769178949356e-06, "loss": 0.1451202392578125, "step": 107025 }, { "epoch": 0.9254567621551045, "grad_norm": 1.0253507174987924, "learning_rate": 3.3504745805891094e-06, "loss": 0.19972553253173828, "step": 107030 }, { "epoch": 0.9254999956766479, "grad_norm": 9.990904716304838, "learning_rate": 3.350272241666925e-06, "loss": 0.026047134399414064, "step": 107035 }, { "epoch": 0.9255432291981911, "grad_norm": 0.6078289026517147, "learning_rate": 3.350069901129315e-06, "loss": 0.2991523742675781, "step": 107040 }, { "epoch": 0.9255864627197343, "grad_norm": 2.614514264781669, "learning_rate": 3.3498675589772153e-06, "loss": 0.09706134796142578, "step": 107045 }, { "epoch": 0.9256296962412777, "grad_norm": 0.6332851869289566, "learning_rate": 3.349665215211556e-06, "loss": 0.249639892578125, "step": 107050 }, { "epoch": 0.9256729297628209, "grad_norm": 32.90857203000553, "learning_rate": 3.349462869833273e-06, "loss": 0.40755767822265626, "step": 107055 }, { "epoch": 0.9257161632843641, "grad_norm": 4.048843541453337, "learning_rate": 3.349260522843297e-06, "loss": 0.2576881408691406, "step": 107060 }, { "epoch": 0.9257593968059075, "grad_norm": 3.108495769654481, "learning_rate": 3.3490581742425626e-06, "loss": 0.16834259033203125, "step": 107065 }, { "epoch": 0.9258026303274507, "grad_norm": 6.606119966013662, "learning_rate": 3.3488558240320027e-06, "loss": 0.1906768798828125, "step": 107070 }, { "epoch": 0.9258458638489939, "grad_norm": 19.366142398282946, "learning_rate": 3.3486534722125515e-06, "loss": 0.09279918670654297, "step": 107075 }, { "epoch": 0.9258890973705373, "grad_norm": 10.975548684563647, "learning_rate": 3.34845111878514e-06, "loss": 0.1636219024658203, "step": 107080 }, { "epoch": 0.9259323308920805, "grad_norm": 0.10352620286515285, "learning_rate": 3.3482487637507036e-06, "loss": 0.1259521484375, "step": 107085 }, { "epoch": 0.9259755644136237, "grad_norm": 1.6825399777330816, "learning_rate": 3.3480464071101743e-06, "loss": 0.6514518737792969, "step": 107090 }, { "epoch": 0.9260187979351671, "grad_norm": 32.66415026556592, "learning_rate": 3.3478440488644857e-06, "loss": 0.29118804931640624, "step": 107095 }, { "epoch": 0.9260620314567103, "grad_norm": 19.61294643359345, "learning_rate": 3.347641689014571e-06, "loss": 0.140771484375, "step": 107100 }, { "epoch": 0.9261052649782535, "grad_norm": 0.7143293213258266, "learning_rate": 3.347439327561364e-06, "loss": 0.05213470458984375, "step": 107105 }, { "epoch": 0.9261484984997969, "grad_norm": 22.143864922786065, "learning_rate": 3.3472369645057966e-06, "loss": 0.286614990234375, "step": 107110 }, { "epoch": 0.9261917320213401, "grad_norm": 0.9237299410817452, "learning_rate": 3.347034599848804e-06, "loss": 0.20161857604980468, "step": 107115 }, { "epoch": 0.9262349655428833, "grad_norm": 23.277897036916393, "learning_rate": 3.3468322335913186e-06, "loss": 0.10588226318359376, "step": 107120 }, { "epoch": 0.9262781990644265, "grad_norm": 55.19941261625083, "learning_rate": 3.346629865734273e-06, "loss": 0.16158447265625, "step": 107125 }, { "epoch": 0.9263214325859699, "grad_norm": 13.750224007596874, "learning_rate": 3.3464274962786006e-06, "loss": 0.09407272338867187, "step": 107130 }, { "epoch": 0.9263646661075131, "grad_norm": 0.621541324775178, "learning_rate": 3.3462251252252365e-06, "loss": 0.31011810302734377, "step": 107135 }, { "epoch": 0.9264078996290563, "grad_norm": 5.729424768776348, "learning_rate": 3.346022752575112e-06, "loss": 0.05569000244140625, "step": 107140 }, { "epoch": 0.9264511331505997, "grad_norm": 13.331447616611085, "learning_rate": 3.3458203783291606e-06, "loss": 0.0966461181640625, "step": 107145 }, { "epoch": 0.9264943666721429, "grad_norm": 5.186740766973896, "learning_rate": 3.3456180024883175e-06, "loss": 0.1626220703125, "step": 107150 }, { "epoch": 0.9265376001936861, "grad_norm": 4.440223858637013, "learning_rate": 3.345415625053513e-06, "loss": 0.10932769775390624, "step": 107155 }, { "epoch": 0.9265808337152295, "grad_norm": 2.8907679562009054, "learning_rate": 3.345213246025683e-06, "loss": 0.25048370361328126, "step": 107160 }, { "epoch": 0.9266240672367727, "grad_norm": 6.7241409054965615, "learning_rate": 3.3450108654057606e-06, "loss": 0.1641571044921875, "step": 107165 }, { "epoch": 0.9266673007583159, "grad_norm": 7.46633982859314, "learning_rate": 3.344808483194678e-06, "loss": 0.17026100158691407, "step": 107170 }, { "epoch": 0.9267105342798593, "grad_norm": 15.730770936121434, "learning_rate": 3.3446060993933692e-06, "loss": 0.1517608642578125, "step": 107175 }, { "epoch": 0.9267537678014025, "grad_norm": 2.0778896487878384, "learning_rate": 3.3444037140027675e-06, "loss": 0.12435760498046874, "step": 107180 }, { "epoch": 0.9267970013229457, "grad_norm": 28.65655013020746, "learning_rate": 3.3442013270238062e-06, "loss": 0.17628021240234376, "step": 107185 }, { "epoch": 0.9268402348444891, "grad_norm": 12.202352464319508, "learning_rate": 3.343998938457419e-06, "loss": 0.0860565185546875, "step": 107190 }, { "epoch": 0.9268834683660323, "grad_norm": 31.119955492992258, "learning_rate": 3.3437965483045395e-06, "loss": 0.3346595764160156, "step": 107195 }, { "epoch": 0.9269267018875755, "grad_norm": 2.7354678274838617, "learning_rate": 3.3435941565661e-06, "loss": 0.0618927001953125, "step": 107200 }, { "epoch": 0.9269699354091188, "grad_norm": 32.97092777978066, "learning_rate": 3.3433917632430344e-06, "loss": 0.17091522216796876, "step": 107205 }, { "epoch": 0.9270131689306621, "grad_norm": 12.267872790203432, "learning_rate": 3.3431893683362767e-06, "loss": 0.473175048828125, "step": 107210 }, { "epoch": 0.9270564024522053, "grad_norm": 3.697455735557134, "learning_rate": 3.34298697184676e-06, "loss": 0.098431396484375, "step": 107215 }, { "epoch": 0.9270996359737486, "grad_norm": 1.1273363488428945, "learning_rate": 3.3427845737754187e-06, "loss": 0.06401214599609376, "step": 107220 }, { "epoch": 0.9271428694952919, "grad_norm": 5.627360370012989, "learning_rate": 3.3425821741231835e-06, "loss": 0.10627593994140624, "step": 107225 }, { "epoch": 0.9271861030168351, "grad_norm": 49.33217965431821, "learning_rate": 3.3423797728909904e-06, "loss": 0.26915283203125, "step": 107230 }, { "epoch": 0.9272293365383784, "grad_norm": 1.0687731235143598, "learning_rate": 3.342177370079772e-06, "loss": 0.13808250427246094, "step": 107235 }, { "epoch": 0.9272725700599217, "grad_norm": 0.2180045559156766, "learning_rate": 3.3419749656904615e-06, "loss": 0.17671051025390624, "step": 107240 }, { "epoch": 0.9273158035814649, "grad_norm": 20.159232791099843, "learning_rate": 3.341772559723993e-06, "loss": 0.19174423217773437, "step": 107245 }, { "epoch": 0.9273590371030082, "grad_norm": 0.5865948372718028, "learning_rate": 3.3415701521813e-06, "loss": 0.108837890625, "step": 107250 }, { "epoch": 0.9274022706245515, "grad_norm": 1.2006549095888437, "learning_rate": 3.3413677430633147e-06, "loss": 0.06830368041992188, "step": 107255 }, { "epoch": 0.9274455041460947, "grad_norm": 12.3183243668497, "learning_rate": 3.341165332370973e-06, "loss": 0.10822200775146484, "step": 107260 }, { "epoch": 0.927488737667638, "grad_norm": 3.2288865752679166, "learning_rate": 3.3409629201052053e-06, "loss": 0.1690204620361328, "step": 107265 }, { "epoch": 0.9275319711891813, "grad_norm": 26.7580838337694, "learning_rate": 3.3407605062669475e-06, "loss": 0.1859161376953125, "step": 107270 }, { "epoch": 0.9275752047107245, "grad_norm": 13.846268678364178, "learning_rate": 3.3405580908571326e-06, "loss": 0.11989059448242187, "step": 107275 }, { "epoch": 0.9276184382322678, "grad_norm": 7.074468002902209, "learning_rate": 3.3403556738766933e-06, "loss": 0.074981689453125, "step": 107280 }, { "epoch": 0.9276616717538111, "grad_norm": 1.2038593103378268, "learning_rate": 3.340153255326564e-06, "loss": 0.06987991333007812, "step": 107285 }, { "epoch": 0.9277049052753543, "grad_norm": 0.31468983018993113, "learning_rate": 3.339950835207678e-06, "loss": 0.0553166389465332, "step": 107290 }, { "epoch": 0.9277481387968975, "grad_norm": 16.552962154010572, "learning_rate": 3.339748413520969e-06, "loss": 0.1457611083984375, "step": 107295 }, { "epoch": 0.9277913723184408, "grad_norm": 46.95090566402765, "learning_rate": 3.33954599026737e-06, "loss": 0.30403289794921873, "step": 107300 }, { "epoch": 0.9278346058399841, "grad_norm": 1.459864107012972, "learning_rate": 3.3393435654478148e-06, "loss": 0.18728713989257811, "step": 107305 }, { "epoch": 0.9278778393615273, "grad_norm": 9.126387861464542, "learning_rate": 3.3391411390632376e-06, "loss": 0.285308837890625, "step": 107310 }, { "epoch": 0.9279210728830706, "grad_norm": 19.42902651489989, "learning_rate": 3.338938711114571e-06, "loss": 0.1845916748046875, "step": 107315 }, { "epoch": 0.9279643064046139, "grad_norm": 11.179037412964972, "learning_rate": 3.3387362816027488e-06, "loss": 0.06179046630859375, "step": 107320 }, { "epoch": 0.9280075399261571, "grad_norm": 1.3900747707997338, "learning_rate": 3.338533850528705e-06, "loss": 0.03587493896484375, "step": 107325 }, { "epoch": 0.9280507734477004, "grad_norm": 0.9561671405275735, "learning_rate": 3.338331417893373e-06, "loss": 0.083636474609375, "step": 107330 }, { "epoch": 0.9280940069692437, "grad_norm": 0.5595391197331241, "learning_rate": 3.338128983697686e-06, "loss": 0.04319343566894531, "step": 107335 }, { "epoch": 0.9281372404907869, "grad_norm": 5.2909136801538885, "learning_rate": 3.3379265479425792e-06, "loss": 0.106561279296875, "step": 107340 }, { "epoch": 0.9281804740123302, "grad_norm": 24.18366493562595, "learning_rate": 3.337724110628985e-06, "loss": 0.10148563385009765, "step": 107345 }, { "epoch": 0.9282237075338735, "grad_norm": 21.21425801548835, "learning_rate": 3.337521671757836e-06, "loss": 0.13165740966796874, "step": 107350 }, { "epoch": 0.9282669410554167, "grad_norm": 3.216855307502887, "learning_rate": 3.3373192313300665e-06, "loss": 0.18717994689941406, "step": 107355 }, { "epoch": 0.92831017457696, "grad_norm": 7.9746890228721, "learning_rate": 3.3371167893466114e-06, "loss": 0.12578887939453126, "step": 107360 }, { "epoch": 0.9283534080985033, "grad_norm": 1.7640155149821861, "learning_rate": 3.3369143458084028e-06, "loss": 0.07428054809570313, "step": 107365 }, { "epoch": 0.9283966416200465, "grad_norm": 1.0050459440541988, "learning_rate": 3.336711900716376e-06, "loss": 0.06605682373046876, "step": 107370 }, { "epoch": 0.9284398751415898, "grad_norm": 4.827679807629159, "learning_rate": 3.3365094540714633e-06, "loss": 0.13719863891601564, "step": 107375 }, { "epoch": 0.928483108663133, "grad_norm": 14.282781836823027, "learning_rate": 3.336307005874598e-06, "loss": 0.2317535400390625, "step": 107380 }, { "epoch": 0.9285263421846763, "grad_norm": 8.899520482676808, "learning_rate": 3.336104556126715e-06, "loss": 0.18281326293945313, "step": 107385 }, { "epoch": 0.9285695757062196, "grad_norm": 10.207370828136385, "learning_rate": 3.3359021048287477e-06, "loss": 0.06132965087890625, "step": 107390 }, { "epoch": 0.9286128092277628, "grad_norm": 3.331661494702765, "learning_rate": 3.3356996519816305e-06, "loss": 0.045749664306640625, "step": 107395 }, { "epoch": 0.9286560427493061, "grad_norm": 30.072061233748034, "learning_rate": 3.335497197586294e-06, "loss": 0.18905181884765626, "step": 107400 }, { "epoch": 0.9286992762708494, "grad_norm": 3.5693862082188974, "learning_rate": 3.3352947416436747e-06, "loss": 0.1897979736328125, "step": 107405 }, { "epoch": 0.9287425097923926, "grad_norm": 8.506179871820118, "learning_rate": 3.335092284154707e-06, "loss": 0.07761383056640625, "step": 107410 }, { "epoch": 0.9287857433139359, "grad_norm": 1.2765577797773997, "learning_rate": 3.3348898251203217e-06, "loss": 0.09667320251464843, "step": 107415 }, { "epoch": 0.9288289768354792, "grad_norm": 9.650348427731746, "learning_rate": 3.3346873645414556e-06, "loss": 0.08423881530761719, "step": 107420 }, { "epoch": 0.9288722103570224, "grad_norm": 33.36881623608536, "learning_rate": 3.3344849024190404e-06, "loss": 0.13755569458007813, "step": 107425 }, { "epoch": 0.9289154438785657, "grad_norm": 1.5313119400671296, "learning_rate": 3.3342824387540097e-06, "loss": 0.09408340454101563, "step": 107430 }, { "epoch": 0.928958677400109, "grad_norm": 10.80781101976481, "learning_rate": 3.3340799735472985e-06, "loss": 0.3438240051269531, "step": 107435 }, { "epoch": 0.9290019109216522, "grad_norm": 7.547619516684864, "learning_rate": 3.33387750679984e-06, "loss": 0.08104286193847657, "step": 107440 }, { "epoch": 0.9290451444431955, "grad_norm": 1.333376883875095, "learning_rate": 3.3336750385125674e-06, "loss": 0.12411079406738282, "step": 107445 }, { "epoch": 0.9290883779647388, "grad_norm": 3.3826832941144094, "learning_rate": 3.333472568686416e-06, "loss": 0.0451568603515625, "step": 107450 }, { "epoch": 0.929131611486282, "grad_norm": 5.4150295714152445, "learning_rate": 3.333270097322318e-06, "loss": 0.05589599609375, "step": 107455 }, { "epoch": 0.9291748450078252, "grad_norm": 0.0384176167048437, "learning_rate": 3.3330676244212077e-06, "loss": 0.013462066650390625, "step": 107460 }, { "epoch": 0.9292180785293686, "grad_norm": 20.45969412020234, "learning_rate": 3.332865149984019e-06, "loss": 0.2276470184326172, "step": 107465 }, { "epoch": 0.9292613120509118, "grad_norm": 11.772973481124358, "learning_rate": 3.332662674011686e-06, "loss": 0.04655532836914063, "step": 107470 }, { "epoch": 0.929304545572455, "grad_norm": 2.505180013878137, "learning_rate": 3.332460196505142e-06, "loss": 0.04183807373046875, "step": 107475 }, { "epoch": 0.9293477790939983, "grad_norm": 6.260637713027597, "learning_rate": 3.3322577174653206e-06, "loss": 0.10339374542236328, "step": 107480 }, { "epoch": 0.9293910126155416, "grad_norm": 1.690695048787204, "learning_rate": 3.332055236893156e-06, "loss": 0.116717529296875, "step": 107485 }, { "epoch": 0.9294342461370848, "grad_norm": 0.49824683406755405, "learning_rate": 3.331852754789582e-06, "loss": 0.0900238037109375, "step": 107490 }, { "epoch": 0.9294774796586281, "grad_norm": 6.277298549384416, "learning_rate": 3.3316502711555317e-06, "loss": 0.11387939453125, "step": 107495 }, { "epoch": 0.9295207131801714, "grad_norm": 0.26526703443169614, "learning_rate": 3.331447785991941e-06, "loss": 0.10340023040771484, "step": 107500 }, { "epoch": 0.9295639467017146, "grad_norm": 16.022221806012215, "learning_rate": 3.331245299299742e-06, "loss": 0.16922607421875, "step": 107505 }, { "epoch": 0.929607180223258, "grad_norm": 0.6509548025203762, "learning_rate": 3.3310428110798688e-06, "loss": 0.06844024658203125, "step": 107510 }, { "epoch": 0.9296504137448012, "grad_norm": 40.66902644571573, "learning_rate": 3.3308403213332558e-06, "loss": 0.34660682678222654, "step": 107515 }, { "epoch": 0.9296936472663444, "grad_norm": 27.058294015513688, "learning_rate": 3.330637830060836e-06, "loss": 0.1381061553955078, "step": 107520 }, { "epoch": 0.9297368807878877, "grad_norm": 1.034963412998358, "learning_rate": 3.330435337263544e-06, "loss": 0.08315200805664062, "step": 107525 }, { "epoch": 0.929780114309431, "grad_norm": 1.0209829492565587, "learning_rate": 3.3302328429423134e-06, "loss": 0.014156723022460937, "step": 107530 }, { "epoch": 0.9298233478309742, "grad_norm": 2.234083827190959, "learning_rate": 3.3300303470980783e-06, "loss": 0.0966278076171875, "step": 107535 }, { "epoch": 0.9298665813525175, "grad_norm": 3.3437737948865487, "learning_rate": 3.3298278497317717e-06, "loss": 0.13304634094238282, "step": 107540 }, { "epoch": 0.9299098148740608, "grad_norm": 48.21480110347195, "learning_rate": 3.329625350844329e-06, "loss": 0.5599029541015625, "step": 107545 }, { "epoch": 0.929953048395604, "grad_norm": 40.61697081458917, "learning_rate": 3.3294228504366827e-06, "loss": 0.27198333740234376, "step": 107550 }, { "epoch": 0.9299962819171472, "grad_norm": 13.88658573053556, "learning_rate": 3.3292203485097677e-06, "loss": 0.2141193389892578, "step": 107555 }, { "epoch": 0.9300395154386906, "grad_norm": 40.81015183910947, "learning_rate": 3.3290178450645164e-06, "loss": 0.086761474609375, "step": 107560 }, { "epoch": 0.9300827489602338, "grad_norm": 14.069808841681047, "learning_rate": 3.3288153401018654e-06, "loss": 0.20257568359375, "step": 107565 }, { "epoch": 0.930125982481777, "grad_norm": 2.1296549904465683, "learning_rate": 3.328612833622747e-06, "loss": 0.102716064453125, "step": 107570 }, { "epoch": 0.9301692160033204, "grad_norm": 11.985197314146001, "learning_rate": 3.3284103256280943e-06, "loss": 0.25630950927734375, "step": 107575 }, { "epoch": 0.9302124495248636, "grad_norm": 11.804523774315618, "learning_rate": 3.3282078161188427e-06, "loss": 0.11672286987304688, "step": 107580 }, { "epoch": 0.9302556830464068, "grad_norm": 1.5253627543944952, "learning_rate": 3.328005305095926e-06, "loss": 0.015930747985839842, "step": 107585 }, { "epoch": 0.9302989165679502, "grad_norm": 8.625669125915625, "learning_rate": 3.327802792560277e-06, "loss": 0.07460479736328125, "step": 107590 }, { "epoch": 0.9303421500894934, "grad_norm": 18.976236720503113, "learning_rate": 3.3276002785128315e-06, "loss": 0.26028289794921877, "step": 107595 }, { "epoch": 0.9303853836110366, "grad_norm": 13.20008732080553, "learning_rate": 3.3273977629545213e-06, "loss": 0.08311233520507813, "step": 107600 }, { "epoch": 0.93042861713258, "grad_norm": 0.6197653157368166, "learning_rate": 3.327195245886282e-06, "loss": 0.33032989501953125, "step": 107605 }, { "epoch": 0.9304718506541232, "grad_norm": 0.8976098438500294, "learning_rate": 3.3269927273090472e-06, "loss": 0.1924114227294922, "step": 107610 }, { "epoch": 0.9305150841756664, "grad_norm": 4.20026010836049, "learning_rate": 3.3267902072237516e-06, "loss": 0.11459808349609375, "step": 107615 }, { "epoch": 0.9305583176972098, "grad_norm": 11.746397766158674, "learning_rate": 3.3265876856313273e-06, "loss": 0.0471099853515625, "step": 107620 }, { "epoch": 0.930601551218753, "grad_norm": 18.674861898072695, "learning_rate": 3.3263851625327107e-06, "loss": 0.3809722900390625, "step": 107625 }, { "epoch": 0.9306447847402962, "grad_norm": 62.039376239178665, "learning_rate": 3.3261826379288335e-06, "loss": 0.16687545776367188, "step": 107630 }, { "epoch": 0.9306880182618394, "grad_norm": 1.0216085992189887, "learning_rate": 3.325980111820631e-06, "loss": 0.07420272827148437, "step": 107635 }, { "epoch": 0.9307312517833828, "grad_norm": 0.5786038772455977, "learning_rate": 3.3257775842090367e-06, "loss": 0.008308792114257812, "step": 107640 }, { "epoch": 0.930774485304926, "grad_norm": 2.207385811640696, "learning_rate": 3.325575055094986e-06, "loss": 0.04029998779296875, "step": 107645 }, { "epoch": 0.9308177188264692, "grad_norm": 2.2812843489669676, "learning_rate": 3.3253725244794116e-06, "loss": 0.146734619140625, "step": 107650 }, { "epoch": 0.9308609523480126, "grad_norm": 0.48543572211716146, "learning_rate": 3.3251699923632466e-06, "loss": 0.21095428466796876, "step": 107655 }, { "epoch": 0.9309041858695558, "grad_norm": 2.092164419722221, "learning_rate": 3.3249674587474276e-06, "loss": 0.117315673828125, "step": 107660 }, { "epoch": 0.930947419391099, "grad_norm": 6.271730683147224, "learning_rate": 3.3247649236328876e-06, "loss": 0.051921844482421875, "step": 107665 }, { "epoch": 0.9309906529126424, "grad_norm": 30.006632818402785, "learning_rate": 3.324562387020559e-06, "loss": 0.23470497131347656, "step": 107670 }, { "epoch": 0.9310338864341856, "grad_norm": 44.23732427058935, "learning_rate": 3.324359848911379e-06, "loss": 0.27008590698242185, "step": 107675 }, { "epoch": 0.9310771199557288, "grad_norm": 13.6609526434562, "learning_rate": 3.3241573093062794e-06, "loss": 0.1140411376953125, "step": 107680 }, { "epoch": 0.9311203534772722, "grad_norm": 1.5857712000445396, "learning_rate": 3.3239547682061946e-06, "loss": 0.22466583251953126, "step": 107685 }, { "epoch": 0.9311635869988154, "grad_norm": 1.998001706728274, "learning_rate": 3.3237522256120593e-06, "loss": 0.047624969482421876, "step": 107690 }, { "epoch": 0.9312068205203586, "grad_norm": 3.3626044906785286, "learning_rate": 3.3235496815248074e-06, "loss": 0.037420654296875, "step": 107695 }, { "epoch": 0.931250054041902, "grad_norm": 0.4051589199426092, "learning_rate": 3.3233471359453726e-06, "loss": 0.201678466796875, "step": 107700 }, { "epoch": 0.9312932875634452, "grad_norm": 11.668815769243423, "learning_rate": 3.3231445888746902e-06, "loss": 0.10021514892578125, "step": 107705 }, { "epoch": 0.9313365210849884, "grad_norm": 7.472505505527645, "learning_rate": 3.3229420403136936e-06, "loss": 0.06809368133544921, "step": 107710 }, { "epoch": 0.9313797546065318, "grad_norm": 5.6703524046207505, "learning_rate": 3.3227394902633153e-06, "loss": 0.029288482666015626, "step": 107715 }, { "epoch": 0.931422988128075, "grad_norm": 2.0582141928755746, "learning_rate": 3.3225369387244917e-06, "loss": 0.13645172119140625, "step": 107720 }, { "epoch": 0.9314662216496182, "grad_norm": 33.89103705151937, "learning_rate": 3.322334385698157e-06, "loss": 0.15391082763671876, "step": 107725 }, { "epoch": 0.9315094551711615, "grad_norm": 2.085524123975029, "learning_rate": 3.3221318311852443e-06, "loss": 0.6345787048339844, "step": 107730 }, { "epoch": 0.9315526886927048, "grad_norm": 1.409513240068698, "learning_rate": 3.3219292751866872e-06, "loss": 0.06570358276367187, "step": 107735 }, { "epoch": 0.931595922214248, "grad_norm": 0.9741378938381416, "learning_rate": 3.3217267177034225e-06, "loss": 0.03124847412109375, "step": 107740 }, { "epoch": 0.9316391557357913, "grad_norm": 0.6142708117846415, "learning_rate": 3.321524158736381e-06, "loss": 0.04781036376953125, "step": 107745 }, { "epoch": 0.9316823892573346, "grad_norm": 4.576100184319282, "learning_rate": 3.321321598286498e-06, "loss": 0.08995094299316406, "step": 107750 }, { "epoch": 0.9317256227788778, "grad_norm": 27.3865346663107, "learning_rate": 3.3211190363547097e-06, "loss": 0.1674276351928711, "step": 107755 }, { "epoch": 0.9317688563004211, "grad_norm": 4.994218353762296, "learning_rate": 3.3209164729419484e-06, "loss": 0.1853759765625, "step": 107760 }, { "epoch": 0.9318120898219644, "grad_norm": 42.3412918150883, "learning_rate": 3.3207139080491485e-06, "loss": 0.7682746887207031, "step": 107765 }, { "epoch": 0.9318553233435076, "grad_norm": 32.951022967783565, "learning_rate": 3.320511341677245e-06, "loss": 0.0911590576171875, "step": 107770 }, { "epoch": 0.9318985568650509, "grad_norm": 13.208870506315556, "learning_rate": 3.32030877382717e-06, "loss": 0.21276168823242186, "step": 107775 }, { "epoch": 0.9319417903865942, "grad_norm": 1.1900313570795134, "learning_rate": 3.3201062044998602e-06, "loss": 0.13580894470214844, "step": 107780 }, { "epoch": 0.9319850239081374, "grad_norm": 2.3180748831219304, "learning_rate": 3.319903633696249e-06, "loss": 0.1444915771484375, "step": 107785 }, { "epoch": 0.9320282574296807, "grad_norm": 1.4723516459513917, "learning_rate": 3.31970106141727e-06, "loss": 0.04259757995605469, "step": 107790 }, { "epoch": 0.932071490951224, "grad_norm": 0.7817009799271809, "learning_rate": 3.3194984876638585e-06, "loss": 0.1010650634765625, "step": 107795 }, { "epoch": 0.9321147244727672, "grad_norm": 37.32424561356309, "learning_rate": 3.3192959124369476e-06, "loss": 0.2053150177001953, "step": 107800 }, { "epoch": 0.9321579579943104, "grad_norm": 0.5039039786282057, "learning_rate": 3.3190933357374732e-06, "loss": 0.00887603759765625, "step": 107805 }, { "epoch": 0.9322011915158537, "grad_norm": 0.11356349646292685, "learning_rate": 3.3188907575663677e-06, "loss": 0.2624725341796875, "step": 107810 }, { "epoch": 0.932244425037397, "grad_norm": 4.328490562905253, "learning_rate": 3.3186881779245664e-06, "loss": 0.20720672607421875, "step": 107815 }, { "epoch": 0.9322876585589402, "grad_norm": 1.1036415315699777, "learning_rate": 3.3184855968130033e-06, "loss": 0.099163818359375, "step": 107820 }, { "epoch": 0.9323308920804835, "grad_norm": 28.37115899648527, "learning_rate": 3.318283014232613e-06, "loss": 0.214007568359375, "step": 107825 }, { "epoch": 0.9323741256020268, "grad_norm": 0.9364481720929098, "learning_rate": 3.318080430184329e-06, "loss": 0.19007568359375, "step": 107830 }, { "epoch": 0.93241735912357, "grad_norm": 35.17474386436487, "learning_rate": 3.3178778446690867e-06, "loss": 0.1078094482421875, "step": 107835 }, { "epoch": 0.9324605926451133, "grad_norm": 26.127868308015966, "learning_rate": 3.31767525768782e-06, "loss": 0.27458934783935546, "step": 107840 }, { "epoch": 0.9325038261666566, "grad_norm": 10.447783775587405, "learning_rate": 3.3174726692414627e-06, "loss": 0.13796844482421874, "step": 107845 }, { "epoch": 0.9325470596881998, "grad_norm": 0.7876424034025563, "learning_rate": 3.31727007933095e-06, "loss": 0.06250534057617188, "step": 107850 }, { "epoch": 0.9325902932097431, "grad_norm": 7.170962605772694, "learning_rate": 3.3170674879572144e-06, "loss": 0.33351898193359375, "step": 107855 }, { "epoch": 0.9326335267312864, "grad_norm": 24.60221746343455, "learning_rate": 3.316864895121193e-06, "loss": 0.20001354217529296, "step": 107860 }, { "epoch": 0.9326767602528296, "grad_norm": 23.941498159351756, "learning_rate": 3.3166623008238183e-06, "loss": 0.1472076416015625, "step": 107865 }, { "epoch": 0.9327199937743729, "grad_norm": 6.974426842131093, "learning_rate": 3.316459705066025e-06, "loss": 0.20604019165039061, "step": 107870 }, { "epoch": 0.9327632272959162, "grad_norm": 3.39466599180369, "learning_rate": 3.316257107848747e-06, "loss": 0.1761810302734375, "step": 107875 }, { "epoch": 0.9328064608174594, "grad_norm": 4.572835879954099, "learning_rate": 3.31605450917292e-06, "loss": 0.042107009887695314, "step": 107880 }, { "epoch": 0.9328496943390027, "grad_norm": 0.6996026382350539, "learning_rate": 3.3158519090394766e-06, "loss": 0.09512481689453126, "step": 107885 }, { "epoch": 0.932892927860546, "grad_norm": 9.413288649867699, "learning_rate": 3.315649307449353e-06, "loss": 0.09565582275390624, "step": 107890 }, { "epoch": 0.9329361613820892, "grad_norm": 0.31849116616932965, "learning_rate": 3.315446704403482e-06, "loss": 0.08585205078125, "step": 107895 }, { "epoch": 0.9329793949036325, "grad_norm": 5.681796982245118, "learning_rate": 3.3152440999027995e-06, "loss": 0.037340545654296876, "step": 107900 }, { "epoch": 0.9330226284251757, "grad_norm": 1.7515521175233613, "learning_rate": 3.3150414939482386e-06, "loss": 0.09189605712890625, "step": 107905 }, { "epoch": 0.933065861946719, "grad_norm": 2.8233667645103306, "learning_rate": 3.3148388865407342e-06, "loss": 0.06548080444335938, "step": 107910 }, { "epoch": 0.9331090954682623, "grad_norm": 7.238028764190413, "learning_rate": 3.3146362776812205e-06, "loss": 0.13533935546875, "step": 107915 }, { "epoch": 0.9331523289898055, "grad_norm": 60.47207021451787, "learning_rate": 3.314433667370632e-06, "loss": 0.45842666625976564, "step": 107920 }, { "epoch": 0.9331955625113488, "grad_norm": 20.1551270170607, "learning_rate": 3.3142310556099036e-06, "loss": 0.16402130126953124, "step": 107925 }, { "epoch": 0.9332387960328921, "grad_norm": 5.4199768669064845, "learning_rate": 3.3140284423999694e-06, "loss": 0.09760017395019531, "step": 107930 }, { "epoch": 0.9332820295544353, "grad_norm": 1.3073446810873404, "learning_rate": 3.3138258277417642e-06, "loss": 0.0459503173828125, "step": 107935 }, { "epoch": 0.9333252630759786, "grad_norm": 47.16715383023159, "learning_rate": 3.313623211636221e-06, "loss": 0.3162109375, "step": 107940 }, { "epoch": 0.9333684965975219, "grad_norm": 1.011171489348933, "learning_rate": 3.313420594084275e-06, "loss": 0.005689811706542969, "step": 107945 }, { "epoch": 0.9334117301190651, "grad_norm": 3.6267713035459495, "learning_rate": 3.313217975086862e-06, "loss": 0.13712539672851562, "step": 107950 }, { "epoch": 0.9334549636406084, "grad_norm": 3.5104967934820706, "learning_rate": 3.3130153546449142e-06, "loss": 0.26140480041503905, "step": 107955 }, { "epoch": 0.9334981971621517, "grad_norm": 13.838243359758742, "learning_rate": 3.3128127327593683e-06, "loss": 0.058675384521484374, "step": 107960 }, { "epoch": 0.9335414306836949, "grad_norm": 55.95670018541039, "learning_rate": 3.312610109431158e-06, "loss": 0.480767822265625, "step": 107965 }, { "epoch": 0.9335846642052382, "grad_norm": 1.4940702055761856, "learning_rate": 3.3124074846612167e-06, "loss": 0.034799957275390626, "step": 107970 }, { "epoch": 0.9336278977267815, "grad_norm": 12.04977071559344, "learning_rate": 3.312204858450479e-06, "loss": 0.1891693115234375, "step": 107975 }, { "epoch": 0.9336711312483247, "grad_norm": 4.988232575196548, "learning_rate": 3.3120022307998812e-06, "loss": 0.034197235107421876, "step": 107980 }, { "epoch": 0.9337143647698679, "grad_norm": 3.7527807889456817, "learning_rate": 3.311799601710357e-06, "loss": 0.1597686767578125, "step": 107985 }, { "epoch": 0.9337575982914113, "grad_norm": 5.063052473893226, "learning_rate": 3.311596971182839e-06, "loss": 0.1481536865234375, "step": 107990 }, { "epoch": 0.9338008318129545, "grad_norm": 1.2496967589728458, "learning_rate": 3.3113943392182652e-06, "loss": 0.06365776062011719, "step": 107995 }, { "epoch": 0.9338440653344977, "grad_norm": 0.5723333615912455, "learning_rate": 3.311191705817567e-06, "loss": 0.09063034057617188, "step": 108000 }, { "epoch": 0.933887298856041, "grad_norm": 1.7740082716998977, "learning_rate": 3.31098907098168e-06, "loss": 0.11746368408203126, "step": 108005 }, { "epoch": 0.9339305323775843, "grad_norm": 50.93751186657827, "learning_rate": 3.31078643471154e-06, "loss": 0.1932615280151367, "step": 108010 }, { "epoch": 0.9339737658991275, "grad_norm": 6.323320062366751, "learning_rate": 3.3105837970080795e-06, "loss": 0.26109619140625, "step": 108015 }, { "epoch": 0.9340169994206708, "grad_norm": 0.9968801420269982, "learning_rate": 3.3103811578722345e-06, "loss": 0.03663711547851563, "step": 108020 }, { "epoch": 0.9340602329422141, "grad_norm": 5.257444390268155, "learning_rate": 3.310178517304938e-06, "loss": 0.0969512939453125, "step": 108025 }, { "epoch": 0.9341034664637573, "grad_norm": 5.500056069763115, "learning_rate": 3.3099758753071264e-06, "loss": 0.13796539306640626, "step": 108030 }, { "epoch": 0.9341466999853006, "grad_norm": 15.247223148788532, "learning_rate": 3.3097732318797335e-06, "loss": 0.079913330078125, "step": 108035 }, { "epoch": 0.9341899335068439, "grad_norm": 2.9110074935944685, "learning_rate": 3.3095705870236942e-06, "loss": 0.11741809844970703, "step": 108040 }, { "epoch": 0.9342331670283871, "grad_norm": 10.794424388129855, "learning_rate": 3.3093679407399428e-06, "loss": 0.05606346130371094, "step": 108045 }, { "epoch": 0.9342764005499304, "grad_norm": 19.03850884242857, "learning_rate": 3.3091652930294126e-06, "loss": 0.0921875, "step": 108050 }, { "epoch": 0.9343196340714737, "grad_norm": 2.304452652748543, "learning_rate": 3.3089626438930397e-06, "loss": 0.20521888732910157, "step": 108055 }, { "epoch": 0.9343628675930169, "grad_norm": 0.8847660571988994, "learning_rate": 3.308759993331759e-06, "loss": 0.25304718017578126, "step": 108060 }, { "epoch": 0.9344061011145602, "grad_norm": 7.864529112534332, "learning_rate": 3.308557341346504e-06, "loss": 0.19490203857421876, "step": 108065 }, { "epoch": 0.9344493346361035, "grad_norm": 11.269167100127705, "learning_rate": 3.30835468793821e-06, "loss": 0.17336673736572267, "step": 108070 }, { "epoch": 0.9344925681576467, "grad_norm": 4.597809393836719, "learning_rate": 3.308152033107812e-06, "loss": 0.17975540161132814, "step": 108075 }, { "epoch": 0.9345358016791899, "grad_norm": 1.4963319363114653, "learning_rate": 3.3079493768562433e-06, "loss": 0.020782470703125, "step": 108080 }, { "epoch": 0.9345790352007333, "grad_norm": 0.9934427599520557, "learning_rate": 3.3077467191844387e-06, "loss": 0.2096874237060547, "step": 108085 }, { "epoch": 0.9346222687222765, "grad_norm": 26.72299865131933, "learning_rate": 3.307544060093335e-06, "loss": 0.2581787109375, "step": 108090 }, { "epoch": 0.9346655022438197, "grad_norm": 15.32789935826644, "learning_rate": 3.3073413995838645e-06, "loss": 0.3201740264892578, "step": 108095 }, { "epoch": 0.9347087357653631, "grad_norm": 0.2611637339773643, "learning_rate": 3.3071387376569624e-06, "loss": 0.0281707763671875, "step": 108100 }, { "epoch": 0.9347519692869063, "grad_norm": 5.173592787315436, "learning_rate": 3.306936074313564e-06, "loss": 0.2679147720336914, "step": 108105 }, { "epoch": 0.9347952028084495, "grad_norm": 8.545239138237307, "learning_rate": 3.3067334095546033e-06, "loss": 0.4648105621337891, "step": 108110 }, { "epoch": 0.9348384363299929, "grad_norm": 13.944616512230951, "learning_rate": 3.3065307433810147e-06, "loss": 0.185693359375, "step": 108115 }, { "epoch": 0.9348816698515361, "grad_norm": 1.0581108107971335, "learning_rate": 3.306328075793734e-06, "loss": 0.29770050048828123, "step": 108120 }, { "epoch": 0.9349249033730793, "grad_norm": 0.12463621737128562, "learning_rate": 3.3061254067936953e-06, "loss": 0.146893310546875, "step": 108125 }, { "epoch": 0.9349681368946227, "grad_norm": 6.708344435443043, "learning_rate": 3.305922736381833e-06, "loss": 0.17771453857421876, "step": 108130 }, { "epoch": 0.9350113704161659, "grad_norm": 2.778826153563948, "learning_rate": 3.3057200645590827e-06, "loss": 0.0524871826171875, "step": 108135 }, { "epoch": 0.9350546039377091, "grad_norm": 8.664983263742583, "learning_rate": 3.3055173913263775e-06, "loss": 0.1369384765625, "step": 108140 }, { "epoch": 0.9350978374592525, "grad_norm": 3.294377714040597, "learning_rate": 3.3053147166846533e-06, "loss": 0.19395484924316406, "step": 108145 }, { "epoch": 0.9351410709807957, "grad_norm": 0.705075424377913, "learning_rate": 3.3051120406348457e-06, "loss": 0.10465316772460938, "step": 108150 }, { "epoch": 0.9351843045023389, "grad_norm": 0.5570461026663464, "learning_rate": 3.3049093631778876e-06, "loss": 0.15038681030273438, "step": 108155 }, { "epoch": 0.9352275380238821, "grad_norm": 17.317871184787332, "learning_rate": 3.304706684314715e-06, "loss": 0.08385009765625, "step": 108160 }, { "epoch": 0.9352707715454255, "grad_norm": 7.9679093020144585, "learning_rate": 3.304504004046261e-06, "loss": 0.07071685791015625, "step": 108165 }, { "epoch": 0.9353140050669687, "grad_norm": 1.694769131835112, "learning_rate": 3.3043013223734613e-06, "loss": 0.18937721252441406, "step": 108170 }, { "epoch": 0.9353572385885119, "grad_norm": 3.655599663187419, "learning_rate": 3.304098639297252e-06, "loss": 0.04110722541809082, "step": 108175 }, { "epoch": 0.9354004721100553, "grad_norm": 2.831483447477576, "learning_rate": 3.3038959548185656e-06, "loss": 0.08398895263671875, "step": 108180 }, { "epoch": 0.9354437056315985, "grad_norm": 7.474846285306097, "learning_rate": 3.3036932689383384e-06, "loss": 0.06759185791015625, "step": 108185 }, { "epoch": 0.9354869391531417, "grad_norm": 42.82938753232085, "learning_rate": 3.3034905816575053e-06, "loss": 0.1742889404296875, "step": 108190 }, { "epoch": 0.9355301726746851, "grad_norm": 0.5113820736392629, "learning_rate": 3.303287892977e-06, "loss": 0.0941986083984375, "step": 108195 }, { "epoch": 0.9355734061962283, "grad_norm": 15.787158429438687, "learning_rate": 3.3030852028977563e-06, "loss": 0.05739307403564453, "step": 108200 }, { "epoch": 0.9356166397177715, "grad_norm": 8.998620887697292, "learning_rate": 3.3028825114207127e-06, "loss": 0.11643218994140625, "step": 108205 }, { "epoch": 0.9356598732393149, "grad_norm": 34.57428317704938, "learning_rate": 3.3026798185468e-06, "loss": 0.130877685546875, "step": 108210 }, { "epoch": 0.9357031067608581, "grad_norm": 0.8807705751701724, "learning_rate": 3.3024771242769554e-06, "loss": 0.030263900756835938, "step": 108215 }, { "epoch": 0.9357463402824013, "grad_norm": 7.547639629800132, "learning_rate": 3.3022744286121136e-06, "loss": 0.065191650390625, "step": 108220 }, { "epoch": 0.9357895738039447, "grad_norm": 0.6701594114109369, "learning_rate": 3.3020717315532075e-06, "loss": 0.13677902221679689, "step": 108225 }, { "epoch": 0.9358328073254879, "grad_norm": 1.2315407537425729, "learning_rate": 3.301869033101174e-06, "loss": 0.1196990966796875, "step": 108230 }, { "epoch": 0.9358760408470311, "grad_norm": 0.5274581920847593, "learning_rate": 3.301666333256948e-06, "loss": 0.106353759765625, "step": 108235 }, { "epoch": 0.9359192743685745, "grad_norm": 10.789996224270071, "learning_rate": 3.301463632021463e-06, "loss": 0.22805099487304686, "step": 108240 }, { "epoch": 0.9359625078901177, "grad_norm": 0.35317661592281435, "learning_rate": 3.301260929395654e-06, "loss": 0.12462005615234376, "step": 108245 }, { "epoch": 0.9360057414116609, "grad_norm": 17.834625022557585, "learning_rate": 3.301058225380456e-06, "loss": 0.1612701416015625, "step": 108250 }, { "epoch": 0.9360489749332042, "grad_norm": 4.979759879804711, "learning_rate": 3.300855519976805e-06, "loss": 0.3307014465332031, "step": 108255 }, { "epoch": 0.9360922084547475, "grad_norm": 27.547873083641218, "learning_rate": 3.300652813185634e-06, "loss": 0.0641510009765625, "step": 108260 }, { "epoch": 0.9361354419762907, "grad_norm": 55.050553887830134, "learning_rate": 3.300450105007879e-06, "loss": 0.42942352294921876, "step": 108265 }, { "epoch": 0.936178675497834, "grad_norm": 49.4845558705744, "learning_rate": 3.3002473954444753e-06, "loss": 0.16656723022460937, "step": 108270 }, { "epoch": 0.9362219090193773, "grad_norm": 5.653331906006453, "learning_rate": 3.3000446844963567e-06, "loss": 0.2739391326904297, "step": 108275 }, { "epoch": 0.9362651425409205, "grad_norm": 28.823598690727373, "learning_rate": 3.2998419721644577e-06, "loss": 0.203399658203125, "step": 108280 }, { "epoch": 0.9363083760624638, "grad_norm": 1.1965467443265239, "learning_rate": 3.2996392584497157e-06, "loss": 0.17697296142578126, "step": 108285 }, { "epoch": 0.9363516095840071, "grad_norm": 12.271551728549396, "learning_rate": 3.2994365433530626e-06, "loss": 0.3093914031982422, "step": 108290 }, { "epoch": 0.9363948431055503, "grad_norm": 0.8457250930192456, "learning_rate": 3.299233826875435e-06, "loss": 0.29746856689453127, "step": 108295 }, { "epoch": 0.9364380766270936, "grad_norm": 60.677947188106714, "learning_rate": 3.2990311090177675e-06, "loss": 0.36343994140625, "step": 108300 }, { "epoch": 0.9364813101486369, "grad_norm": 0.08638486131497797, "learning_rate": 3.2988283897809953e-06, "loss": 0.09333648681640624, "step": 108305 }, { "epoch": 0.9365245436701801, "grad_norm": 47.795880403942704, "learning_rate": 3.298625669166052e-06, "loss": 0.22158164978027345, "step": 108310 }, { "epoch": 0.9365677771917233, "grad_norm": 27.518224174895245, "learning_rate": 3.2984229471738742e-06, "loss": 0.1536590576171875, "step": 108315 }, { "epoch": 0.9366110107132667, "grad_norm": 0.3920235082956617, "learning_rate": 3.2982202238053956e-06, "loss": 0.09016342163085937, "step": 108320 }, { "epoch": 0.9366542442348099, "grad_norm": 4.61052155684753, "learning_rate": 3.2980174990615523e-06, "loss": 0.08088512420654297, "step": 108325 }, { "epoch": 0.9366974777563531, "grad_norm": 1.095832884331259, "learning_rate": 3.2978147729432794e-06, "loss": 0.17494354248046876, "step": 108330 }, { "epoch": 0.9367407112778964, "grad_norm": 1.5446259155556579, "learning_rate": 3.2976120454515096e-06, "loss": 0.1878185272216797, "step": 108335 }, { "epoch": 0.9367839447994397, "grad_norm": 3.549313623067352, "learning_rate": 3.2974093165871796e-06, "loss": 0.06459312438964844, "step": 108340 }, { "epoch": 0.936827178320983, "grad_norm": 13.515868078652838, "learning_rate": 3.2972065863512242e-06, "loss": 0.306353759765625, "step": 108345 }, { "epoch": 0.9368704118425262, "grad_norm": 2.3821315491770436, "learning_rate": 3.297003854744579e-06, "loss": 0.15552787780761718, "step": 108350 }, { "epoch": 0.9369136453640695, "grad_norm": 0.5515334129610938, "learning_rate": 3.2968011217681776e-06, "loss": 0.16162481307983398, "step": 108355 }, { "epoch": 0.9369568788856127, "grad_norm": 1.9464571011900846, "learning_rate": 3.2965983874229563e-06, "loss": 0.32676849365234373, "step": 108360 }, { "epoch": 0.937000112407156, "grad_norm": 16.043998849405938, "learning_rate": 3.2963956517098482e-06, "loss": 0.1293914794921875, "step": 108365 }, { "epoch": 0.9370433459286993, "grad_norm": 11.469993097008626, "learning_rate": 3.2961929146297904e-06, "loss": 0.15871734619140626, "step": 108370 }, { "epoch": 0.9370865794502425, "grad_norm": 0.3679297707597019, "learning_rate": 3.295990176183717e-06, "loss": 0.03469161987304688, "step": 108375 }, { "epoch": 0.9371298129717858, "grad_norm": 30.329763762327943, "learning_rate": 3.2957874363725632e-06, "loss": 0.09722824096679687, "step": 108380 }, { "epoch": 0.9371730464933291, "grad_norm": 6.3780714260111395, "learning_rate": 3.2955846951972637e-06, "loss": 0.15116729736328124, "step": 108385 }, { "epoch": 0.9372162800148723, "grad_norm": 4.9099908464003015, "learning_rate": 3.2953819526587536e-06, "loss": 0.08918609619140624, "step": 108390 }, { "epoch": 0.9372595135364156, "grad_norm": 7.058071670468874, "learning_rate": 3.2951792087579676e-06, "loss": 0.11420211791992188, "step": 108395 }, { "epoch": 0.9373027470579589, "grad_norm": 0.702182180365068, "learning_rate": 3.2949764634958413e-06, "loss": 0.130645751953125, "step": 108400 }, { "epoch": 0.9373459805795021, "grad_norm": 23.994635429881033, "learning_rate": 3.2947737168733106e-06, "loss": 0.296405029296875, "step": 108405 }, { "epoch": 0.9373892141010454, "grad_norm": 42.9358944225004, "learning_rate": 3.2945709688913094e-06, "loss": 0.42737579345703125, "step": 108410 }, { "epoch": 0.9374324476225887, "grad_norm": 3.588899158338891, "learning_rate": 3.294368219550772e-06, "loss": 0.12383651733398438, "step": 108415 }, { "epoch": 0.9374756811441319, "grad_norm": 0.6720192717517152, "learning_rate": 3.2941654688526346e-06, "loss": 0.22141532897949218, "step": 108420 }, { "epoch": 0.9375189146656752, "grad_norm": 1.8826389119475306, "learning_rate": 3.2939627167978326e-06, "loss": 0.04466705322265625, "step": 108425 }, { "epoch": 0.9375621481872184, "grad_norm": 2.134529455801774, "learning_rate": 3.2937599633873006e-06, "loss": 0.1720245361328125, "step": 108430 }, { "epoch": 0.9376053817087617, "grad_norm": 18.63896202503529, "learning_rate": 3.293557208621973e-06, "loss": 0.12093582153320312, "step": 108435 }, { "epoch": 0.937648615230305, "grad_norm": 0.4286541920917439, "learning_rate": 3.293354452502786e-06, "loss": 0.07202205657958985, "step": 108440 }, { "epoch": 0.9376918487518482, "grad_norm": 0.07841100942752642, "learning_rate": 3.293151695030674e-06, "loss": 0.06495857238769531, "step": 108445 }, { "epoch": 0.9377350822733915, "grad_norm": 6.154514834117447, "learning_rate": 3.2929489362065713e-06, "loss": 0.16060028076171876, "step": 108450 }, { "epoch": 0.9377783157949348, "grad_norm": 8.004165508380458, "learning_rate": 3.2927461760314152e-06, "loss": 0.120574951171875, "step": 108455 }, { "epoch": 0.937821549316478, "grad_norm": 2.2037340783222366, "learning_rate": 3.2925434145061394e-06, "loss": 0.0384765625, "step": 108460 }, { "epoch": 0.9378647828380213, "grad_norm": 18.39244833125044, "learning_rate": 3.2923406516316786e-06, "loss": 0.319573974609375, "step": 108465 }, { "epoch": 0.9379080163595646, "grad_norm": 19.93025162609197, "learning_rate": 3.2921378874089692e-06, "loss": 0.1617156982421875, "step": 108470 }, { "epoch": 0.9379512498811078, "grad_norm": 22.216444176581057, "learning_rate": 3.291935121838946e-06, "loss": 0.3554573059082031, "step": 108475 }, { "epoch": 0.9379944834026511, "grad_norm": 25.61638912909868, "learning_rate": 3.2917323549225426e-06, "loss": 0.11292724609375, "step": 108480 }, { "epoch": 0.9380377169241944, "grad_norm": 1.0714020014445194, "learning_rate": 3.291529586660697e-06, "loss": 0.07891502380371093, "step": 108485 }, { "epoch": 0.9380809504457376, "grad_norm": 3.8487521274913443, "learning_rate": 3.2913268170543417e-06, "loss": 0.03824195861816406, "step": 108490 }, { "epoch": 0.9381241839672809, "grad_norm": 7.316504371978335, "learning_rate": 3.2911240461044124e-06, "loss": 0.2934844970703125, "step": 108495 }, { "epoch": 0.9381674174888242, "grad_norm": 12.27023133588527, "learning_rate": 3.290921273811846e-06, "loss": 0.17836227416992187, "step": 108500 }, { "epoch": 0.9382106510103674, "grad_norm": 7.511173417981167, "learning_rate": 3.2907185001775752e-06, "loss": 0.1433879852294922, "step": 108505 }, { "epoch": 0.9382538845319106, "grad_norm": 5.761957952523643, "learning_rate": 3.290515725202537e-06, "loss": 0.12676467895507812, "step": 108510 }, { "epoch": 0.938297118053454, "grad_norm": 15.346110792490665, "learning_rate": 3.2903129488876656e-06, "loss": 0.242779541015625, "step": 108515 }, { "epoch": 0.9383403515749972, "grad_norm": 2.587724486379231, "learning_rate": 3.2901101712338974e-06, "loss": 0.09796695709228516, "step": 108520 }, { "epoch": 0.9383835850965404, "grad_norm": 16.89625249425045, "learning_rate": 3.289907392242166e-06, "loss": 0.1317901611328125, "step": 108525 }, { "epoch": 0.9384268186180837, "grad_norm": 1.6478509285756167, "learning_rate": 3.2897046119134072e-06, "loss": 0.091351318359375, "step": 108530 }, { "epoch": 0.938470052139627, "grad_norm": 4.474340206848763, "learning_rate": 3.2895018302485564e-06, "loss": 0.2682281494140625, "step": 108535 }, { "epoch": 0.9385132856611702, "grad_norm": 23.627837338267174, "learning_rate": 3.289299047248549e-06, "loss": 0.0520965576171875, "step": 108540 }, { "epoch": 0.9385565191827135, "grad_norm": 3.1423488034369544, "learning_rate": 3.2890962629143193e-06, "loss": 0.18656845092773439, "step": 108545 }, { "epoch": 0.9385997527042568, "grad_norm": 2.1545513172561384, "learning_rate": 3.288893477246804e-06, "loss": 0.08435592651367188, "step": 108550 }, { "epoch": 0.9386429862258, "grad_norm": 1.8743742100264729, "learning_rate": 3.288690690246938e-06, "loss": 0.10942192077636718, "step": 108555 }, { "epoch": 0.9386862197473433, "grad_norm": 1.6790816510198696, "learning_rate": 3.2884879019156545e-06, "loss": 0.0525909423828125, "step": 108560 }, { "epoch": 0.9387294532688866, "grad_norm": 15.103161031631497, "learning_rate": 3.28828511225389e-06, "loss": 0.1947662353515625, "step": 108565 }, { "epoch": 0.9387726867904298, "grad_norm": 6.803718415309107, "learning_rate": 3.2880823212625824e-06, "loss": 0.06942825317382813, "step": 108570 }, { "epoch": 0.9388159203119731, "grad_norm": 0.1763164133021101, "learning_rate": 3.2878795289426623e-06, "loss": 0.06349067687988282, "step": 108575 }, { "epoch": 0.9388591538335164, "grad_norm": 9.884462122268967, "learning_rate": 3.2876767352950693e-06, "loss": 0.0997314453125, "step": 108580 }, { "epoch": 0.9389023873550596, "grad_norm": 78.57347427448704, "learning_rate": 3.287473940320735e-06, "loss": 0.3208351135253906, "step": 108585 }, { "epoch": 0.9389456208766029, "grad_norm": 26.290053137400296, "learning_rate": 3.287271144020597e-06, "loss": 0.10606651306152344, "step": 108590 }, { "epoch": 0.9389888543981462, "grad_norm": 7.27769855448167, "learning_rate": 3.2870683463955894e-06, "loss": 0.06344757080078126, "step": 108595 }, { "epoch": 0.9390320879196894, "grad_norm": 26.44151434565737, "learning_rate": 3.2868655474466484e-06, "loss": 0.20339736938476563, "step": 108600 }, { "epoch": 0.9390753214412326, "grad_norm": 2.595401908710817, "learning_rate": 3.2866627471747093e-06, "loss": 0.054338836669921876, "step": 108605 }, { "epoch": 0.939118554962776, "grad_norm": 0.8216222564174624, "learning_rate": 3.286459945580706e-06, "loss": 0.06888313293457031, "step": 108610 }, { "epoch": 0.9391617884843192, "grad_norm": 48.631309649158034, "learning_rate": 3.286257142665575e-06, "loss": 0.304296875, "step": 108615 }, { "epoch": 0.9392050220058624, "grad_norm": 19.14375483752482, "learning_rate": 3.286054338430251e-06, "loss": 0.14843902587890626, "step": 108620 }, { "epoch": 0.9392482555274058, "grad_norm": 3.230345337003884, "learning_rate": 3.28585153287567e-06, "loss": 0.15489501953125, "step": 108625 }, { "epoch": 0.939291489048949, "grad_norm": 6.937775413152844, "learning_rate": 3.285648726002767e-06, "loss": 0.03398590087890625, "step": 108630 }, { "epoch": 0.9393347225704922, "grad_norm": 53.60096221424304, "learning_rate": 3.285445917812478e-06, "loss": 0.3322410583496094, "step": 108635 }, { "epoch": 0.9393779560920356, "grad_norm": 0.7382363412903302, "learning_rate": 3.2852431083057365e-06, "loss": 0.57225341796875, "step": 108640 }, { "epoch": 0.9394211896135788, "grad_norm": 0.7192252665229005, "learning_rate": 3.2850402974834803e-06, "loss": 0.018218994140625, "step": 108645 }, { "epoch": 0.939464423135122, "grad_norm": 5.7258190778397795, "learning_rate": 3.284837485346642e-06, "loss": 0.043154144287109376, "step": 108650 }, { "epoch": 0.9395076566566654, "grad_norm": 1.1785911617714542, "learning_rate": 3.2846346718961587e-06, "loss": 0.0627197265625, "step": 108655 }, { "epoch": 0.9395508901782086, "grad_norm": 10.815635589918333, "learning_rate": 3.2844318571329665e-06, "loss": 0.4228202819824219, "step": 108660 }, { "epoch": 0.9395941236997518, "grad_norm": 72.73319960352146, "learning_rate": 3.2842290410579993e-06, "loss": 0.267205810546875, "step": 108665 }, { "epoch": 0.9396373572212952, "grad_norm": 7.6333293481632545, "learning_rate": 3.284026223672192e-06, "loss": 0.10703125, "step": 108670 }, { "epoch": 0.9396805907428384, "grad_norm": 16.840819048274557, "learning_rate": 3.283823404976481e-06, "loss": 0.1094329833984375, "step": 108675 }, { "epoch": 0.9397238242643816, "grad_norm": 3.889228917599136, "learning_rate": 3.283620584971802e-06, "loss": 0.0323211669921875, "step": 108680 }, { "epoch": 0.9397670577859248, "grad_norm": 0.6307891685619619, "learning_rate": 3.2834177636590907e-06, "loss": 0.05038909912109375, "step": 108685 }, { "epoch": 0.9398102913074682, "grad_norm": 0.30109165431426277, "learning_rate": 3.28321494103928e-06, "loss": 0.07855815887451172, "step": 108690 }, { "epoch": 0.9398535248290114, "grad_norm": 16.19435266922549, "learning_rate": 3.2830121171133084e-06, "loss": 0.08151626586914062, "step": 108695 }, { "epoch": 0.9398967583505546, "grad_norm": 4.010338186062547, "learning_rate": 3.2828092918821095e-06, "loss": 0.05271701812744141, "step": 108700 }, { "epoch": 0.939939991872098, "grad_norm": 2.4069399139599668, "learning_rate": 3.2826064653466183e-06, "loss": 0.165771484375, "step": 108705 }, { "epoch": 0.9399832253936412, "grad_norm": 2.51790205705175, "learning_rate": 3.2824036375077726e-06, "loss": 0.0401123046875, "step": 108710 }, { "epoch": 0.9400264589151844, "grad_norm": 48.91054406051402, "learning_rate": 3.2822008083665053e-06, "loss": 0.12175979614257812, "step": 108715 }, { "epoch": 0.9400696924367278, "grad_norm": 19.195496423491456, "learning_rate": 3.2819979779237532e-06, "loss": 0.07412109375, "step": 108720 }, { "epoch": 0.940112925958271, "grad_norm": 10.638460191425242, "learning_rate": 3.2817951461804515e-06, "loss": 0.08931121826171876, "step": 108725 }, { "epoch": 0.9401561594798142, "grad_norm": 2.8268392619330975, "learning_rate": 3.2815923131375345e-06, "loss": 0.12464828491210937, "step": 108730 }, { "epoch": 0.9401993930013576, "grad_norm": 14.99403765910288, "learning_rate": 3.2813894787959393e-06, "loss": 0.06606369018554688, "step": 108735 }, { "epoch": 0.9402426265229008, "grad_norm": 9.721622182368112, "learning_rate": 3.2811866431566003e-06, "loss": 0.03730964660644531, "step": 108740 }, { "epoch": 0.940285860044444, "grad_norm": 30.116536038741796, "learning_rate": 3.2809838062204546e-06, "loss": 0.1828460693359375, "step": 108745 }, { "epoch": 0.9403290935659874, "grad_norm": 4.379321408957302, "learning_rate": 3.280780967988435e-06, "loss": 0.16886444091796876, "step": 108750 }, { "epoch": 0.9403723270875306, "grad_norm": 8.837260703582524, "learning_rate": 3.2805781284614795e-06, "loss": 0.28301010131835935, "step": 108755 }, { "epoch": 0.9404155606090738, "grad_norm": 6.803803617698886, "learning_rate": 3.280375287640521e-06, "loss": 0.33041229248046877, "step": 108760 }, { "epoch": 0.9404587941306172, "grad_norm": 2.134478762284606, "learning_rate": 3.280172445526497e-06, "loss": 0.06847763061523438, "step": 108765 }, { "epoch": 0.9405020276521604, "grad_norm": 4.692193250550059, "learning_rate": 3.279969602120343e-06, "loss": 0.096685791015625, "step": 108770 }, { "epoch": 0.9405452611737036, "grad_norm": 6.698686945208147, "learning_rate": 3.279766757422994e-06, "loss": 0.11608734130859374, "step": 108775 }, { "epoch": 0.9405884946952469, "grad_norm": 2.390199928388805, "learning_rate": 3.279563911435386e-06, "loss": 0.13157272338867188, "step": 108780 }, { "epoch": 0.9406317282167902, "grad_norm": 1.571607384446923, "learning_rate": 3.2793610641584524e-06, "loss": 0.33269615173339845, "step": 108785 }, { "epoch": 0.9406749617383334, "grad_norm": 9.758891599050115, "learning_rate": 3.2791582155931306e-06, "loss": 0.08674049377441406, "step": 108790 }, { "epoch": 0.9407181952598767, "grad_norm": 3.9773138678574798, "learning_rate": 3.2789553657403564e-06, "loss": 0.2538299560546875, "step": 108795 }, { "epoch": 0.94076142878142, "grad_norm": 21.65553867911243, "learning_rate": 3.2787525146010645e-06, "loss": 0.44298744201660156, "step": 108800 }, { "epoch": 0.9408046623029632, "grad_norm": 31.75428328802569, "learning_rate": 3.278549662176191e-06, "loss": 0.10119400024414063, "step": 108805 }, { "epoch": 0.9408478958245065, "grad_norm": 14.00008127709605, "learning_rate": 3.2783468084666707e-06, "loss": 0.34119720458984376, "step": 108810 }, { "epoch": 0.9408911293460498, "grad_norm": 0.32009906305442726, "learning_rate": 3.278143953473439e-06, "loss": 0.0646636962890625, "step": 108815 }, { "epoch": 0.940934362867593, "grad_norm": 27.53843959014206, "learning_rate": 3.277941097197432e-06, "loss": 0.21986427307128906, "step": 108820 }, { "epoch": 0.9409775963891363, "grad_norm": 6.865652459226501, "learning_rate": 3.2777382396395856e-06, "loss": 0.08681640625, "step": 108825 }, { "epoch": 0.9410208299106796, "grad_norm": 15.112779661375763, "learning_rate": 3.2775353808008353e-06, "loss": 0.305841064453125, "step": 108830 }, { "epoch": 0.9410640634322228, "grad_norm": 50.57880989588066, "learning_rate": 3.2773325206821162e-06, "loss": 0.650225830078125, "step": 108835 }, { "epoch": 0.941107296953766, "grad_norm": 37.85062857875642, "learning_rate": 3.277129659284364e-06, "loss": 0.124749755859375, "step": 108840 }, { "epoch": 0.9411505304753094, "grad_norm": 0.08375296159560086, "learning_rate": 3.2769267966085137e-06, "loss": 0.044891357421875, "step": 108845 }, { "epoch": 0.9411937639968526, "grad_norm": 16.600814523775554, "learning_rate": 3.2767239326555017e-06, "loss": 0.1801055908203125, "step": 108850 }, { "epoch": 0.9412369975183958, "grad_norm": 22.898565929631523, "learning_rate": 3.2765210674262636e-06, "loss": 0.339044189453125, "step": 108855 }, { "epoch": 0.9412802310399391, "grad_norm": 3.3561411185758336, "learning_rate": 3.2763182009217356e-06, "loss": 0.16897544860839844, "step": 108860 }, { "epoch": 0.9413234645614824, "grad_norm": 2.7142789066670523, "learning_rate": 3.276115333142851e-06, "loss": 0.1816650390625, "step": 108865 }, { "epoch": 0.9413666980830256, "grad_norm": 0.7519366382153985, "learning_rate": 3.275912464090547e-06, "loss": 0.1494415283203125, "step": 108870 }, { "epoch": 0.9414099316045689, "grad_norm": 2.383664492832254, "learning_rate": 3.2757095937657603e-06, "loss": 0.08977203369140625, "step": 108875 }, { "epoch": 0.9414531651261122, "grad_norm": 9.767564311630679, "learning_rate": 3.2755067221694242e-06, "loss": 0.0943263053894043, "step": 108880 }, { "epoch": 0.9414963986476554, "grad_norm": 0.3516322727391284, "learning_rate": 3.2753038493024764e-06, "loss": 0.2290924072265625, "step": 108885 }, { "epoch": 0.9415396321691987, "grad_norm": 2.5381494037956824, "learning_rate": 3.275100975165851e-06, "loss": 0.06533203125, "step": 108890 }, { "epoch": 0.941582865690742, "grad_norm": 13.54350378431459, "learning_rate": 3.274898099760484e-06, "loss": 0.0527618408203125, "step": 108895 }, { "epoch": 0.9416260992122852, "grad_norm": 2.789176667372888, "learning_rate": 3.2746952230873116e-06, "loss": 0.16233978271484376, "step": 108900 }, { "epoch": 0.9416693327338285, "grad_norm": 9.802465666538643, "learning_rate": 3.2744923451472696e-06, "loss": 0.06317596435546875, "step": 108905 }, { "epoch": 0.9417125662553718, "grad_norm": 2.9594063424789105, "learning_rate": 3.274289465941292e-06, "loss": 0.10281143188476563, "step": 108910 }, { "epoch": 0.941755799776915, "grad_norm": 14.220456216395991, "learning_rate": 3.274086585470317e-06, "loss": 0.0665771484375, "step": 108915 }, { "epoch": 0.9417990332984583, "grad_norm": 34.78195792929898, "learning_rate": 3.2738837037352784e-06, "loss": 0.08472442626953125, "step": 108920 }, { "epoch": 0.9418422668200016, "grad_norm": 4.327459118112083, "learning_rate": 3.2736808207371113e-06, "loss": 0.1590789794921875, "step": 108925 }, { "epoch": 0.9418855003415448, "grad_norm": 0.714461657284222, "learning_rate": 3.273477936476753e-06, "loss": 0.17448654174804687, "step": 108930 }, { "epoch": 0.9419287338630881, "grad_norm": 0.0902554106957559, "learning_rate": 3.2732750509551393e-06, "loss": 0.016934967041015624, "step": 108935 }, { "epoch": 0.9419719673846313, "grad_norm": 4.583039039532312, "learning_rate": 3.2730721641732046e-06, "loss": 0.1401336669921875, "step": 108940 }, { "epoch": 0.9420152009061746, "grad_norm": 0.11556753312138787, "learning_rate": 3.272869276131885e-06, "loss": 0.12211990356445312, "step": 108945 }, { "epoch": 0.9420584344277179, "grad_norm": 18.33228349554326, "learning_rate": 3.272666386832117e-06, "loss": 0.050230979919433594, "step": 108950 }, { "epoch": 0.9421016679492611, "grad_norm": 0.7201760271983649, "learning_rate": 3.2724634962748352e-06, "loss": 0.042000198364257814, "step": 108955 }, { "epoch": 0.9421449014708044, "grad_norm": 33.8973863566848, "learning_rate": 3.272260604460976e-06, "loss": 0.24698638916015625, "step": 108960 }, { "epoch": 0.9421881349923477, "grad_norm": 13.783519414745033, "learning_rate": 3.2720577113914753e-06, "loss": 0.18056163787841797, "step": 108965 }, { "epoch": 0.9422313685138909, "grad_norm": 22.893256902220187, "learning_rate": 3.2718548170672682e-06, "loss": 0.1172454833984375, "step": 108970 }, { "epoch": 0.9422746020354342, "grad_norm": 30.629568809142803, "learning_rate": 3.2716519214892906e-06, "loss": 0.1154144287109375, "step": 108975 }, { "epoch": 0.9423178355569775, "grad_norm": 5.087485649882501, "learning_rate": 3.271449024658479e-06, "loss": 0.0403167724609375, "step": 108980 }, { "epoch": 0.9423610690785207, "grad_norm": 0.44914442656898135, "learning_rate": 3.271246126575767e-06, "loss": 0.12222442626953126, "step": 108985 }, { "epoch": 0.942404302600064, "grad_norm": 0.8097391437245297, "learning_rate": 3.2710432272420925e-06, "loss": 0.06657867431640625, "step": 108990 }, { "epoch": 0.9424475361216073, "grad_norm": 4.4026825049705165, "learning_rate": 3.2708403266583916e-06, "loss": 0.02863922119140625, "step": 108995 }, { "epoch": 0.9424907696431505, "grad_norm": 21.85803271727308, "learning_rate": 3.2706374248255987e-06, "loss": 0.12449798583984376, "step": 109000 }, { "epoch": 0.9425340031646938, "grad_norm": 14.100310688304164, "learning_rate": 3.2704345217446494e-06, "loss": 0.07099685668945313, "step": 109005 }, { "epoch": 0.942577236686237, "grad_norm": 27.478122375453246, "learning_rate": 3.27023161741648e-06, "loss": 0.19081573486328124, "step": 109010 }, { "epoch": 0.9426204702077803, "grad_norm": 0.10071720181093366, "learning_rate": 3.2700287118420262e-06, "loss": 0.09294853210449219, "step": 109015 }, { "epoch": 0.9426637037293236, "grad_norm": 4.929462199419168, "learning_rate": 3.2698258050222235e-06, "loss": 0.12675323486328124, "step": 109020 }, { "epoch": 0.9427069372508668, "grad_norm": 10.759734156892979, "learning_rate": 3.269622896958009e-06, "loss": 0.101898193359375, "step": 109025 }, { "epoch": 0.9427501707724101, "grad_norm": 13.95940878222466, "learning_rate": 3.2694199876503173e-06, "loss": 0.08809871673583984, "step": 109030 }, { "epoch": 0.9427934042939533, "grad_norm": 21.595924804759775, "learning_rate": 3.2692170771000845e-06, "loss": 0.14126510620117189, "step": 109035 }, { "epoch": 0.9428366378154966, "grad_norm": 28.317926890983145, "learning_rate": 3.2690141653082454e-06, "loss": 0.1075714111328125, "step": 109040 }, { "epoch": 0.9428798713370399, "grad_norm": 0.8651465612962531, "learning_rate": 3.2688112522757377e-06, "loss": 0.05355377197265625, "step": 109045 }, { "epoch": 0.9429231048585831, "grad_norm": 9.26459590147621, "learning_rate": 3.268608338003496e-06, "loss": 0.07080764770507812, "step": 109050 }, { "epoch": 0.9429663383801264, "grad_norm": 3.717921027417506, "learning_rate": 3.2684054224924555e-06, "loss": 0.09096755981445312, "step": 109055 }, { "epoch": 0.9430095719016697, "grad_norm": 2.9102585294034933, "learning_rate": 3.2682025057435543e-06, "loss": 0.1086456298828125, "step": 109060 }, { "epoch": 0.9430528054232129, "grad_norm": 5.393056710225276, "learning_rate": 3.267999587757727e-06, "loss": 0.042955970764160155, "step": 109065 }, { "epoch": 0.9430960389447562, "grad_norm": 2.8830220663563204, "learning_rate": 3.2677966685359072e-06, "loss": 0.12993240356445312, "step": 109070 }, { "epoch": 0.9431392724662995, "grad_norm": 11.139677142063151, "learning_rate": 3.267593748079034e-06, "loss": 0.18079833984375, "step": 109075 }, { "epoch": 0.9431825059878427, "grad_norm": 0.6949948851267269, "learning_rate": 3.2673908263880434e-06, "loss": 0.24044952392578126, "step": 109080 }, { "epoch": 0.943225739509386, "grad_norm": 0.750796842277866, "learning_rate": 3.267187903463868e-06, "loss": 0.02779541015625, "step": 109085 }, { "epoch": 0.9432689730309293, "grad_norm": 1.088658171910655, "learning_rate": 3.2669849793074468e-06, "loss": 0.168792724609375, "step": 109090 }, { "epoch": 0.9433122065524725, "grad_norm": 2.8150405540571604, "learning_rate": 3.2667820539197137e-06, "loss": 0.0294281005859375, "step": 109095 }, { "epoch": 0.9433554400740158, "grad_norm": 0.6778207540000829, "learning_rate": 3.266579127301605e-06, "loss": 0.019640350341796876, "step": 109100 }, { "epoch": 0.9433986735955591, "grad_norm": 28.31040063413481, "learning_rate": 3.266376199454058e-06, "loss": 0.30484466552734374, "step": 109105 }, { "epoch": 0.9434419071171023, "grad_norm": 10.425298204043962, "learning_rate": 3.2661732703780075e-06, "loss": 0.11174087524414063, "step": 109110 }, { "epoch": 0.9434851406386455, "grad_norm": 3.871927382136403, "learning_rate": 3.265970340074389e-06, "loss": 0.06486663818359376, "step": 109115 }, { "epoch": 0.9435283741601889, "grad_norm": 20.272590332983675, "learning_rate": 3.2657674085441387e-06, "loss": 0.21515045166015626, "step": 109120 }, { "epoch": 0.9435716076817321, "grad_norm": 4.306017514687261, "learning_rate": 3.2655644757881926e-06, "loss": 0.248779296875, "step": 109125 }, { "epoch": 0.9436148412032753, "grad_norm": 53.3314894883907, "learning_rate": 3.265361541807487e-06, "loss": 0.34777259826660156, "step": 109130 }, { "epoch": 0.9436580747248187, "grad_norm": 1.8128506483982418, "learning_rate": 3.2651586066029567e-06, "loss": 0.09182510375976563, "step": 109135 }, { "epoch": 0.9437013082463619, "grad_norm": 16.245363953188132, "learning_rate": 3.264955670175539e-06, "loss": 0.07557525634765624, "step": 109140 }, { "epoch": 0.9437445417679051, "grad_norm": 4.591527047714826, "learning_rate": 3.2647527325261694e-06, "loss": 0.27947235107421875, "step": 109145 }, { "epoch": 0.9437877752894485, "grad_norm": 14.389936257304113, "learning_rate": 3.264549793655782e-06, "loss": 0.147344970703125, "step": 109150 }, { "epoch": 0.9438310088109917, "grad_norm": 0.6710405857432953, "learning_rate": 3.264346853565316e-06, "loss": 0.09954833984375, "step": 109155 }, { "epoch": 0.9438742423325349, "grad_norm": 11.725560649661512, "learning_rate": 3.2641439122557047e-06, "loss": 0.17386188507080078, "step": 109160 }, { "epoch": 0.9439174758540783, "grad_norm": 10.594091415045144, "learning_rate": 3.263940969727885e-06, "loss": 0.15381011962890626, "step": 109165 }, { "epoch": 0.9439607093756215, "grad_norm": 2.9025066706247715, "learning_rate": 3.263738025982794e-06, "loss": 0.15944061279296876, "step": 109170 }, { "epoch": 0.9440039428971647, "grad_norm": 19.321469896233378, "learning_rate": 3.2635350810213663e-06, "loss": 0.136798095703125, "step": 109175 }, { "epoch": 0.9440471764187081, "grad_norm": 0.5420727337764474, "learning_rate": 3.2633321348445372e-06, "loss": 0.16829071044921876, "step": 109180 }, { "epoch": 0.9440904099402513, "grad_norm": 10.798882652063554, "learning_rate": 3.2631291874532433e-06, "loss": 0.33522491455078124, "step": 109185 }, { "epoch": 0.9441336434617945, "grad_norm": 0.9161223857792513, "learning_rate": 3.262926238848422e-06, "loss": 0.08954486846923829, "step": 109190 }, { "epoch": 0.9441768769833379, "grad_norm": 14.487584354380541, "learning_rate": 3.2627232890310075e-06, "loss": 0.08830223083496094, "step": 109195 }, { "epoch": 0.9442201105048811, "grad_norm": 32.00188380259772, "learning_rate": 3.2625203380019363e-06, "loss": 0.2317047119140625, "step": 109200 }, { "epoch": 0.9442633440264243, "grad_norm": 2.0862029610186457, "learning_rate": 3.262317385762145e-06, "loss": 0.171319580078125, "step": 109205 }, { "epoch": 0.9443065775479675, "grad_norm": 5.266939423638585, "learning_rate": 3.262114432312569e-06, "loss": 0.18903732299804688, "step": 109210 }, { "epoch": 0.9443498110695109, "grad_norm": 2.3541498108445116, "learning_rate": 3.2619114776541437e-06, "loss": 0.049488067626953125, "step": 109215 }, { "epoch": 0.9443930445910541, "grad_norm": 1.5555358880328765, "learning_rate": 3.2617085217878066e-06, "loss": 0.045246124267578125, "step": 109220 }, { "epoch": 0.9444362781125973, "grad_norm": 11.663636683557236, "learning_rate": 3.261505564714493e-06, "loss": 0.10275478363037109, "step": 109225 }, { "epoch": 0.9444795116341407, "grad_norm": 6.138216249959403, "learning_rate": 3.261302606435138e-06, "loss": 0.08706436157226563, "step": 109230 }, { "epoch": 0.9445227451556839, "grad_norm": 4.274667106395314, "learning_rate": 3.2610996469506795e-06, "loss": 0.20127716064453124, "step": 109235 }, { "epoch": 0.9445659786772271, "grad_norm": 1.9926796424507123, "learning_rate": 3.2608966862620513e-06, "loss": 0.05501556396484375, "step": 109240 }, { "epoch": 0.9446092121987705, "grad_norm": 0.5338225455015352, "learning_rate": 3.2606937243701907e-06, "loss": 0.044429779052734375, "step": 109245 }, { "epoch": 0.9446524457203137, "grad_norm": 3.1048490431339393, "learning_rate": 3.2604907612760347e-06, "loss": 0.07178115844726562, "step": 109250 }, { "epoch": 0.9446956792418569, "grad_norm": 6.793095563241265, "learning_rate": 3.2602877969805183e-06, "loss": 0.1296356201171875, "step": 109255 }, { "epoch": 0.9447389127634003, "grad_norm": 8.879081337837006, "learning_rate": 3.260084831484577e-06, "loss": 0.07178955078125, "step": 109260 }, { "epoch": 0.9447821462849435, "grad_norm": 1.1180822050789014, "learning_rate": 3.2598818647891478e-06, "loss": 0.15600662231445311, "step": 109265 }, { "epoch": 0.9448253798064867, "grad_norm": 5.76526904353671, "learning_rate": 3.259678896895166e-06, "loss": 0.0693511962890625, "step": 109270 }, { "epoch": 0.9448686133280301, "grad_norm": 16.95820940592036, "learning_rate": 3.2594759278035678e-06, "loss": 0.395343017578125, "step": 109275 }, { "epoch": 0.9449118468495733, "grad_norm": 1.15391527883246, "learning_rate": 3.2592729575152907e-06, "loss": 0.08363609313964844, "step": 109280 }, { "epoch": 0.9449550803711165, "grad_norm": 0.40750864128706665, "learning_rate": 3.259069986031269e-06, "loss": 0.08163528442382813, "step": 109285 }, { "epoch": 0.9449983138926598, "grad_norm": 53.55428196812166, "learning_rate": 3.25886701335244e-06, "loss": 0.1957733154296875, "step": 109290 }, { "epoch": 0.9450415474142031, "grad_norm": 8.168941314111262, "learning_rate": 3.2586640394797374e-06, "loss": 0.06908435821533203, "step": 109295 }, { "epoch": 0.9450847809357463, "grad_norm": 32.55215767073499, "learning_rate": 3.258461064414101e-06, "loss": 0.3577606201171875, "step": 109300 }, { "epoch": 0.9451280144572896, "grad_norm": 15.487755168962803, "learning_rate": 3.2582580881564646e-06, "loss": 0.1390869140625, "step": 109305 }, { "epoch": 0.9451712479788329, "grad_norm": 4.223817791572218, "learning_rate": 3.2580551107077636e-06, "loss": 0.06929550170898438, "step": 109310 }, { "epoch": 0.9452144815003761, "grad_norm": 2.491136820790447, "learning_rate": 3.257852132068937e-06, "loss": 0.035962677001953124, "step": 109315 }, { "epoch": 0.9452577150219194, "grad_norm": 1.2852269996867147, "learning_rate": 3.2576491522409173e-06, "loss": 0.1544586181640625, "step": 109320 }, { "epoch": 0.9453009485434627, "grad_norm": 0.9878175848861069, "learning_rate": 3.257446171224643e-06, "loss": 0.13964080810546875, "step": 109325 }, { "epoch": 0.9453441820650059, "grad_norm": 3.7846689863902934, "learning_rate": 3.2572431890210507e-06, "loss": 0.0574310302734375, "step": 109330 }, { "epoch": 0.9453874155865492, "grad_norm": 17.069417042444076, "learning_rate": 3.2570402056310746e-06, "loss": 0.1794219970703125, "step": 109335 }, { "epoch": 0.9454306491080925, "grad_norm": 26.34417508721416, "learning_rate": 3.256837221055652e-06, "loss": 0.2311279296875, "step": 109340 }, { "epoch": 0.9454738826296357, "grad_norm": 4.060227008149443, "learning_rate": 3.2566342352957196e-06, "loss": 0.15367813110351564, "step": 109345 }, { "epoch": 0.945517116151179, "grad_norm": 3.092066374690727, "learning_rate": 3.2564312483522117e-06, "loss": 0.0922433853149414, "step": 109350 }, { "epoch": 0.9455603496727223, "grad_norm": 14.891592649578174, "learning_rate": 3.2562282602260658e-06, "loss": 0.08495254516601562, "step": 109355 }, { "epoch": 0.9456035831942655, "grad_norm": 9.336305246790808, "learning_rate": 3.256025270918218e-06, "loss": 0.12125701904296875, "step": 109360 }, { "epoch": 0.9456468167158087, "grad_norm": 2.194507327993506, "learning_rate": 3.2558222804296047e-06, "loss": 0.17754783630371093, "step": 109365 }, { "epoch": 0.9456900502373521, "grad_norm": 7.411288751731758, "learning_rate": 3.255619288761161e-06, "loss": 0.05885009765625, "step": 109370 }, { "epoch": 0.9457332837588953, "grad_norm": 0.9634378258822947, "learning_rate": 3.255416295913824e-06, "loss": 0.23260421752929689, "step": 109375 }, { "epoch": 0.9457765172804385, "grad_norm": 14.96834606510269, "learning_rate": 3.255213301888529e-06, "loss": 0.3317901611328125, "step": 109380 }, { "epoch": 0.9458197508019818, "grad_norm": 15.920223553377497, "learning_rate": 3.2550103066862137e-06, "loss": 0.19745635986328125, "step": 109385 }, { "epoch": 0.9458629843235251, "grad_norm": 0.1853050178176262, "learning_rate": 3.2548073103078133e-06, "loss": 0.044205856323242185, "step": 109390 }, { "epoch": 0.9459062178450683, "grad_norm": 0.48834595833687017, "learning_rate": 3.254604312754264e-06, "loss": 0.3782367706298828, "step": 109395 }, { "epoch": 0.9459494513666116, "grad_norm": 8.470142501623464, "learning_rate": 3.2544013140265024e-06, "loss": 0.29241180419921875, "step": 109400 }, { "epoch": 0.9459926848881549, "grad_norm": 2.5773768287380854, "learning_rate": 3.2541983141254635e-06, "loss": 0.12962799072265624, "step": 109405 }, { "epoch": 0.9460359184096981, "grad_norm": 0.35531805906252695, "learning_rate": 3.2539953130520844e-06, "loss": 0.2996025085449219, "step": 109410 }, { "epoch": 0.9460791519312414, "grad_norm": 4.49522593949294, "learning_rate": 3.2537923108073024e-06, "loss": 0.10558319091796875, "step": 109415 }, { "epoch": 0.9461223854527847, "grad_norm": 5.111901733609597, "learning_rate": 3.2535893073920517e-06, "loss": 0.0503662109375, "step": 109420 }, { "epoch": 0.9461656189743279, "grad_norm": 31.89027352216599, "learning_rate": 3.25338630280727e-06, "loss": 0.13416023254394532, "step": 109425 }, { "epoch": 0.9462088524958712, "grad_norm": 16.04668941964102, "learning_rate": 3.2531832970538932e-06, "loss": 0.2692298889160156, "step": 109430 }, { "epoch": 0.9462520860174145, "grad_norm": 1.6132282532097648, "learning_rate": 3.252980290132857e-06, "loss": 0.022370147705078124, "step": 109435 }, { "epoch": 0.9462953195389577, "grad_norm": 7.987944350583394, "learning_rate": 3.252777282045098e-06, "loss": 0.2136962890625, "step": 109440 }, { "epoch": 0.946338553060501, "grad_norm": 3.100933275790881, "learning_rate": 3.2525742727915534e-06, "loss": 0.21041259765625, "step": 109445 }, { "epoch": 0.9463817865820443, "grad_norm": 0.9869188316572606, "learning_rate": 3.2523712623731575e-06, "loss": 0.06714973449707032, "step": 109450 }, { "epoch": 0.9464250201035875, "grad_norm": 15.671008583604399, "learning_rate": 3.2521682507908486e-06, "loss": 0.2279216766357422, "step": 109455 }, { "epoch": 0.9464682536251308, "grad_norm": 3.8106256424215506, "learning_rate": 3.2519652380455613e-06, "loss": 0.09029273986816407, "step": 109460 }, { "epoch": 0.946511487146674, "grad_norm": 0.1124486108012292, "learning_rate": 3.251762224138233e-06, "loss": 0.11628875732421876, "step": 109465 }, { "epoch": 0.9465547206682173, "grad_norm": 1.5629642906004533, "learning_rate": 3.2515592090697986e-06, "loss": 0.2423583984375, "step": 109470 }, { "epoch": 0.9465979541897606, "grad_norm": 4.67535310929372, "learning_rate": 3.2513561928411967e-06, "loss": 0.18507032394409179, "step": 109475 }, { "epoch": 0.9466411877113038, "grad_norm": 13.815980829121884, "learning_rate": 3.2511531754533616e-06, "loss": 0.08721857070922852, "step": 109480 }, { "epoch": 0.9466844212328471, "grad_norm": 5.982215809129009, "learning_rate": 3.2509501569072306e-06, "loss": 0.1023223876953125, "step": 109485 }, { "epoch": 0.9467276547543904, "grad_norm": 1.145577143870028, "learning_rate": 3.2507471372037392e-06, "loss": 0.2939914703369141, "step": 109490 }, { "epoch": 0.9467708882759336, "grad_norm": 7.54692379007318, "learning_rate": 3.2505441163438246e-06, "loss": 0.2231109619140625, "step": 109495 }, { "epoch": 0.9468141217974769, "grad_norm": 12.399094509687153, "learning_rate": 3.250341094328422e-06, "loss": 0.5615337371826172, "step": 109500 }, { "epoch": 0.9468573553190202, "grad_norm": 0.4534494772724265, "learning_rate": 3.250138071158469e-06, "loss": 0.208172607421875, "step": 109505 }, { "epoch": 0.9469005888405634, "grad_norm": 0.2672828576659961, "learning_rate": 3.249935046834901e-06, "loss": 0.1552581787109375, "step": 109510 }, { "epoch": 0.9469438223621067, "grad_norm": 97.89722777911084, "learning_rate": 3.2497320213586545e-06, "loss": 0.2941093444824219, "step": 109515 }, { "epoch": 0.94698705588365, "grad_norm": 30.313974222815503, "learning_rate": 3.249528994730666e-06, "loss": 0.1431243896484375, "step": 109520 }, { "epoch": 0.9470302894051932, "grad_norm": 11.298403892148578, "learning_rate": 3.2493259669518727e-06, "loss": 0.06333122253417969, "step": 109525 }, { "epoch": 0.9470735229267365, "grad_norm": 1.2939013288472359, "learning_rate": 3.2491229380232086e-06, "loss": 0.20065765380859374, "step": 109530 }, { "epoch": 0.9471167564482798, "grad_norm": 24.165517080274956, "learning_rate": 3.2489199079456134e-06, "loss": 0.40264434814453126, "step": 109535 }, { "epoch": 0.947159989969823, "grad_norm": 0.0987444453065504, "learning_rate": 3.248716876720021e-06, "loss": 0.014387893676757812, "step": 109540 }, { "epoch": 0.9472032234913663, "grad_norm": 1.1038576165692753, "learning_rate": 3.248513844347367e-06, "loss": 0.18235282897949218, "step": 109545 }, { "epoch": 0.9472464570129095, "grad_norm": 0.23842854672425953, "learning_rate": 3.2483108108285896e-06, "loss": 0.018881607055664062, "step": 109550 }, { "epoch": 0.9472896905344528, "grad_norm": 7.935741691991602, "learning_rate": 3.2481077761646256e-06, "loss": 0.09339675903320313, "step": 109555 }, { "epoch": 0.947332924055996, "grad_norm": 0.2049381375891559, "learning_rate": 3.24790474035641e-06, "loss": 0.3306861877441406, "step": 109560 }, { "epoch": 0.9473761575775393, "grad_norm": 0.22331028315114237, "learning_rate": 3.2477017034048794e-06, "loss": 0.10040435791015626, "step": 109565 }, { "epoch": 0.9474193910990826, "grad_norm": 0.24731431226193362, "learning_rate": 3.2474986653109707e-06, "loss": 0.09908428192138671, "step": 109570 }, { "epoch": 0.9474626246206258, "grad_norm": 0.6602967700595981, "learning_rate": 3.24729562607562e-06, "loss": 0.03415336608886719, "step": 109575 }, { "epoch": 0.9475058581421691, "grad_norm": 5.775901829874539, "learning_rate": 3.247092585699763e-06, "loss": 0.1358612060546875, "step": 109580 }, { "epoch": 0.9475490916637124, "grad_norm": 0.7156432132332028, "learning_rate": 3.246889544184338e-06, "loss": 0.02742156982421875, "step": 109585 }, { "epoch": 0.9475923251852556, "grad_norm": 4.085848968458943, "learning_rate": 3.24668650153028e-06, "loss": 0.13629074096679689, "step": 109590 }, { "epoch": 0.9476355587067989, "grad_norm": 9.389265368869664, "learning_rate": 3.246483457738525e-06, "loss": 0.04922828674316406, "step": 109595 }, { "epoch": 0.9476787922283422, "grad_norm": 3.760587984572049, "learning_rate": 3.246280412810011e-06, "loss": 0.16936798095703126, "step": 109600 }, { "epoch": 0.9477220257498854, "grad_norm": 3.9360074869367843, "learning_rate": 3.2460773667456723e-06, "loss": 0.05266876220703125, "step": 109605 }, { "epoch": 0.9477652592714287, "grad_norm": 20.277379714328607, "learning_rate": 3.2458743195464475e-06, "loss": 0.13947219848632814, "step": 109610 }, { "epoch": 0.947808492792972, "grad_norm": 35.98385340230991, "learning_rate": 3.2456712712132717e-06, "loss": 0.5298690795898438, "step": 109615 }, { "epoch": 0.9478517263145152, "grad_norm": 4.080319264411265, "learning_rate": 3.2454682217470817e-06, "loss": 0.05168304443359375, "step": 109620 }, { "epoch": 0.9478949598360585, "grad_norm": 20.208083104240576, "learning_rate": 3.245265171148814e-06, "loss": 0.38715553283691406, "step": 109625 }, { "epoch": 0.9479381933576018, "grad_norm": 0.19537957914805468, "learning_rate": 3.2450621194194054e-06, "loss": 0.22989139556884766, "step": 109630 }, { "epoch": 0.947981426879145, "grad_norm": 0.06208411257364325, "learning_rate": 3.244859066559791e-06, "loss": 0.023369979858398438, "step": 109635 }, { "epoch": 0.9480246604006882, "grad_norm": 12.739157859320612, "learning_rate": 3.2446560125709092e-06, "loss": 0.06660690307617187, "step": 109640 }, { "epoch": 0.9480678939222316, "grad_norm": 0.39093157087779334, "learning_rate": 3.244452957453695e-06, "loss": 0.07548408508300782, "step": 109645 }, { "epoch": 0.9481111274437748, "grad_norm": 20.26678490520062, "learning_rate": 3.2442499012090853e-06, "loss": 0.14707870483398439, "step": 109650 }, { "epoch": 0.948154360965318, "grad_norm": 0.5873649980549095, "learning_rate": 3.244046843838017e-06, "loss": 0.1937286376953125, "step": 109655 }, { "epoch": 0.9481975944868614, "grad_norm": 6.3064015791382575, "learning_rate": 3.243843785341426e-06, "loss": 0.19049301147460937, "step": 109660 }, { "epoch": 0.9482408280084046, "grad_norm": 11.890172443645332, "learning_rate": 3.2436407257202486e-06, "loss": 0.20550117492675782, "step": 109665 }, { "epoch": 0.9482840615299478, "grad_norm": 0.3170160633987867, "learning_rate": 3.2434376649754224e-06, "loss": 0.21839218139648436, "step": 109670 }, { "epoch": 0.9483272950514912, "grad_norm": 1.2971501307949207, "learning_rate": 3.2432346031078823e-06, "loss": 0.03763561248779297, "step": 109675 }, { "epoch": 0.9483705285730344, "grad_norm": 6.256836332367815, "learning_rate": 3.243031540118567e-06, "loss": 0.0385711669921875, "step": 109680 }, { "epoch": 0.9484137620945776, "grad_norm": 0.25053353652228383, "learning_rate": 3.242828476008411e-06, "loss": 0.23524093627929688, "step": 109685 }, { "epoch": 0.948456995616121, "grad_norm": 7.440168842682594, "learning_rate": 3.2426254107783514e-06, "loss": 0.2103038787841797, "step": 109690 }, { "epoch": 0.9485002291376642, "grad_norm": 13.269552122158826, "learning_rate": 3.242422344429325e-06, "loss": 0.07860794067382812, "step": 109695 }, { "epoch": 0.9485434626592074, "grad_norm": 47.69798029976272, "learning_rate": 3.2422192769622685e-06, "loss": 0.402734375, "step": 109700 }, { "epoch": 0.9485866961807508, "grad_norm": 1.0427149924993315, "learning_rate": 3.242016208378117e-06, "loss": 0.13710975646972656, "step": 109705 }, { "epoch": 0.948629929702294, "grad_norm": 0.18341253341173444, "learning_rate": 3.2418131386778094e-06, "loss": 0.12240447998046874, "step": 109710 }, { "epoch": 0.9486731632238372, "grad_norm": 19.051570447952887, "learning_rate": 3.2416100678622806e-06, "loss": 0.08437576293945312, "step": 109715 }, { "epoch": 0.9487163967453806, "grad_norm": 43.57528683935058, "learning_rate": 3.241406995932467e-06, "loss": 0.10935859680175782, "step": 109720 }, { "epoch": 0.9487596302669238, "grad_norm": 16.540049395426337, "learning_rate": 3.2412039228893054e-06, "loss": 0.208984375, "step": 109725 }, { "epoch": 0.948802863788467, "grad_norm": 2.6015769549751044, "learning_rate": 3.2410008487337343e-06, "loss": 0.124224853515625, "step": 109730 }, { "epoch": 0.9488460973100102, "grad_norm": 0.12179740484598903, "learning_rate": 3.240797773466688e-06, "loss": 0.2775001525878906, "step": 109735 }, { "epoch": 0.9488893308315536, "grad_norm": 3.531969671008027, "learning_rate": 3.2405946970891024e-06, "loss": 0.11542816162109375, "step": 109740 }, { "epoch": 0.9489325643530968, "grad_norm": 3.141142214468508, "learning_rate": 3.2403916196019153e-06, "loss": 0.1281444549560547, "step": 109745 }, { "epoch": 0.94897579787464, "grad_norm": 43.00436844022133, "learning_rate": 3.2401885410060653e-06, "loss": 0.3253143310546875, "step": 109750 }, { "epoch": 0.9490190313961834, "grad_norm": 0.3461924543375924, "learning_rate": 3.239985461302485e-06, "loss": 0.01757946014404297, "step": 109755 }, { "epoch": 0.9490622649177266, "grad_norm": 0.5753956945944974, "learning_rate": 3.239782380492114e-06, "loss": 0.022254133224487306, "step": 109760 }, { "epoch": 0.9491054984392698, "grad_norm": 0.7845417852753359, "learning_rate": 3.239579298575888e-06, "loss": 0.163155460357666, "step": 109765 }, { "epoch": 0.9491487319608132, "grad_norm": 25.107287308966576, "learning_rate": 3.2393762155547423e-06, "loss": 0.3566596984863281, "step": 109770 }, { "epoch": 0.9491919654823564, "grad_norm": 4.12206449896247, "learning_rate": 3.239173131429615e-06, "loss": 0.17084922790527343, "step": 109775 }, { "epoch": 0.9492351990038996, "grad_norm": 0.06359173710121534, "learning_rate": 3.2389700462014437e-06, "loss": 0.2806999206542969, "step": 109780 }, { "epoch": 0.949278432525443, "grad_norm": 0.25437488844182804, "learning_rate": 3.2387669598711623e-06, "loss": 0.3106706619262695, "step": 109785 }, { "epoch": 0.9493216660469862, "grad_norm": 1.2856274726168853, "learning_rate": 3.2385638724397092e-06, "loss": 0.05969696044921875, "step": 109790 }, { "epoch": 0.9493648995685294, "grad_norm": 0.33128627979425496, "learning_rate": 3.2383607839080205e-06, "loss": 0.036985015869140624, "step": 109795 }, { "epoch": 0.9494081330900728, "grad_norm": 16.48701160220291, "learning_rate": 3.238157694277033e-06, "loss": 0.07719345092773437, "step": 109800 }, { "epoch": 0.949451366611616, "grad_norm": 17.188634612734, "learning_rate": 3.2379546035476834e-06, "loss": 0.2891632080078125, "step": 109805 }, { "epoch": 0.9494946001331592, "grad_norm": 9.623998531145617, "learning_rate": 3.237751511720908e-06, "loss": 0.23313751220703124, "step": 109810 }, { "epoch": 0.9495378336547025, "grad_norm": 3.3310806399561423, "learning_rate": 3.2375484187976437e-06, "loss": 0.060937118530273435, "step": 109815 }, { "epoch": 0.9495810671762458, "grad_norm": 10.534844346834502, "learning_rate": 3.237345324778827e-06, "loss": 0.169415283203125, "step": 109820 }, { "epoch": 0.949624300697789, "grad_norm": 16.59774711684429, "learning_rate": 3.2371422296653953e-06, "loss": 0.4799896240234375, "step": 109825 }, { "epoch": 0.9496675342193323, "grad_norm": 1.335056374685933, "learning_rate": 3.236939133458283e-06, "loss": 0.047307586669921874, "step": 109830 }, { "epoch": 0.9497107677408756, "grad_norm": 17.291238213461575, "learning_rate": 3.236736036158429e-06, "loss": 0.0501190185546875, "step": 109835 }, { "epoch": 0.9497540012624188, "grad_norm": 43.186933749838985, "learning_rate": 3.23653293776677e-06, "loss": 0.372503662109375, "step": 109840 }, { "epoch": 0.949797234783962, "grad_norm": 16.629918566815633, "learning_rate": 3.236329838284242e-06, "loss": 0.16514835357666016, "step": 109845 }, { "epoch": 0.9498404683055054, "grad_norm": 3.2376849750445995, "learning_rate": 3.2361267377117814e-06, "loss": 0.06808662414550781, "step": 109850 }, { "epoch": 0.9498837018270486, "grad_norm": 2.291716585372345, "learning_rate": 3.235923636050325e-06, "loss": 0.13303146362304688, "step": 109855 }, { "epoch": 0.9499269353485918, "grad_norm": 2.6693654587953404, "learning_rate": 3.23572053330081e-06, "loss": 0.20887260437011718, "step": 109860 }, { "epoch": 0.9499701688701352, "grad_norm": 16.10638531108141, "learning_rate": 3.2355174294641713e-06, "loss": 0.0498809814453125, "step": 109865 }, { "epoch": 0.9500134023916784, "grad_norm": 1.0308851667925059, "learning_rate": 3.2353143245413485e-06, "loss": 0.49585800170898436, "step": 109870 }, { "epoch": 0.9500566359132216, "grad_norm": 25.859608507094684, "learning_rate": 3.235111218533277e-06, "loss": 0.24895248413085938, "step": 109875 }, { "epoch": 0.950099869434765, "grad_norm": 4.263439376928589, "learning_rate": 3.2349081114408925e-06, "loss": 0.5203235626220704, "step": 109880 }, { "epoch": 0.9501431029563082, "grad_norm": 44.364467591925404, "learning_rate": 3.234705003265133e-06, "loss": 0.38956451416015625, "step": 109885 }, { "epoch": 0.9501863364778514, "grad_norm": 1.7119842173036235, "learning_rate": 3.2345018940069342e-06, "loss": 0.03421974182128906, "step": 109890 }, { "epoch": 0.9502295699993948, "grad_norm": 33.5485565013682, "learning_rate": 3.2342987836672337e-06, "loss": 0.1819488525390625, "step": 109895 }, { "epoch": 0.950272803520938, "grad_norm": 2.337270992466396, "learning_rate": 3.234095672246968e-06, "loss": 0.2345062255859375, "step": 109900 }, { "epoch": 0.9503160370424812, "grad_norm": 3.7184101084420673, "learning_rate": 3.2338925597470745e-06, "loss": 0.342425537109375, "step": 109905 }, { "epoch": 0.9503592705640245, "grad_norm": 1.4279269770508887, "learning_rate": 3.233689446168489e-06, "loss": 0.11778717041015625, "step": 109910 }, { "epoch": 0.9504025040855678, "grad_norm": 11.517307257835128, "learning_rate": 3.2334863315121467e-06, "loss": 0.15993423461914064, "step": 109915 }, { "epoch": 0.950445737607111, "grad_norm": 6.492995612460533, "learning_rate": 3.233283215778987e-06, "loss": 0.20589599609375, "step": 109920 }, { "epoch": 0.9504889711286543, "grad_norm": 37.41547148321761, "learning_rate": 3.2330800989699463e-06, "loss": 0.17873077392578124, "step": 109925 }, { "epoch": 0.9505322046501976, "grad_norm": 0.6631860162155595, "learning_rate": 3.232876981085961e-06, "loss": 0.0647735595703125, "step": 109930 }, { "epoch": 0.9505754381717408, "grad_norm": 5.360331664338708, "learning_rate": 3.232673862127967e-06, "loss": 0.0611968994140625, "step": 109935 }, { "epoch": 0.9506186716932841, "grad_norm": 8.843968657063385, "learning_rate": 3.2324707420969013e-06, "loss": 0.13338088989257812, "step": 109940 }, { "epoch": 0.9506619052148274, "grad_norm": 22.669515151268133, "learning_rate": 3.2322676209937016e-06, "loss": 0.26652374267578127, "step": 109945 }, { "epoch": 0.9507051387363706, "grad_norm": 0.4608474964243104, "learning_rate": 3.232064498819304e-06, "loss": 0.043043327331542966, "step": 109950 }, { "epoch": 0.9507483722579139, "grad_norm": 28.972247506650998, "learning_rate": 3.2318613755746463e-06, "loss": 0.08466033935546875, "step": 109955 }, { "epoch": 0.9507916057794572, "grad_norm": 16.870191977200587, "learning_rate": 3.231658251260663e-06, "loss": 0.11776046752929688, "step": 109960 }, { "epoch": 0.9508348393010004, "grad_norm": 1.2479071670305915, "learning_rate": 3.2314551258782936e-06, "loss": 0.049653244018554685, "step": 109965 }, { "epoch": 0.9508780728225437, "grad_norm": 0.6362268754604369, "learning_rate": 3.2312519994284724e-06, "loss": 0.2732383728027344, "step": 109970 }, { "epoch": 0.950921306344087, "grad_norm": 6.462958804970542, "learning_rate": 3.231048871912138e-06, "loss": 0.253729248046875, "step": 109975 }, { "epoch": 0.9509645398656302, "grad_norm": 9.529983358598546, "learning_rate": 3.230845743330227e-06, "loss": 0.0425262451171875, "step": 109980 }, { "epoch": 0.9510077733871735, "grad_norm": 18.11019590920071, "learning_rate": 3.230642613683676e-06, "loss": 0.1263824462890625, "step": 109985 }, { "epoch": 0.9510510069087167, "grad_norm": 0.7253701937653955, "learning_rate": 3.2304394829734216e-06, "loss": 0.5975872039794922, "step": 109990 }, { "epoch": 0.95109424043026, "grad_norm": 5.588526589595385, "learning_rate": 3.2302363512003994e-06, "loss": 0.0659423828125, "step": 109995 }, { "epoch": 0.9511374739518033, "grad_norm": 0.44169665378785333, "learning_rate": 3.2300332183655477e-06, "loss": 0.03046112060546875, "step": 110000 }, { "epoch": 0.9511807074733465, "grad_norm": 1.9514771696057378, "learning_rate": 3.229830084469804e-06, "loss": 0.079412841796875, "step": 110005 }, { "epoch": 0.9512239409948898, "grad_norm": 8.645725823501037, "learning_rate": 3.229626949514104e-06, "loss": 0.16050949096679687, "step": 110010 }, { "epoch": 0.9512671745164331, "grad_norm": 3.779479674952417, "learning_rate": 3.2294238134993846e-06, "loss": 0.059049224853515624, "step": 110015 }, { "epoch": 0.9513104080379763, "grad_norm": 11.750574603406813, "learning_rate": 3.2292206764265836e-06, "loss": 0.0818511962890625, "step": 110020 }, { "epoch": 0.9513536415595196, "grad_norm": 1.7578820176405634, "learning_rate": 3.2290175382966367e-06, "loss": 0.0465423583984375, "step": 110025 }, { "epoch": 0.9513968750810629, "grad_norm": 5.342937774512975, "learning_rate": 3.2288143991104803e-06, "loss": 0.1237213134765625, "step": 110030 }, { "epoch": 0.9514401086026061, "grad_norm": 17.450420821892514, "learning_rate": 3.2286112588690527e-06, "loss": 0.29945068359375, "step": 110035 }, { "epoch": 0.9514833421241494, "grad_norm": 12.335977256150674, "learning_rate": 3.2284081175732897e-06, "loss": 0.21875534057617188, "step": 110040 }, { "epoch": 0.9515265756456927, "grad_norm": 45.98451529402776, "learning_rate": 3.22820497522413e-06, "loss": 0.27165756225585935, "step": 110045 }, { "epoch": 0.9515698091672359, "grad_norm": 28.98079617940257, "learning_rate": 3.2280018318225086e-06, "loss": 0.08105697631835937, "step": 110050 }, { "epoch": 0.9516130426887792, "grad_norm": 0.061256098252519216, "learning_rate": 3.227798687369362e-06, "loss": 0.045256423950195315, "step": 110055 }, { "epoch": 0.9516562762103224, "grad_norm": 4.502628430927189, "learning_rate": 3.227595541865628e-06, "loss": 0.22235183715820311, "step": 110060 }, { "epoch": 0.9516995097318657, "grad_norm": 8.029583107285161, "learning_rate": 3.227392395312245e-06, "loss": 0.6532085418701172, "step": 110065 }, { "epoch": 0.951742743253409, "grad_norm": 3.778933232104088, "learning_rate": 3.2271892477101468e-06, "loss": 0.14590835571289062, "step": 110070 }, { "epoch": 0.9517859767749522, "grad_norm": 4.711727369951396, "learning_rate": 3.226986099060273e-06, "loss": 0.21379737854003905, "step": 110075 }, { "epoch": 0.9518292102964955, "grad_norm": 2.39795757874312, "learning_rate": 3.226782949363559e-06, "loss": 0.19698867797851563, "step": 110080 }, { "epoch": 0.9518724438180387, "grad_norm": 10.768517715994655, "learning_rate": 3.2265797986209416e-06, "loss": 0.09525012969970703, "step": 110085 }, { "epoch": 0.951915677339582, "grad_norm": 4.4566406611392875, "learning_rate": 3.226376646833358e-06, "loss": 0.015702056884765624, "step": 110090 }, { "epoch": 0.9519589108611253, "grad_norm": 2.051490869686838, "learning_rate": 3.226173494001746e-06, "loss": 0.06584014892578124, "step": 110095 }, { "epoch": 0.9520021443826685, "grad_norm": 0.46288607963712236, "learning_rate": 3.2259703401270423e-06, "loss": 0.1465179443359375, "step": 110100 }, { "epoch": 0.9520453779042118, "grad_norm": 0.06878976918280162, "learning_rate": 3.2257671852101823e-06, "loss": 0.20715103149414063, "step": 110105 }, { "epoch": 0.9520886114257551, "grad_norm": 0.5928192213063294, "learning_rate": 3.225564029252105e-06, "loss": 0.04011516571044922, "step": 110110 }, { "epoch": 0.9521318449472983, "grad_norm": 20.6088269586943, "learning_rate": 3.225360872253745e-06, "loss": 0.4241790771484375, "step": 110115 }, { "epoch": 0.9521750784688416, "grad_norm": 22.216402601020587, "learning_rate": 3.225157714216041e-06, "loss": 0.0851531982421875, "step": 110120 }, { "epoch": 0.9522183119903849, "grad_norm": 21.367026416956197, "learning_rate": 3.2249545551399303e-06, "loss": 0.17678985595703126, "step": 110125 }, { "epoch": 0.9522615455119281, "grad_norm": 4.690103373706053, "learning_rate": 3.2247513950263487e-06, "loss": 0.139276123046875, "step": 110130 }, { "epoch": 0.9523047790334714, "grad_norm": 4.51588073626261, "learning_rate": 3.2245482338762333e-06, "loss": 0.12066421508789063, "step": 110135 }, { "epoch": 0.9523480125550147, "grad_norm": 4.369974452398656, "learning_rate": 3.224345071690521e-06, "loss": 0.037641143798828124, "step": 110140 }, { "epoch": 0.9523912460765579, "grad_norm": 10.136712888634479, "learning_rate": 3.2241419084701496e-06, "loss": 0.3753318786621094, "step": 110145 }, { "epoch": 0.9524344795981012, "grad_norm": 48.879054087712575, "learning_rate": 3.223938744216055e-06, "loss": 0.229302978515625, "step": 110150 }, { "epoch": 0.9524777131196445, "grad_norm": 10.359041070338309, "learning_rate": 3.2237355789291755e-06, "loss": 0.09562835693359376, "step": 110155 }, { "epoch": 0.9525209466411877, "grad_norm": 7.311901191576818, "learning_rate": 3.223532412610447e-06, "loss": 0.02512969970703125, "step": 110160 }, { "epoch": 0.9525641801627309, "grad_norm": 6.391290325546852, "learning_rate": 3.2233292452608063e-06, "loss": 0.11540374755859376, "step": 110165 }, { "epoch": 0.9526074136842743, "grad_norm": 40.801762237102345, "learning_rate": 3.223126076881191e-06, "loss": 0.25812835693359376, "step": 110170 }, { "epoch": 0.9526506472058175, "grad_norm": 3.1134417683882547, "learning_rate": 3.2229229074725384e-06, "loss": 0.03262786865234375, "step": 110175 }, { "epoch": 0.9526938807273607, "grad_norm": 2.8492796153928053, "learning_rate": 3.2227197370357854e-06, "loss": 0.07790908813476563, "step": 110180 }, { "epoch": 0.9527371142489041, "grad_norm": 3.196097497642683, "learning_rate": 3.222516565571867e-06, "loss": 0.073077392578125, "step": 110185 }, { "epoch": 0.9527803477704473, "grad_norm": 15.16412521841275, "learning_rate": 3.222313393081724e-06, "loss": 0.12928924560546876, "step": 110190 }, { "epoch": 0.9528235812919905, "grad_norm": 0.09327109096715656, "learning_rate": 3.2221102195662893e-06, "loss": 0.01670989990234375, "step": 110195 }, { "epoch": 0.9528668148135339, "grad_norm": 0.18506321168232365, "learning_rate": 3.2219070450265024e-06, "loss": 0.04234161376953125, "step": 110200 }, { "epoch": 0.9529100483350771, "grad_norm": 0.35089300205824864, "learning_rate": 3.2217038694633007e-06, "loss": 0.0166015625, "step": 110205 }, { "epoch": 0.9529532818566203, "grad_norm": 7.715860975721739, "learning_rate": 3.22150069287762e-06, "loss": 0.1402130126953125, "step": 110210 }, { "epoch": 0.9529965153781637, "grad_norm": 1.4873406242100005, "learning_rate": 3.221297515270397e-06, "loss": 0.035076904296875, "step": 110215 }, { "epoch": 0.9530397488997069, "grad_norm": 14.774241427571951, "learning_rate": 3.221094336642571e-06, "loss": 0.15551910400390626, "step": 110220 }, { "epoch": 0.9530829824212501, "grad_norm": 0.5813948269615132, "learning_rate": 3.2208911569950753e-06, "loss": 0.5433135986328125, "step": 110225 }, { "epoch": 0.9531262159427935, "grad_norm": 1.9080955314503278, "learning_rate": 3.22068797632885e-06, "loss": 0.2263427734375, "step": 110230 }, { "epoch": 0.9531694494643367, "grad_norm": 1.5132869061373517, "learning_rate": 3.2204847946448314e-06, "loss": 0.34659881591796876, "step": 110235 }, { "epoch": 0.9532126829858799, "grad_norm": 49.2586396832392, "learning_rate": 3.2202816119439574e-06, "loss": 0.2928592681884766, "step": 110240 }, { "epoch": 0.9532559165074233, "grad_norm": 15.605034001113697, "learning_rate": 3.2200784282271624e-06, "loss": 0.17321929931640626, "step": 110245 }, { "epoch": 0.9532991500289665, "grad_norm": 18.591425221551702, "learning_rate": 3.2198752434953863e-06, "loss": 0.11825485229492187, "step": 110250 }, { "epoch": 0.9533423835505097, "grad_norm": 14.456200959912405, "learning_rate": 3.2196720577495636e-06, "loss": 0.2777107238769531, "step": 110255 }, { "epoch": 0.9533856170720529, "grad_norm": 3.1920492389940054, "learning_rate": 3.2194688709906345e-06, "loss": 0.22991943359375, "step": 110260 }, { "epoch": 0.9534288505935963, "grad_norm": 1.9599375549552063, "learning_rate": 3.219265683219533e-06, "loss": 0.03392333984375, "step": 110265 }, { "epoch": 0.9534720841151395, "grad_norm": 23.46098941828903, "learning_rate": 3.2190624944371985e-06, "loss": 0.12360954284667969, "step": 110270 }, { "epoch": 0.9535153176366827, "grad_norm": 22.090684000876216, "learning_rate": 3.218859304644567e-06, "loss": 0.1171722412109375, "step": 110275 }, { "epoch": 0.9535585511582261, "grad_norm": 22.855146903101872, "learning_rate": 3.218656113842575e-06, "loss": 0.1672760009765625, "step": 110280 }, { "epoch": 0.9536017846797693, "grad_norm": 14.746380876084762, "learning_rate": 3.2184529220321604e-06, "loss": 0.19913787841796876, "step": 110285 }, { "epoch": 0.9536450182013125, "grad_norm": 14.166817385408018, "learning_rate": 3.218249729214261e-06, "loss": 0.1356842041015625, "step": 110290 }, { "epoch": 0.9536882517228559, "grad_norm": 0.7223582281406666, "learning_rate": 3.218046535389813e-06, "loss": 0.061742973327636716, "step": 110295 }, { "epoch": 0.9537314852443991, "grad_norm": 26.78591239232085, "learning_rate": 3.2178433405597533e-06, "loss": 0.24648818969726563, "step": 110300 }, { "epoch": 0.9537747187659423, "grad_norm": 3.726820023490071, "learning_rate": 3.21764014472502e-06, "loss": 0.219873046875, "step": 110305 }, { "epoch": 0.9538179522874857, "grad_norm": 4.381901436831521, "learning_rate": 3.2174369478865485e-06, "loss": 0.03520050048828125, "step": 110310 }, { "epoch": 0.9538611858090289, "grad_norm": 5.218721607010756, "learning_rate": 3.2172337500452774e-06, "loss": 0.072711181640625, "step": 110315 }, { "epoch": 0.9539044193305721, "grad_norm": 1.051933555134644, "learning_rate": 3.217030551202144e-06, "loss": 0.14996414184570311, "step": 110320 }, { "epoch": 0.9539476528521155, "grad_norm": 17.315869087762273, "learning_rate": 3.216827351358084e-06, "loss": 0.08797454833984375, "step": 110325 }, { "epoch": 0.9539908863736587, "grad_norm": 8.075477287907677, "learning_rate": 3.2166241505140363e-06, "loss": 0.14448490142822265, "step": 110330 }, { "epoch": 0.9540341198952019, "grad_norm": 23.31023511892316, "learning_rate": 3.216420948670937e-06, "loss": 0.15665969848632813, "step": 110335 }, { "epoch": 0.9540773534167452, "grad_norm": 0.42879532076681653, "learning_rate": 3.2162177458297227e-06, "loss": 0.15500640869140625, "step": 110340 }, { "epoch": 0.9541205869382885, "grad_norm": 0.6507873113638484, "learning_rate": 3.2160145419913313e-06, "loss": 0.045721435546875, "step": 110345 }, { "epoch": 0.9541638204598317, "grad_norm": 3.5177490262585565, "learning_rate": 3.2158113371567012e-06, "loss": 0.022784423828125, "step": 110350 }, { "epoch": 0.954207053981375, "grad_norm": 4.500712903906002, "learning_rate": 3.2156081313267677e-06, "loss": 0.06653556823730469, "step": 110355 }, { "epoch": 0.9542502875029183, "grad_norm": 1.460404789548897, "learning_rate": 3.2154049245024674e-06, "loss": 0.15357742309570313, "step": 110360 }, { "epoch": 0.9542935210244615, "grad_norm": 12.762105042568907, "learning_rate": 3.2152017166847393e-06, "loss": 0.11629867553710938, "step": 110365 }, { "epoch": 0.9543367545460048, "grad_norm": 40.25487353015887, "learning_rate": 3.2149985078745203e-06, "loss": 0.31001434326171873, "step": 110370 }, { "epoch": 0.9543799880675481, "grad_norm": 3.535966959617858, "learning_rate": 3.214795298072747e-06, "loss": 0.3303955078125, "step": 110375 }, { "epoch": 0.9544232215890913, "grad_norm": 6.721184302447873, "learning_rate": 3.2145920872803568e-06, "loss": 0.058243179321289064, "step": 110380 }, { "epoch": 0.9544664551106345, "grad_norm": 9.965246684707937, "learning_rate": 3.214388875498287e-06, "loss": 0.05571327209472656, "step": 110385 }, { "epoch": 0.9545096886321779, "grad_norm": 3.468048039438658, "learning_rate": 3.2141856627274738e-06, "loss": 0.14121856689453124, "step": 110390 }, { "epoch": 0.9545529221537211, "grad_norm": 0.6559113429803841, "learning_rate": 3.213982448968856e-06, "loss": 0.09620742797851563, "step": 110395 }, { "epoch": 0.9545961556752643, "grad_norm": 1.4526622508213647, "learning_rate": 3.21377923422337e-06, "loss": 0.08550567626953125, "step": 110400 }, { "epoch": 0.9546393891968077, "grad_norm": 2.906663987160682, "learning_rate": 3.213576018491953e-06, "loss": 0.03834609985351563, "step": 110405 }, { "epoch": 0.9546826227183509, "grad_norm": 1.3313284353235615, "learning_rate": 3.2133728017755424e-06, "loss": 0.04182586669921875, "step": 110410 }, { "epoch": 0.9547258562398941, "grad_norm": 7.707022480106129, "learning_rate": 3.2131695840750757e-06, "loss": 0.08544921875, "step": 110415 }, { "epoch": 0.9547690897614374, "grad_norm": 6.878410029850563, "learning_rate": 3.2129663653914887e-06, "loss": 0.09385833740234376, "step": 110420 }, { "epoch": 0.9548123232829807, "grad_norm": 5.021018618516132, "learning_rate": 3.2127631457257196e-06, "loss": 0.1555255889892578, "step": 110425 }, { "epoch": 0.9548555568045239, "grad_norm": 52.81695351642401, "learning_rate": 3.2125599250787065e-06, "loss": 0.3633296966552734, "step": 110430 }, { "epoch": 0.9548987903260672, "grad_norm": 3.9534517985533717, "learning_rate": 3.2123567034513865e-06, "loss": 0.25837860107421873, "step": 110435 }, { "epoch": 0.9549420238476105, "grad_norm": 65.01702032217757, "learning_rate": 3.212153480844694e-06, "loss": 0.2925201416015625, "step": 110440 }, { "epoch": 0.9549852573691537, "grad_norm": 1.7390394599230723, "learning_rate": 3.21195025725957e-06, "loss": 0.234307861328125, "step": 110445 }, { "epoch": 0.955028490890697, "grad_norm": 11.844205433450876, "learning_rate": 3.2117470326969498e-06, "loss": 0.10588130950927735, "step": 110450 }, { "epoch": 0.9550717244122403, "grad_norm": 0.2240442159714222, "learning_rate": 3.21154380715777e-06, "loss": 0.26930999755859375, "step": 110455 }, { "epoch": 0.9551149579337835, "grad_norm": 15.69975170506084, "learning_rate": 3.21134058064297e-06, "loss": 0.240081787109375, "step": 110460 }, { "epoch": 0.9551581914553268, "grad_norm": 9.94005939237433, "learning_rate": 3.2111373531534865e-06, "loss": 0.5731460571289062, "step": 110465 }, { "epoch": 0.9552014249768701, "grad_norm": 5.4086238185236, "learning_rate": 3.210934124690255e-06, "loss": 0.18485641479492188, "step": 110470 }, { "epoch": 0.9552446584984133, "grad_norm": 2.138246561319409, "learning_rate": 3.2107308952542145e-06, "loss": 0.06961669921875, "step": 110475 }, { "epoch": 0.9552878920199566, "grad_norm": 1.8894628909212658, "learning_rate": 3.2105276648463015e-06, "loss": 0.15765838623046874, "step": 110480 }, { "epoch": 0.9553311255414999, "grad_norm": 26.360538663645986, "learning_rate": 3.210324433467453e-06, "loss": 0.11449871063232422, "step": 110485 }, { "epoch": 0.9553743590630431, "grad_norm": 1.0306951169007479, "learning_rate": 3.2101212011186076e-06, "loss": 0.1854705810546875, "step": 110490 }, { "epoch": 0.9554175925845864, "grad_norm": 7.344448768743419, "learning_rate": 3.2099179678007015e-06, "loss": 0.3610076904296875, "step": 110495 }, { "epoch": 0.9554608261061297, "grad_norm": 9.657457931874909, "learning_rate": 3.209714733514672e-06, "loss": 0.08954620361328125, "step": 110500 }, { "epoch": 0.9555040596276729, "grad_norm": 4.1275718522578835, "learning_rate": 3.2095114982614574e-06, "loss": 0.05399932861328125, "step": 110505 }, { "epoch": 0.9555472931492162, "grad_norm": 36.41336053248058, "learning_rate": 3.2093082620419934e-06, "loss": 0.484735107421875, "step": 110510 }, { "epoch": 0.9555905266707594, "grad_norm": 0.41635954942027337, "learning_rate": 3.2091050248572193e-06, "loss": 0.1916261672973633, "step": 110515 }, { "epoch": 0.9556337601923027, "grad_norm": 4.051015747893547, "learning_rate": 3.20890178670807e-06, "loss": 0.0775360107421875, "step": 110520 }, { "epoch": 0.955676993713846, "grad_norm": 12.294312094911314, "learning_rate": 3.208698547595485e-06, "loss": 0.021180343627929688, "step": 110525 }, { "epoch": 0.9557202272353892, "grad_norm": 8.331195950738826, "learning_rate": 3.208495307520401e-06, "loss": 0.0668426513671875, "step": 110530 }, { "epoch": 0.9557634607569325, "grad_norm": 0.4503530076200507, "learning_rate": 3.2082920664837546e-06, "loss": 0.06378326416015626, "step": 110535 }, { "epoch": 0.9558066942784758, "grad_norm": 12.162586153215086, "learning_rate": 3.208088824486483e-06, "loss": 0.39974250793457033, "step": 110540 }, { "epoch": 0.955849927800019, "grad_norm": 3.475085719778009, "learning_rate": 3.207885581529525e-06, "loss": 0.118096923828125, "step": 110545 }, { "epoch": 0.9558931613215623, "grad_norm": 4.155261889385248, "learning_rate": 3.2076823376138164e-06, "loss": 0.07938041687011718, "step": 110550 }, { "epoch": 0.9559363948431056, "grad_norm": 0.3162468409451337, "learning_rate": 3.2074790927402955e-06, "loss": 0.12900238037109374, "step": 110555 }, { "epoch": 0.9559796283646488, "grad_norm": 0.023987885991665323, "learning_rate": 3.2072758469098998e-06, "loss": 0.038820648193359376, "step": 110560 }, { "epoch": 0.9560228618861921, "grad_norm": 5.926754806407999, "learning_rate": 3.2070726001235655e-06, "loss": 0.241064453125, "step": 110565 }, { "epoch": 0.9560660954077354, "grad_norm": 11.794867394240509, "learning_rate": 3.206869352382231e-06, "loss": 0.11514511108398437, "step": 110570 }, { "epoch": 0.9561093289292786, "grad_norm": 6.538331809380305, "learning_rate": 3.2066661036868337e-06, "loss": 0.14952564239501953, "step": 110575 }, { "epoch": 0.9561525624508219, "grad_norm": 0.6045083513083561, "learning_rate": 3.20646285403831e-06, "loss": 0.11164588928222656, "step": 110580 }, { "epoch": 0.9561957959723651, "grad_norm": 1.9800750455404479, "learning_rate": 3.206259603437598e-06, "loss": 0.0654754638671875, "step": 110585 }, { "epoch": 0.9562390294939084, "grad_norm": 2.848818229865349, "learning_rate": 3.206056351885635e-06, "loss": 0.0239654541015625, "step": 110590 }, { "epoch": 0.9562822630154516, "grad_norm": 4.520477948144509, "learning_rate": 3.2058530993833585e-06, "loss": 0.038465118408203124, "step": 110595 }, { "epoch": 0.956325496536995, "grad_norm": 17.824859552779337, "learning_rate": 3.2056498459317055e-06, "loss": 0.1968385696411133, "step": 110600 }, { "epoch": 0.9563687300585382, "grad_norm": 0.40391306125539805, "learning_rate": 3.2054465915316135e-06, "loss": 0.14229278564453124, "step": 110605 }, { "epoch": 0.9564119635800814, "grad_norm": 1.251627790808717, "learning_rate": 3.2052433361840208e-06, "loss": 0.16332473754882812, "step": 110610 }, { "epoch": 0.9564551971016247, "grad_norm": 0.22991071326248164, "learning_rate": 3.2050400798898624e-06, "loss": 0.024468231201171874, "step": 110615 }, { "epoch": 0.956498430623168, "grad_norm": 54.76405331204136, "learning_rate": 3.204836822650078e-06, "loss": 0.0896514892578125, "step": 110620 }, { "epoch": 0.9565416641447112, "grad_norm": 4.991560316431429, "learning_rate": 3.204633564465605e-06, "loss": 0.35123491287231445, "step": 110625 }, { "epoch": 0.9565848976662545, "grad_norm": 22.675192003734896, "learning_rate": 3.204430305337379e-06, "loss": 0.13383560180664061, "step": 110630 }, { "epoch": 0.9566281311877978, "grad_norm": 0.029144001371406575, "learning_rate": 3.2042270452663393e-06, "loss": 0.10246391296386718, "step": 110635 }, { "epoch": 0.956671364709341, "grad_norm": 3.3999220247769686, "learning_rate": 3.2040237842534226e-06, "loss": 0.16843032836914062, "step": 110640 }, { "epoch": 0.9567145982308843, "grad_norm": 14.421248964023638, "learning_rate": 3.2038205222995653e-06, "loss": 0.08077163696289062, "step": 110645 }, { "epoch": 0.9567578317524276, "grad_norm": 3.8623390664777113, "learning_rate": 3.203617259405706e-06, "loss": 0.1442108154296875, "step": 110650 }, { "epoch": 0.9568010652739708, "grad_norm": 0.9323134111368125, "learning_rate": 3.2034139955727825e-06, "loss": 0.038275146484375, "step": 110655 }, { "epoch": 0.9568442987955141, "grad_norm": 1.0057505018620625, "learning_rate": 3.203210730801731e-06, "loss": 0.2602867126464844, "step": 110660 }, { "epoch": 0.9568875323170574, "grad_norm": 0.24721008164936312, "learning_rate": 3.2030074650934904e-06, "loss": 0.00967559814453125, "step": 110665 }, { "epoch": 0.9569307658386006, "grad_norm": 0.04760444036388584, "learning_rate": 3.2028041984489968e-06, "loss": 0.20412216186523438, "step": 110670 }, { "epoch": 0.9569739993601439, "grad_norm": 7.383892346294666, "learning_rate": 3.202600930869188e-06, "loss": 0.19112625122070312, "step": 110675 }, { "epoch": 0.9570172328816872, "grad_norm": 0.5391383423370749, "learning_rate": 3.2023976623550015e-06, "loss": 0.0495880126953125, "step": 110680 }, { "epoch": 0.9570604664032304, "grad_norm": 22.09854963007559, "learning_rate": 3.2021943929073747e-06, "loss": 0.29410400390625, "step": 110685 }, { "epoch": 0.9571036999247736, "grad_norm": 0.7266625293236224, "learning_rate": 3.2019911225272464e-06, "loss": 0.13676300048828124, "step": 110690 }, { "epoch": 0.957146933446317, "grad_norm": 15.471374962470291, "learning_rate": 3.201787851215552e-06, "loss": 0.19927215576171875, "step": 110695 }, { "epoch": 0.9571901669678602, "grad_norm": 12.4034157593092, "learning_rate": 3.2015845789732304e-06, "loss": 0.036833000183105466, "step": 110700 }, { "epoch": 0.9572334004894034, "grad_norm": 12.190889310355063, "learning_rate": 3.2013813058012176e-06, "loss": 0.17545318603515625, "step": 110705 }, { "epoch": 0.9572766340109468, "grad_norm": 15.514543950972357, "learning_rate": 3.2011780317004518e-06, "loss": 0.3004608154296875, "step": 110710 }, { "epoch": 0.95731986753249, "grad_norm": 25.46226157619388, "learning_rate": 3.2009747566718723e-06, "loss": 0.3383796691894531, "step": 110715 }, { "epoch": 0.9573631010540332, "grad_norm": 1.0833795967628752, "learning_rate": 3.200771480716414e-06, "loss": 0.19077701568603517, "step": 110720 }, { "epoch": 0.9574063345755766, "grad_norm": 40.75961435942934, "learning_rate": 3.200568203835015e-06, "loss": 0.1110626220703125, "step": 110725 }, { "epoch": 0.9574495680971198, "grad_norm": 1.3166608690723152, "learning_rate": 3.200364926028614e-06, "loss": 0.3377994537353516, "step": 110730 }, { "epoch": 0.957492801618663, "grad_norm": 0.2883034554323189, "learning_rate": 3.200161647298147e-06, "loss": 0.08128433227539063, "step": 110735 }, { "epoch": 0.9575360351402064, "grad_norm": 15.42642194913792, "learning_rate": 3.1999583676445524e-06, "loss": 0.119415283203125, "step": 110740 }, { "epoch": 0.9575792686617496, "grad_norm": 3.2130686114270666, "learning_rate": 3.199755087068768e-06, "loss": 0.0387176513671875, "step": 110745 }, { "epoch": 0.9576225021832928, "grad_norm": 6.7361510518966154, "learning_rate": 3.1995518055717305e-06, "loss": 0.1526958465576172, "step": 110750 }, { "epoch": 0.9576657357048362, "grad_norm": 3.6722418902212945, "learning_rate": 3.1993485231543772e-06, "loss": 0.11775341033935546, "step": 110755 }, { "epoch": 0.9577089692263794, "grad_norm": 25.985616756635476, "learning_rate": 3.1991452398176467e-06, "loss": 0.41406097412109377, "step": 110760 }, { "epoch": 0.9577522027479226, "grad_norm": 4.140923623539087, "learning_rate": 3.198941955562476e-06, "loss": 0.17923812866210936, "step": 110765 }, { "epoch": 0.9577954362694658, "grad_norm": 0.35352854785908655, "learning_rate": 3.198738670389802e-06, "loss": 0.0442230224609375, "step": 110770 }, { "epoch": 0.9578386697910092, "grad_norm": 0.12560076734647868, "learning_rate": 3.1985353843005633e-06, "loss": 0.2578380584716797, "step": 110775 }, { "epoch": 0.9578819033125524, "grad_norm": 0.6365310649079301, "learning_rate": 3.198332097295697e-06, "loss": 0.04340972900390625, "step": 110780 }, { "epoch": 0.9579251368340956, "grad_norm": 1.1510885089768113, "learning_rate": 3.1981288093761395e-06, "loss": 0.065240478515625, "step": 110785 }, { "epoch": 0.957968370355639, "grad_norm": 4.253420767844587, "learning_rate": 3.19792552054283e-06, "loss": 0.08992691040039062, "step": 110790 }, { "epoch": 0.9580116038771822, "grad_norm": 42.2141827859285, "learning_rate": 3.1977222307967063e-06, "loss": 0.19419021606445314, "step": 110795 }, { "epoch": 0.9580548373987254, "grad_norm": 4.715727734027359, "learning_rate": 3.197518940138705e-06, "loss": 0.06250686645507812, "step": 110800 }, { "epoch": 0.9580980709202688, "grad_norm": 0.23182099973842427, "learning_rate": 3.197315648569763e-06, "loss": 0.11286392211914062, "step": 110805 }, { "epoch": 0.958141304441812, "grad_norm": 7.519147642626269, "learning_rate": 3.1971123560908195e-06, "loss": 0.13078155517578124, "step": 110810 }, { "epoch": 0.9581845379633552, "grad_norm": 5.282031218584406, "learning_rate": 3.1969090627028106e-06, "loss": 0.10083942413330078, "step": 110815 }, { "epoch": 0.9582277714848986, "grad_norm": 19.184799643553603, "learning_rate": 3.196705768406674e-06, "loss": 0.09382209777832032, "step": 110820 }, { "epoch": 0.9582710050064418, "grad_norm": 1.1445162881905624, "learning_rate": 3.196502473203349e-06, "loss": 0.025796890258789062, "step": 110825 }, { "epoch": 0.958314238527985, "grad_norm": 5.691591834547065, "learning_rate": 3.196299177093772e-06, "loss": 0.09666023254394532, "step": 110830 }, { "epoch": 0.9583574720495284, "grad_norm": 1.5953238073700546, "learning_rate": 3.1960958800788796e-06, "loss": 0.04929161071777344, "step": 110835 }, { "epoch": 0.9584007055710716, "grad_norm": 7.343556132395507, "learning_rate": 3.1958925821596112e-06, "loss": 0.10548858642578125, "step": 110840 }, { "epoch": 0.9584439390926148, "grad_norm": 27.09397376282731, "learning_rate": 3.195689283336902e-06, "loss": 0.1961181640625, "step": 110845 }, { "epoch": 0.9584871726141582, "grad_norm": 8.369421283965247, "learning_rate": 3.1954859836116927e-06, "loss": 0.20114288330078126, "step": 110850 }, { "epoch": 0.9585304061357014, "grad_norm": 7.335631085732296, "learning_rate": 3.1952826829849187e-06, "loss": 0.11303634643554687, "step": 110855 }, { "epoch": 0.9585736396572446, "grad_norm": 0.24355332241470556, "learning_rate": 3.1950793814575187e-06, "loss": 0.08656940460205079, "step": 110860 }, { "epoch": 0.9586168731787879, "grad_norm": 8.2545387945657, "learning_rate": 3.1948760790304295e-06, "loss": 0.083349609375, "step": 110865 }, { "epoch": 0.9586601067003312, "grad_norm": 2.106468175501637, "learning_rate": 3.194672775704589e-06, "loss": 0.1205474853515625, "step": 110870 }, { "epoch": 0.9587033402218744, "grad_norm": 10.05780195622641, "learning_rate": 3.1944694714809347e-06, "loss": 0.09102020263671876, "step": 110875 }, { "epoch": 0.9587465737434177, "grad_norm": 0.8968572181376746, "learning_rate": 3.194266166360405e-06, "loss": 0.2574432373046875, "step": 110880 }, { "epoch": 0.958789807264961, "grad_norm": 9.233403144782518, "learning_rate": 3.1940628603439356e-06, "loss": 0.18330230712890624, "step": 110885 }, { "epoch": 0.9588330407865042, "grad_norm": 8.098428670944088, "learning_rate": 3.193859553432467e-06, "loss": 0.5719165802001953, "step": 110890 }, { "epoch": 0.9588762743080474, "grad_norm": 1.0083664419196334, "learning_rate": 3.193656245626935e-06, "loss": 0.44130096435546873, "step": 110895 }, { "epoch": 0.9589195078295908, "grad_norm": 14.845040201186459, "learning_rate": 3.1934529369282765e-06, "loss": 0.13099136352539062, "step": 110900 }, { "epoch": 0.958962741351134, "grad_norm": 0.4131571252685712, "learning_rate": 3.1932496273374295e-06, "loss": 0.2974740982055664, "step": 110905 }, { "epoch": 0.9590059748726772, "grad_norm": 7.0348270612754185, "learning_rate": 3.193046316855334e-06, "loss": 0.2827880859375, "step": 110910 }, { "epoch": 0.9590492083942206, "grad_norm": 0.678931049852465, "learning_rate": 3.1928430054829254e-06, "loss": 0.05395698547363281, "step": 110915 }, { "epoch": 0.9590924419157638, "grad_norm": 4.739818073607133, "learning_rate": 3.192639693221142e-06, "loss": 0.29756317138671873, "step": 110920 }, { "epoch": 0.959135675437307, "grad_norm": 4.675428821892834, "learning_rate": 3.192436380070922e-06, "loss": 0.414764404296875, "step": 110925 }, { "epoch": 0.9591789089588504, "grad_norm": 1.0181001377598138, "learning_rate": 3.192233066033201e-06, "loss": 0.05039138793945312, "step": 110930 }, { "epoch": 0.9592221424803936, "grad_norm": 7.671894411406306, "learning_rate": 3.192029751108919e-06, "loss": 0.1516876220703125, "step": 110935 }, { "epoch": 0.9592653760019368, "grad_norm": 8.717341419024997, "learning_rate": 3.1918264352990127e-06, "loss": 0.09448165893554687, "step": 110940 }, { "epoch": 0.9593086095234801, "grad_norm": 13.12701543777508, "learning_rate": 3.1916231186044197e-06, "loss": 0.05257720947265625, "step": 110945 }, { "epoch": 0.9593518430450234, "grad_norm": 8.866510693065635, "learning_rate": 3.191419801026078e-06, "loss": 0.3640045166015625, "step": 110950 }, { "epoch": 0.9593950765665666, "grad_norm": 7.767153316872274, "learning_rate": 3.1912164825649247e-06, "loss": 0.15260696411132812, "step": 110955 }, { "epoch": 0.9594383100881099, "grad_norm": 17.611929932670183, "learning_rate": 3.1910131632218977e-06, "loss": 0.09300537109375, "step": 110960 }, { "epoch": 0.9594815436096532, "grad_norm": 0.33126725980109495, "learning_rate": 3.190809842997935e-06, "loss": 0.07114791870117188, "step": 110965 }, { "epoch": 0.9595247771311964, "grad_norm": 14.07325753054633, "learning_rate": 3.1906065218939745e-06, "loss": 0.23495101928710938, "step": 110970 }, { "epoch": 0.9595680106527397, "grad_norm": 59.25966217769865, "learning_rate": 3.190403199910954e-06, "loss": 0.13221588134765624, "step": 110975 }, { "epoch": 0.959611244174283, "grad_norm": 23.576394449403168, "learning_rate": 3.1901998770498096e-06, "loss": 0.13107452392578126, "step": 110980 }, { "epoch": 0.9596544776958262, "grad_norm": 0.905929456789742, "learning_rate": 3.1899965533114807e-06, "loss": 0.0408538818359375, "step": 110985 }, { "epoch": 0.9596977112173695, "grad_norm": 0.5863848574369052, "learning_rate": 3.1897932286969048e-06, "loss": 0.2447296142578125, "step": 110990 }, { "epoch": 0.9597409447389128, "grad_norm": 15.721684790783678, "learning_rate": 3.189589903207019e-06, "loss": 0.18093376159667968, "step": 110995 }, { "epoch": 0.959784178260456, "grad_norm": 3.0375329331538117, "learning_rate": 3.1893865768427613e-06, "loss": 0.02251014709472656, "step": 111000 }, { "epoch": 0.9598274117819993, "grad_norm": 5.719992935777959, "learning_rate": 3.18918324960507e-06, "loss": 0.12350120544433593, "step": 111005 }, { "epoch": 0.9598706453035426, "grad_norm": 2.6559607462468597, "learning_rate": 3.1889799214948813e-06, "loss": 0.05792312622070313, "step": 111010 }, { "epoch": 0.9599138788250858, "grad_norm": 2.817106754942598, "learning_rate": 3.1887765925131343e-06, "loss": 0.15328445434570312, "step": 111015 }, { "epoch": 0.9599571123466291, "grad_norm": 4.586897252781191, "learning_rate": 3.1885732626607666e-06, "loss": 0.1262298583984375, "step": 111020 }, { "epoch": 0.9600003458681724, "grad_norm": 1.0015363203143504, "learning_rate": 3.188369931938715e-06, "loss": 0.041710281372070314, "step": 111025 }, { "epoch": 0.9600435793897156, "grad_norm": 0.408905784563543, "learning_rate": 3.1881666003479186e-06, "loss": 0.1389850616455078, "step": 111030 }, { "epoch": 0.9600868129112589, "grad_norm": 1.26911309000746, "learning_rate": 3.1879632678893148e-06, "loss": 0.07881927490234375, "step": 111035 }, { "epoch": 0.9601300464328021, "grad_norm": 2.5247952416860553, "learning_rate": 3.1877599345638405e-06, "loss": 0.1012939453125, "step": 111040 }, { "epoch": 0.9601732799543454, "grad_norm": 0.3054824605474061, "learning_rate": 3.1875566003724335e-06, "loss": 0.05379638671875, "step": 111045 }, { "epoch": 0.9602165134758887, "grad_norm": 6.7471682952771355, "learning_rate": 3.1873532653160326e-06, "loss": 0.21649665832519532, "step": 111050 }, { "epoch": 0.9602597469974319, "grad_norm": 3.725697595062789, "learning_rate": 3.1871499293955755e-06, "loss": 0.0986083984375, "step": 111055 }, { "epoch": 0.9603029805189752, "grad_norm": 7.920986130654675, "learning_rate": 3.1869465926119986e-06, "loss": 0.0395751953125, "step": 111060 }, { "epoch": 0.9603462140405185, "grad_norm": 0.151367328223591, "learning_rate": 3.186743254966241e-06, "loss": 0.08073654174804687, "step": 111065 }, { "epoch": 0.9603894475620617, "grad_norm": 4.397903562545854, "learning_rate": 3.1865399164592395e-06, "loss": 0.1470672607421875, "step": 111070 }, { "epoch": 0.960432681083605, "grad_norm": 0.8816973600962419, "learning_rate": 3.186336577091932e-06, "loss": 0.15635719299316406, "step": 111075 }, { "epoch": 0.9604759146051483, "grad_norm": 2.069219387126587, "learning_rate": 3.1861332368652583e-06, "loss": 0.33481178283691404, "step": 111080 }, { "epoch": 0.9605191481266915, "grad_norm": 0.5425999661865902, "learning_rate": 3.1859298957801538e-06, "loss": 0.1555755615234375, "step": 111085 }, { "epoch": 0.9605623816482348, "grad_norm": 5.554360990166647, "learning_rate": 3.1857265538375563e-06, "loss": 0.1667572021484375, "step": 111090 }, { "epoch": 0.960605615169778, "grad_norm": 7.923257441577377, "learning_rate": 3.1855232110384056e-06, "loss": 0.056482696533203126, "step": 111095 }, { "epoch": 0.9606488486913213, "grad_norm": 58.805179457991876, "learning_rate": 3.1853198673836373e-06, "loss": 0.27817459106445314, "step": 111100 }, { "epoch": 0.9606920822128646, "grad_norm": 0.23131866089227612, "learning_rate": 3.18511652287419e-06, "loss": 0.1246337890625, "step": 111105 }, { "epoch": 0.9607353157344078, "grad_norm": 3.3087331662942367, "learning_rate": 3.184913177511002e-06, "loss": 0.06244220733642578, "step": 111110 }, { "epoch": 0.9607785492559511, "grad_norm": 0.30175419924163527, "learning_rate": 3.184709831295011e-06, "loss": 0.2979743957519531, "step": 111115 }, { "epoch": 0.9608217827774943, "grad_norm": 19.896231201753636, "learning_rate": 3.184506484227155e-06, "loss": 0.13078155517578124, "step": 111120 }, { "epoch": 0.9608650162990376, "grad_norm": 21.40022401344008, "learning_rate": 3.184303136308371e-06, "loss": 0.42562103271484375, "step": 111125 }, { "epoch": 0.9609082498205809, "grad_norm": 15.83263445143739, "learning_rate": 3.1840997875395963e-06, "loss": 0.2741851806640625, "step": 111130 }, { "epoch": 0.9609514833421241, "grad_norm": 14.782838183509542, "learning_rate": 3.1838964379217707e-06, "loss": 0.37445220947265623, "step": 111135 }, { "epoch": 0.9609947168636674, "grad_norm": 5.843124904044339, "learning_rate": 3.18369308745583e-06, "loss": 0.027762222290039062, "step": 111140 }, { "epoch": 0.9610379503852107, "grad_norm": 2.8374525918244298, "learning_rate": 3.1834897361427146e-06, "loss": 0.0810882568359375, "step": 111145 }, { "epoch": 0.9610811839067539, "grad_norm": 4.455081377913945, "learning_rate": 3.18328638398336e-06, "loss": 0.05462722778320313, "step": 111150 }, { "epoch": 0.9611244174282972, "grad_norm": 6.55017778833958, "learning_rate": 3.1830830309787043e-06, "loss": 0.12686767578125, "step": 111155 }, { "epoch": 0.9611676509498405, "grad_norm": 5.400831156806025, "learning_rate": 3.182879677129686e-06, "loss": 0.47566375732421873, "step": 111160 }, { "epoch": 0.9612108844713837, "grad_norm": 32.071265511654644, "learning_rate": 3.182676322437243e-06, "loss": 0.19027023315429686, "step": 111165 }, { "epoch": 0.961254117992927, "grad_norm": 0.2161086627103608, "learning_rate": 3.1824729669023124e-06, "loss": 0.06832275390625, "step": 111170 }, { "epoch": 0.9612973515144703, "grad_norm": 2.9396096954563977, "learning_rate": 3.1822696105258335e-06, "loss": 0.05463104248046875, "step": 111175 }, { "epoch": 0.9613405850360135, "grad_norm": 0.7599290098618248, "learning_rate": 3.1820662533087434e-06, "loss": 0.0306396484375, "step": 111180 }, { "epoch": 0.9613838185575568, "grad_norm": 6.1041473199535226, "learning_rate": 3.181862895251978e-06, "loss": 0.06852340698242188, "step": 111185 }, { "epoch": 0.9614270520791001, "grad_norm": 7.1942917787374006, "learning_rate": 3.181659536356479e-06, "loss": 0.048075103759765626, "step": 111190 }, { "epoch": 0.9614702856006433, "grad_norm": 2.0715674930980135, "learning_rate": 3.1814561766231814e-06, "loss": 0.10370330810546875, "step": 111195 }, { "epoch": 0.9615135191221866, "grad_norm": 34.82634784426655, "learning_rate": 3.1812528160530237e-06, "loss": 0.378656005859375, "step": 111200 }, { "epoch": 0.9615567526437299, "grad_norm": 12.158057369427683, "learning_rate": 3.1810494546469453e-06, "loss": 0.13988037109375, "step": 111205 }, { "epoch": 0.9615999861652731, "grad_norm": 11.269695328159512, "learning_rate": 3.1808460924058817e-06, "loss": 0.2504547119140625, "step": 111210 }, { "epoch": 0.9616432196868163, "grad_norm": 1.5173342733682333, "learning_rate": 3.180642729330772e-06, "loss": 0.1090606689453125, "step": 111215 }, { "epoch": 0.9616864532083597, "grad_norm": 0.2651337972911044, "learning_rate": 3.180439365422554e-06, "loss": 0.06589736938476562, "step": 111220 }, { "epoch": 0.9617296867299029, "grad_norm": 0.7472588174405743, "learning_rate": 3.1802360006821654e-06, "loss": 0.0744659423828125, "step": 111225 }, { "epoch": 0.9617729202514461, "grad_norm": 15.00565115381793, "learning_rate": 3.1800326351105454e-06, "loss": 0.1658233642578125, "step": 111230 }, { "epoch": 0.9618161537729895, "grad_norm": 0.4349437496324852, "learning_rate": 3.1798292687086298e-06, "loss": 0.05116195678710937, "step": 111235 }, { "epoch": 0.9618593872945327, "grad_norm": 0.7196070399393145, "learning_rate": 3.179625901477357e-06, "loss": 0.05856781005859375, "step": 111240 }, { "epoch": 0.9619026208160759, "grad_norm": 26.875834165546, "learning_rate": 3.1794225334176668e-06, "loss": 0.3813629150390625, "step": 111245 }, { "epoch": 0.9619458543376193, "grad_norm": 18.97087166374117, "learning_rate": 3.179219164530495e-06, "loss": 0.15091590881347655, "step": 111250 }, { "epoch": 0.9619890878591625, "grad_norm": 28.28639436168117, "learning_rate": 3.17901579481678e-06, "loss": 0.207879638671875, "step": 111255 }, { "epoch": 0.9620323213807057, "grad_norm": 7.079129694164568, "learning_rate": 3.1788124242774606e-06, "loss": 0.1738056182861328, "step": 111260 }, { "epoch": 0.962075554902249, "grad_norm": 48.28598452414642, "learning_rate": 3.178609052913474e-06, "loss": 0.5263263702392578, "step": 111265 }, { "epoch": 0.9621187884237923, "grad_norm": 2.9621012723480047, "learning_rate": 3.1784056807257575e-06, "loss": 0.34731330871582033, "step": 111270 }, { "epoch": 0.9621620219453355, "grad_norm": 11.018249709642173, "learning_rate": 3.1782023077152506e-06, "loss": 0.08703327178955078, "step": 111275 }, { "epoch": 0.9622052554668789, "grad_norm": 12.327930929035762, "learning_rate": 3.1779989338828893e-06, "loss": 0.130792236328125, "step": 111280 }, { "epoch": 0.9622484889884221, "grad_norm": 19.4723631550267, "learning_rate": 3.1777955592296137e-06, "loss": 0.3306529998779297, "step": 111285 }, { "epoch": 0.9622917225099653, "grad_norm": 0.1263856768871394, "learning_rate": 3.177592183756361e-06, "loss": 0.18765716552734374, "step": 111290 }, { "epoch": 0.9623349560315085, "grad_norm": 11.384877705788062, "learning_rate": 3.1773888074640678e-06, "loss": 0.07459487915039062, "step": 111295 }, { "epoch": 0.9623781895530519, "grad_norm": 5.051606130517039, "learning_rate": 3.1771854303536732e-06, "loss": 0.0472076416015625, "step": 111300 }, { "epoch": 0.9624214230745951, "grad_norm": 0.6494430704843103, "learning_rate": 3.1769820524261155e-06, "loss": 0.02203216552734375, "step": 111305 }, { "epoch": 0.9624646565961383, "grad_norm": 0.41608508165735325, "learning_rate": 3.1767786736823327e-06, "loss": 0.333599853515625, "step": 111310 }, { "epoch": 0.9625078901176817, "grad_norm": 4.249437145215692, "learning_rate": 3.1765752941232617e-06, "loss": 0.06044769287109375, "step": 111315 }, { "epoch": 0.9625511236392249, "grad_norm": 0.379765014057841, "learning_rate": 3.176371913749841e-06, "loss": 0.383428955078125, "step": 111320 }, { "epoch": 0.9625943571607681, "grad_norm": 15.141389627944008, "learning_rate": 3.1761685325630084e-06, "loss": 0.10537071228027343, "step": 111325 }, { "epoch": 0.9626375906823115, "grad_norm": 12.198090670400978, "learning_rate": 3.175965150563702e-06, "loss": 0.05383720397949219, "step": 111330 }, { "epoch": 0.9626808242038547, "grad_norm": 7.184702659333605, "learning_rate": 3.1757617677528607e-06, "loss": 0.11869354248046875, "step": 111335 }, { "epoch": 0.9627240577253979, "grad_norm": 10.669452598886181, "learning_rate": 3.1755583841314213e-06, "loss": 0.15491180419921874, "step": 111340 }, { "epoch": 0.9627672912469413, "grad_norm": 9.018044218153024, "learning_rate": 3.1753549997003214e-06, "loss": 0.02490081787109375, "step": 111345 }, { "epoch": 0.9628105247684845, "grad_norm": 1.6660545740661736, "learning_rate": 3.175151614460501e-06, "loss": 0.09627685546875, "step": 111350 }, { "epoch": 0.9628537582900277, "grad_norm": 24.364742111570948, "learning_rate": 3.1749482284128954e-06, "loss": 0.13072662353515624, "step": 111355 }, { "epoch": 0.9628969918115711, "grad_norm": 4.614681211475359, "learning_rate": 3.1747448415584444e-06, "loss": 0.0521026611328125, "step": 111360 }, { "epoch": 0.9629402253331143, "grad_norm": 2.543492565029426, "learning_rate": 3.174541453898087e-06, "loss": 0.059150314331054686, "step": 111365 }, { "epoch": 0.9629834588546575, "grad_norm": 31.73572017339651, "learning_rate": 3.1743380654327586e-06, "loss": 0.293963623046875, "step": 111370 }, { "epoch": 0.9630266923762009, "grad_norm": 12.07482434959601, "learning_rate": 3.174134676163399e-06, "loss": 0.06397705078125, "step": 111375 }, { "epoch": 0.9630699258977441, "grad_norm": 1.0554173292102345, "learning_rate": 3.1739312860909455e-06, "loss": 0.06105117797851563, "step": 111380 }, { "epoch": 0.9631131594192873, "grad_norm": 0.9854917422856988, "learning_rate": 3.173727895216336e-06, "loss": 0.5562156677246094, "step": 111385 }, { "epoch": 0.9631563929408306, "grad_norm": 31.16884576090797, "learning_rate": 3.173524503540509e-06, "loss": 0.249951171875, "step": 111390 }, { "epoch": 0.9631996264623739, "grad_norm": 3.5319693647320145, "learning_rate": 3.1733211110644022e-06, "loss": 0.15518646240234374, "step": 111395 }, { "epoch": 0.9632428599839171, "grad_norm": 1.8152464742860175, "learning_rate": 3.1731177177889544e-06, "loss": 0.17042083740234376, "step": 111400 }, { "epoch": 0.9632860935054604, "grad_norm": 3.2649357932700176, "learning_rate": 3.172914323715102e-06, "loss": 0.4238275527954102, "step": 111405 }, { "epoch": 0.9633293270270037, "grad_norm": 13.678252680661641, "learning_rate": 3.1727109288437837e-06, "loss": 0.26722412109375, "step": 111410 }, { "epoch": 0.9633725605485469, "grad_norm": 0.9274546858448712, "learning_rate": 3.1725075331759387e-06, "loss": 0.054864501953125, "step": 111415 }, { "epoch": 0.9634157940700901, "grad_norm": 5.786689173490908, "learning_rate": 3.172304136712505e-06, "loss": 0.08592910766601562, "step": 111420 }, { "epoch": 0.9634590275916335, "grad_norm": 0.493783833234649, "learning_rate": 3.1721007394544183e-06, "loss": 0.02523345947265625, "step": 111425 }, { "epoch": 0.9635022611131767, "grad_norm": 1.0396090758513166, "learning_rate": 3.171897341402619e-06, "loss": 0.048541259765625, "step": 111430 }, { "epoch": 0.96354549463472, "grad_norm": 15.799209298036358, "learning_rate": 3.171693942558044e-06, "loss": 0.14490814208984376, "step": 111435 }, { "epoch": 0.9635887281562633, "grad_norm": 3.846193580270547, "learning_rate": 3.1714905429216314e-06, "loss": 0.04669189453125, "step": 111440 }, { "epoch": 0.9636319616778065, "grad_norm": 0.028528744206607313, "learning_rate": 3.1712871424943206e-06, "loss": 0.04774818420410156, "step": 111445 }, { "epoch": 0.9636751951993497, "grad_norm": 29.55908379997806, "learning_rate": 3.1710837412770486e-06, "loss": 0.5194823265075683, "step": 111450 }, { "epoch": 0.9637184287208931, "grad_norm": 3.03038789685753, "learning_rate": 3.1708803392707527e-06, "loss": 0.20155372619628906, "step": 111455 }, { "epoch": 0.9637616622424363, "grad_norm": 60.64784073143553, "learning_rate": 3.1706769364763724e-06, "loss": 0.1479400634765625, "step": 111460 }, { "epoch": 0.9638048957639795, "grad_norm": 0.0865928521377501, "learning_rate": 3.1704735328948442e-06, "loss": 0.10044746398925782, "step": 111465 }, { "epoch": 0.9638481292855228, "grad_norm": 7.2349581969367875, "learning_rate": 3.170270128527108e-06, "loss": 0.19879608154296874, "step": 111470 }, { "epoch": 0.9638913628070661, "grad_norm": 19.275345368682387, "learning_rate": 3.1700667233741006e-06, "loss": 0.0644012451171875, "step": 111475 }, { "epoch": 0.9639345963286093, "grad_norm": 21.331717166301786, "learning_rate": 3.1698633174367613e-06, "loss": 0.078021240234375, "step": 111480 }, { "epoch": 0.9639778298501526, "grad_norm": 9.67011877151677, "learning_rate": 3.169659910716027e-06, "loss": 0.063360595703125, "step": 111485 }, { "epoch": 0.9640210633716959, "grad_norm": 27.70432872006361, "learning_rate": 3.1694565032128354e-06, "loss": 0.3993247985839844, "step": 111490 }, { "epoch": 0.9640642968932391, "grad_norm": 47.64896043882806, "learning_rate": 3.1692530949281255e-06, "loss": 0.21357269287109376, "step": 111495 }, { "epoch": 0.9641075304147824, "grad_norm": 24.836191615294613, "learning_rate": 3.1690496858628362e-06, "loss": 0.07510986328125, "step": 111500 }, { "epoch": 0.9641507639363257, "grad_norm": 0.38855111997323794, "learning_rate": 3.168846276017904e-06, "loss": 0.10218582153320313, "step": 111505 }, { "epoch": 0.9641939974578689, "grad_norm": 20.397617937381888, "learning_rate": 3.1686428653942676e-06, "loss": 0.12463035583496093, "step": 111510 }, { "epoch": 0.9642372309794122, "grad_norm": 14.768819012817104, "learning_rate": 3.168439453992866e-06, "loss": 0.07274856567382812, "step": 111515 }, { "epoch": 0.9642804645009555, "grad_norm": 5.284423578920956, "learning_rate": 3.168236041814636e-06, "loss": 0.09962310791015624, "step": 111520 }, { "epoch": 0.9643236980224987, "grad_norm": 0.044729938131704484, "learning_rate": 3.1680326288605158e-06, "loss": 0.036957931518554685, "step": 111525 }, { "epoch": 0.964366931544042, "grad_norm": 21.771480195915583, "learning_rate": 3.167829215131445e-06, "loss": 0.290472412109375, "step": 111530 }, { "epoch": 0.9644101650655853, "grad_norm": 8.001638563621812, "learning_rate": 3.1676258006283595e-06, "loss": 0.045380783081054685, "step": 111535 }, { "epoch": 0.9644533985871285, "grad_norm": 12.578705325790573, "learning_rate": 3.1674223853521994e-06, "loss": 0.20870437622070312, "step": 111540 }, { "epoch": 0.9644966321086718, "grad_norm": 3.868108818105837, "learning_rate": 3.1672189693039027e-06, "loss": 0.02135772705078125, "step": 111545 }, { "epoch": 0.9645398656302151, "grad_norm": 1.630618406625462, "learning_rate": 3.1670155524844054e-06, "loss": 0.37011566162109377, "step": 111550 }, { "epoch": 0.9645830991517583, "grad_norm": 28.567434548703265, "learning_rate": 3.1668121348946473e-06, "loss": 0.399530029296875, "step": 111555 }, { "epoch": 0.9646263326733016, "grad_norm": 0.7632349334868209, "learning_rate": 3.1666087165355672e-06, "loss": 0.05325164794921875, "step": 111560 }, { "epoch": 0.9646695661948448, "grad_norm": 0.5661808924218537, "learning_rate": 3.1664052974081025e-06, "loss": 0.12611236572265624, "step": 111565 }, { "epoch": 0.9647127997163881, "grad_norm": 9.733945935101103, "learning_rate": 3.166201877513191e-06, "loss": 0.06286087036132812, "step": 111570 }, { "epoch": 0.9647560332379314, "grad_norm": 37.46253069564221, "learning_rate": 3.1659984568517706e-06, "loss": 0.37688369750976564, "step": 111575 }, { "epoch": 0.9647992667594746, "grad_norm": 0.6409001033261402, "learning_rate": 3.1657950354247804e-06, "loss": 0.10157623291015624, "step": 111580 }, { "epoch": 0.9648425002810179, "grad_norm": 5.8836765255030725, "learning_rate": 3.1655916132331575e-06, "loss": 0.135308837890625, "step": 111585 }, { "epoch": 0.9648857338025612, "grad_norm": 0.08640360799576328, "learning_rate": 3.1653881902778416e-06, "loss": 0.0607269287109375, "step": 111590 }, { "epoch": 0.9649289673241044, "grad_norm": 1.3159513727165817, "learning_rate": 3.16518476655977e-06, "loss": 0.455010986328125, "step": 111595 }, { "epoch": 0.9649722008456477, "grad_norm": 6.161699352044521, "learning_rate": 3.1649813420798803e-06, "loss": 0.04412384033203125, "step": 111600 }, { "epoch": 0.965015434367191, "grad_norm": 5.009293349064166, "learning_rate": 3.164777916839112e-06, "loss": 0.051715850830078125, "step": 111605 }, { "epoch": 0.9650586678887342, "grad_norm": 1.163072494935025, "learning_rate": 3.1645744908384014e-06, "loss": 0.1844329833984375, "step": 111610 }, { "epoch": 0.9651019014102775, "grad_norm": 0.5246402781236668, "learning_rate": 3.1643710640786883e-06, "loss": 0.015523529052734375, "step": 111615 }, { "epoch": 0.9651451349318207, "grad_norm": 0.7563905599098358, "learning_rate": 3.1641676365609108e-06, "loss": 0.0524658203125, "step": 111620 }, { "epoch": 0.965188368453364, "grad_norm": 17.509873426188815, "learning_rate": 3.163964208286007e-06, "loss": 0.2188426971435547, "step": 111625 }, { "epoch": 0.9652316019749073, "grad_norm": 0.6122894937317457, "learning_rate": 3.1637607792549135e-06, "loss": 0.016362762451171874, "step": 111630 }, { "epoch": 0.9652748354964505, "grad_norm": 3.05649798266547, "learning_rate": 3.16355734946857e-06, "loss": 0.04633827209472656, "step": 111635 }, { "epoch": 0.9653180690179938, "grad_norm": 5.406906789133297, "learning_rate": 3.1633539189279153e-06, "loss": 0.14651947021484374, "step": 111640 }, { "epoch": 0.965361302539537, "grad_norm": 0.7228387867516834, "learning_rate": 3.1631504876338865e-06, "loss": 0.059374237060546876, "step": 111645 }, { "epoch": 0.9654045360610803, "grad_norm": 0.17513800096880588, "learning_rate": 3.1629470555874215e-06, "loss": 0.054058074951171875, "step": 111650 }, { "epoch": 0.9654477695826236, "grad_norm": 0.8398743742913107, "learning_rate": 3.1627436227894596e-06, "loss": 0.42395477294921874, "step": 111655 }, { "epoch": 0.9654910031041668, "grad_norm": 35.74326494675246, "learning_rate": 3.162540189240938e-06, "loss": 0.27625732421875, "step": 111660 }, { "epoch": 0.9655342366257101, "grad_norm": 15.271654017260913, "learning_rate": 3.1623367549427954e-06, "loss": 0.09302520751953125, "step": 111665 }, { "epoch": 0.9655774701472534, "grad_norm": 1.161903056728575, "learning_rate": 3.1621333198959707e-06, "loss": 0.05844039916992187, "step": 111670 }, { "epoch": 0.9656207036687966, "grad_norm": 1.4687242558814013, "learning_rate": 3.161929884101401e-06, "loss": 0.2387847900390625, "step": 111675 }, { "epoch": 0.9656639371903399, "grad_norm": 6.413451035046645, "learning_rate": 3.1617264475600248e-06, "loss": 0.062413597106933595, "step": 111680 }, { "epoch": 0.9657071707118832, "grad_norm": 14.924321888185403, "learning_rate": 3.1615230102727814e-06, "loss": 0.304376220703125, "step": 111685 }, { "epoch": 0.9657504042334264, "grad_norm": 29.807631782485675, "learning_rate": 3.1613195722406073e-06, "loss": 0.2135498046875, "step": 111690 }, { "epoch": 0.9657936377549697, "grad_norm": 3.71436000365719, "learning_rate": 3.1611161334644414e-06, "loss": 0.05269317626953125, "step": 111695 }, { "epoch": 0.965836871276513, "grad_norm": 2.762881446333561, "learning_rate": 3.1609126939452226e-06, "loss": 0.17137794494628905, "step": 111700 }, { "epoch": 0.9658801047980562, "grad_norm": 3.9015693626532633, "learning_rate": 3.1607092536838894e-06, "loss": 0.05487270355224609, "step": 111705 }, { "epoch": 0.9659233383195995, "grad_norm": 0.09198958442536617, "learning_rate": 3.160505812681378e-06, "loss": 0.117529296875, "step": 111710 }, { "epoch": 0.9659665718411428, "grad_norm": 8.368820612422155, "learning_rate": 3.1603023709386294e-06, "loss": 0.25947036743164065, "step": 111715 }, { "epoch": 0.966009805362686, "grad_norm": 5.875678509223494, "learning_rate": 3.1600989284565782e-06, "loss": 0.1608978271484375, "step": 111720 }, { "epoch": 0.9660530388842293, "grad_norm": 3.466196052643621, "learning_rate": 3.1598954852361664e-06, "loss": 0.12402381896972656, "step": 111725 }, { "epoch": 0.9660962724057726, "grad_norm": 0.22710054748477612, "learning_rate": 3.1596920412783306e-06, "loss": 0.29480743408203125, "step": 111730 }, { "epoch": 0.9661395059273158, "grad_norm": 9.15433067950387, "learning_rate": 3.15948859658401e-06, "loss": 0.098052978515625, "step": 111735 }, { "epoch": 0.966182739448859, "grad_norm": 1.0701932827292122, "learning_rate": 3.1592851511541418e-06, "loss": 0.01270294189453125, "step": 111740 }, { "epoch": 0.9662259729704024, "grad_norm": 3.197996619138887, "learning_rate": 3.1590817049896632e-06, "loss": 0.0809539794921875, "step": 111745 }, { "epoch": 0.9662692064919456, "grad_norm": 10.510328287819021, "learning_rate": 3.1588782580915143e-06, "loss": 0.0982086181640625, "step": 111750 }, { "epoch": 0.9663124400134888, "grad_norm": 2.3247936832920475, "learning_rate": 3.158674810460634e-06, "loss": 0.4840038299560547, "step": 111755 }, { "epoch": 0.9663556735350322, "grad_norm": 1.2570042742034138, "learning_rate": 3.158471362097958e-06, "loss": 0.03549041748046875, "step": 111760 }, { "epoch": 0.9663989070565754, "grad_norm": 8.614694307519509, "learning_rate": 3.1582679130044273e-06, "loss": 0.1930248260498047, "step": 111765 }, { "epoch": 0.9664421405781186, "grad_norm": 1.3179789333004905, "learning_rate": 3.158064463180979e-06, "loss": 0.27786941528320314, "step": 111770 }, { "epoch": 0.966485374099662, "grad_norm": 7.614792168732223, "learning_rate": 3.157861012628551e-06, "loss": 0.04200439453125, "step": 111775 }, { "epoch": 0.9665286076212052, "grad_norm": 38.21777876265045, "learning_rate": 3.1576575613480814e-06, "loss": 0.22525520324707032, "step": 111780 }, { "epoch": 0.9665718411427484, "grad_norm": 6.480336945097095, "learning_rate": 3.1574541093405095e-06, "loss": 0.14050979614257814, "step": 111785 }, { "epoch": 0.9666150746642918, "grad_norm": 1.1549676017752166, "learning_rate": 3.1572506566067733e-06, "loss": 0.08225936889648437, "step": 111790 }, { "epoch": 0.966658308185835, "grad_norm": 0.786108670222589, "learning_rate": 3.1570472031478113e-06, "loss": 0.05968780517578125, "step": 111795 }, { "epoch": 0.9667015417073782, "grad_norm": 4.694810329492065, "learning_rate": 3.156843748964561e-06, "loss": 0.05988807678222656, "step": 111800 }, { "epoch": 0.9667447752289215, "grad_norm": 101.42754995960755, "learning_rate": 3.156640294057961e-06, "loss": 0.18579864501953125, "step": 111805 }, { "epoch": 0.9667880087504648, "grad_norm": 0.6343330096997891, "learning_rate": 3.1564368384289493e-06, "loss": 0.1677093505859375, "step": 111810 }, { "epoch": 0.966831242272008, "grad_norm": 28.172433636602744, "learning_rate": 3.1562333820784663e-06, "loss": 0.03244094848632813, "step": 111815 }, { "epoch": 0.9668744757935512, "grad_norm": 2.6951427863371853, "learning_rate": 3.1560299250074483e-06, "loss": 0.03879051208496094, "step": 111820 }, { "epoch": 0.9669177093150946, "grad_norm": 35.678522762489706, "learning_rate": 3.155826467216833e-06, "loss": 0.25325698852539064, "step": 111825 }, { "epoch": 0.9669609428366378, "grad_norm": 22.67601290370916, "learning_rate": 3.155623008707561e-06, "loss": 0.140765380859375, "step": 111830 }, { "epoch": 0.967004176358181, "grad_norm": 0.8962903540470353, "learning_rate": 3.1554195494805688e-06, "loss": 0.04200582504272461, "step": 111835 }, { "epoch": 0.9670474098797244, "grad_norm": 0.8337747360970453, "learning_rate": 3.1552160895367945e-06, "loss": 0.0524688720703125, "step": 111840 }, { "epoch": 0.9670906434012676, "grad_norm": 2.0147426728456383, "learning_rate": 3.155012628877179e-06, "loss": 0.5277637481689453, "step": 111845 }, { "epoch": 0.9671338769228108, "grad_norm": 1.4767957889299845, "learning_rate": 3.1548091675026587e-06, "loss": 0.011673736572265624, "step": 111850 }, { "epoch": 0.9671771104443542, "grad_norm": 10.284398719796128, "learning_rate": 3.154605705414171e-06, "loss": 0.256500244140625, "step": 111855 }, { "epoch": 0.9672203439658974, "grad_norm": 3.902410795094992, "learning_rate": 3.154402242612656e-06, "loss": 0.15133819580078126, "step": 111860 }, { "epoch": 0.9672635774874406, "grad_norm": 5.582078756307376, "learning_rate": 3.154198779099052e-06, "loss": 0.10440292358398437, "step": 111865 }, { "epoch": 0.967306811008984, "grad_norm": 3.9740516406901008, "learning_rate": 3.1539953148742963e-06, "loss": 0.08783340454101562, "step": 111870 }, { "epoch": 0.9673500445305272, "grad_norm": 15.717520208659689, "learning_rate": 3.153791849939328e-06, "loss": 0.33439178466796876, "step": 111875 }, { "epoch": 0.9673932780520704, "grad_norm": 2.873382968962089, "learning_rate": 3.153588384295086e-06, "loss": 0.1401458740234375, "step": 111880 }, { "epoch": 0.9674365115736138, "grad_norm": 114.52503476929193, "learning_rate": 3.153384917942507e-06, "loss": 0.24568862915039064, "step": 111885 }, { "epoch": 0.967479745095157, "grad_norm": 0.6165893850512613, "learning_rate": 3.15318145088253e-06, "loss": 0.200372314453125, "step": 111890 }, { "epoch": 0.9675229786167002, "grad_norm": 0.8960894777382598, "learning_rate": 3.1529779831160943e-06, "loss": 0.20200042724609374, "step": 111895 }, { "epoch": 0.9675662121382435, "grad_norm": 0.31347291268896876, "learning_rate": 3.1527745146441383e-06, "loss": 0.10008392333984376, "step": 111900 }, { "epoch": 0.9676094456597868, "grad_norm": 12.532735732444886, "learning_rate": 3.152571045467599e-06, "loss": 0.1606719970703125, "step": 111905 }, { "epoch": 0.96765267918133, "grad_norm": 1.008416299381478, "learning_rate": 3.1523675755874155e-06, "loss": 0.3069114685058594, "step": 111910 }, { "epoch": 0.9676959127028733, "grad_norm": 1.7594992135501966, "learning_rate": 3.152164105004526e-06, "loss": 0.04278106689453125, "step": 111915 }, { "epoch": 0.9677391462244166, "grad_norm": 2.4194700470379775, "learning_rate": 3.151960633719869e-06, "loss": 0.07512741088867188, "step": 111920 }, { "epoch": 0.9677823797459598, "grad_norm": 20.78353322905245, "learning_rate": 3.151757161734384e-06, "loss": 0.11029510498046875, "step": 111925 }, { "epoch": 0.967825613267503, "grad_norm": 1.4851135653411534, "learning_rate": 3.151553689049008e-06, "loss": 0.13405380249023438, "step": 111930 }, { "epoch": 0.9678688467890464, "grad_norm": 9.40986924296542, "learning_rate": 3.151350215664679e-06, "loss": 0.145941162109375, "step": 111935 }, { "epoch": 0.9679120803105896, "grad_norm": 0.07040212937710422, "learning_rate": 3.1511467415823375e-06, "loss": 0.12109107971191406, "step": 111940 }, { "epoch": 0.9679553138321328, "grad_norm": 8.233644228161554, "learning_rate": 3.1509432668029193e-06, "loss": 0.21241912841796876, "step": 111945 }, { "epoch": 0.9679985473536762, "grad_norm": 8.036309806579299, "learning_rate": 3.150739791327364e-06, "loss": 0.24519195556640624, "step": 111950 }, { "epoch": 0.9680417808752194, "grad_norm": 14.067143500085859, "learning_rate": 3.150536315156611e-06, "loss": 0.16845321655273438, "step": 111955 }, { "epoch": 0.9680850143967626, "grad_norm": 0.7165533792329418, "learning_rate": 3.1503328382915983e-06, "loss": 0.013926315307617187, "step": 111960 }, { "epoch": 0.968128247918306, "grad_norm": 11.205171007748389, "learning_rate": 3.1501293607332624e-06, "loss": 0.06540985107421875, "step": 111965 }, { "epoch": 0.9681714814398492, "grad_norm": 2.4971329837111607, "learning_rate": 3.1499258824825443e-06, "loss": 0.16744461059570312, "step": 111970 }, { "epoch": 0.9682147149613924, "grad_norm": 1.1618415909638184, "learning_rate": 3.1497224035403805e-06, "loss": 0.03551673889160156, "step": 111975 }, { "epoch": 0.9682579484829358, "grad_norm": 26.456816682356152, "learning_rate": 3.14951892390771e-06, "loss": 0.219525146484375, "step": 111980 }, { "epoch": 0.968301182004479, "grad_norm": 1.269954013170479, "learning_rate": 3.149315443585472e-06, "loss": 0.05353851318359375, "step": 111985 }, { "epoch": 0.9683444155260222, "grad_norm": 0.34066284139867437, "learning_rate": 3.1491119625746047e-06, "loss": 0.13322601318359376, "step": 111990 }, { "epoch": 0.9683876490475655, "grad_norm": 4.192547570405804, "learning_rate": 3.1489084808760463e-06, "loss": 0.0954681396484375, "step": 111995 }, { "epoch": 0.9684308825691088, "grad_norm": 0.9722101039556812, "learning_rate": 3.148704998490734e-06, "loss": 0.22932510375976561, "step": 112000 }, { "epoch": 0.968474116090652, "grad_norm": 11.054937212990527, "learning_rate": 3.1485015154196075e-06, "loss": 0.19906463623046874, "step": 112005 }, { "epoch": 0.9685173496121953, "grad_norm": 8.918246366490239, "learning_rate": 3.1482980316636064e-06, "loss": 0.1952392578125, "step": 112010 }, { "epoch": 0.9685605831337386, "grad_norm": 4.657074576914169, "learning_rate": 3.1480945472236665e-06, "loss": 0.14338455200195313, "step": 112015 }, { "epoch": 0.9686038166552818, "grad_norm": 1.7702912805173818, "learning_rate": 3.1478910621007283e-06, "loss": 0.09913063049316406, "step": 112020 }, { "epoch": 0.9686470501768251, "grad_norm": 12.263103287106578, "learning_rate": 3.1476875762957303e-06, "loss": 0.35574188232421877, "step": 112025 }, { "epoch": 0.9686902836983684, "grad_norm": 2.722762508213224, "learning_rate": 3.1474840898096095e-06, "loss": 0.33348808288574217, "step": 112030 }, { "epoch": 0.9687335172199116, "grad_norm": 39.33603099574927, "learning_rate": 3.1472806026433042e-06, "loss": 0.2561618804931641, "step": 112035 }, { "epoch": 0.9687767507414549, "grad_norm": 9.789941024505945, "learning_rate": 3.1470771147977552e-06, "loss": 0.043351173400878906, "step": 112040 }, { "epoch": 0.9688199842629982, "grad_norm": 9.442015470116965, "learning_rate": 3.146873626273899e-06, "loss": 0.09213714599609375, "step": 112045 }, { "epoch": 0.9688632177845414, "grad_norm": 1.422449207073487, "learning_rate": 3.146670137072675e-06, "loss": 0.0313079833984375, "step": 112050 }, { "epoch": 0.9689064513060847, "grad_norm": 10.024185212658475, "learning_rate": 3.14646664719502e-06, "loss": 0.0592010498046875, "step": 112055 }, { "epoch": 0.968949684827628, "grad_norm": 0.8555661368975166, "learning_rate": 3.146263156641875e-06, "loss": 0.022349739074707033, "step": 112060 }, { "epoch": 0.9689929183491712, "grad_norm": 19.236906351513422, "learning_rate": 3.1460596654141766e-06, "loss": 0.07822265625, "step": 112065 }, { "epoch": 0.9690361518707145, "grad_norm": 40.07691380101301, "learning_rate": 3.1458561735128644e-06, "loss": 0.24430465698242188, "step": 112070 }, { "epoch": 0.9690793853922577, "grad_norm": 45.058198085174695, "learning_rate": 3.1456526809388772e-06, "loss": 0.1169525146484375, "step": 112075 }, { "epoch": 0.969122618913801, "grad_norm": 0.23117587642914517, "learning_rate": 3.1454491876931514e-06, "loss": 0.06745948791503906, "step": 112080 }, { "epoch": 0.9691658524353443, "grad_norm": 1.1023450086790592, "learning_rate": 3.1452456937766263e-06, "loss": 0.14293365478515624, "step": 112085 }, { "epoch": 0.9692090859568875, "grad_norm": 21.046405882459656, "learning_rate": 3.145042199190242e-06, "loss": 0.17830810546875, "step": 112090 }, { "epoch": 0.9692523194784308, "grad_norm": 2.6877378916885983, "learning_rate": 3.1448387039349357e-06, "loss": 0.1495513916015625, "step": 112095 }, { "epoch": 0.969295552999974, "grad_norm": 0.1485376971909444, "learning_rate": 3.1446352080116464e-06, "loss": 0.23016586303710937, "step": 112100 }, { "epoch": 0.9693387865215173, "grad_norm": 0.14365361498979148, "learning_rate": 3.1444317114213123e-06, "loss": 0.057233428955078124, "step": 112105 }, { "epoch": 0.9693820200430606, "grad_norm": 11.334846932635266, "learning_rate": 3.144228214164871e-06, "loss": 0.34395751953125, "step": 112110 }, { "epoch": 0.9694252535646039, "grad_norm": 7.917835980876091, "learning_rate": 3.144024716243262e-06, "loss": 0.1351837158203125, "step": 112115 }, { "epoch": 0.9694684870861471, "grad_norm": 1.9086693275822577, "learning_rate": 3.143821217657425e-06, "loss": 0.024000930786132812, "step": 112120 }, { "epoch": 0.9695117206076904, "grad_norm": 1.364474486803534, "learning_rate": 3.143617718408296e-06, "loss": 0.08633060455322265, "step": 112125 }, { "epoch": 0.9695549541292336, "grad_norm": 5.361341557245814, "learning_rate": 3.1434142184968157e-06, "loss": 0.18327293395996094, "step": 112130 }, { "epoch": 0.9695981876507769, "grad_norm": 7.609562423269734, "learning_rate": 3.143210717923922e-06, "loss": 0.13351192474365234, "step": 112135 }, { "epoch": 0.9696414211723202, "grad_norm": 1.1663431894155192, "learning_rate": 3.1430072166905514e-06, "loss": 0.1311553955078125, "step": 112140 }, { "epoch": 0.9696846546938634, "grad_norm": 19.927920259996966, "learning_rate": 3.1428037147976453e-06, "loss": 0.07166824340820313, "step": 112145 }, { "epoch": 0.9697278882154067, "grad_norm": 0.14412650615522812, "learning_rate": 3.142600212246141e-06, "loss": 0.007953643798828125, "step": 112150 }, { "epoch": 0.96977112173695, "grad_norm": 5.58666718934744, "learning_rate": 3.142396709036977e-06, "loss": 0.18309326171875, "step": 112155 }, { "epoch": 0.9698143552584932, "grad_norm": 5.434454445713974, "learning_rate": 3.142193205171092e-06, "loss": 0.09711570739746093, "step": 112160 }, { "epoch": 0.9698575887800365, "grad_norm": 9.365522171043045, "learning_rate": 3.141989700649425e-06, "loss": 0.13214492797851562, "step": 112165 }, { "epoch": 0.9699008223015797, "grad_norm": 16.404232605463406, "learning_rate": 3.141786195472913e-06, "loss": 0.211138916015625, "step": 112170 }, { "epoch": 0.969944055823123, "grad_norm": 17.078469351326167, "learning_rate": 3.1415826896424956e-06, "loss": 0.1660064697265625, "step": 112175 }, { "epoch": 0.9699872893446663, "grad_norm": 3.1371354784956766, "learning_rate": 3.1413791831591127e-06, "loss": 0.29605884552001954, "step": 112180 }, { "epoch": 0.9700305228662095, "grad_norm": 10.058349686345233, "learning_rate": 3.1411756760237008e-06, "loss": 0.43728790283203123, "step": 112185 }, { "epoch": 0.9700737563877528, "grad_norm": 1.6923955433590947, "learning_rate": 3.140972168237199e-06, "loss": 0.0413482666015625, "step": 112190 }, { "epoch": 0.9701169899092961, "grad_norm": 6.30586082390097, "learning_rate": 3.1407686598005465e-06, "loss": 0.095794677734375, "step": 112195 }, { "epoch": 0.9701602234308393, "grad_norm": 0.4755059212388937, "learning_rate": 3.1405651507146803e-06, "loss": 0.319110107421875, "step": 112200 }, { "epoch": 0.9702034569523826, "grad_norm": 0.17770148011807452, "learning_rate": 3.1403616409805404e-06, "loss": 0.34000511169433595, "step": 112205 }, { "epoch": 0.9702466904739259, "grad_norm": 0.8021051375186511, "learning_rate": 3.1401581305990657e-06, "loss": 0.10572280883789062, "step": 112210 }, { "epoch": 0.9702899239954691, "grad_norm": 4.79578104870636, "learning_rate": 3.139954619571194e-06, "loss": 0.11162109375, "step": 112215 }, { "epoch": 0.9703331575170124, "grad_norm": 21.211778711587037, "learning_rate": 3.139751107897863e-06, "loss": 0.05276813507080078, "step": 112220 }, { "epoch": 0.9703763910385557, "grad_norm": 8.490290454926354, "learning_rate": 3.139547595580013e-06, "loss": 0.1382232666015625, "step": 112225 }, { "epoch": 0.9704196245600989, "grad_norm": 8.660778051325881, "learning_rate": 3.1393440826185816e-06, "loss": 0.115118408203125, "step": 112230 }, { "epoch": 0.9704628580816422, "grad_norm": 14.554669313561483, "learning_rate": 3.139140569014508e-06, "loss": 0.35175514221191406, "step": 112235 }, { "epoch": 0.9705060916031855, "grad_norm": 38.97510287851103, "learning_rate": 3.1389370547687303e-06, "loss": 0.12252655029296874, "step": 112240 }, { "epoch": 0.9705493251247287, "grad_norm": 9.6693055339356, "learning_rate": 3.138733539882187e-06, "loss": 0.06798782348632812, "step": 112245 }, { "epoch": 0.9705925586462719, "grad_norm": 1.033794332503491, "learning_rate": 3.138530024355817e-06, "loss": 0.17201080322265624, "step": 112250 }, { "epoch": 0.9706357921678153, "grad_norm": 3.1810116169875493, "learning_rate": 3.1383265081905575e-06, "loss": 0.05144195556640625, "step": 112255 }, { "epoch": 0.9706790256893585, "grad_norm": 1.7999014700850495, "learning_rate": 3.13812299138735e-06, "loss": 0.09677734375, "step": 112260 }, { "epoch": 0.9707222592109017, "grad_norm": 1.7664180176355955, "learning_rate": 3.137919473947131e-06, "loss": 0.11680440902709961, "step": 112265 }, { "epoch": 0.9707654927324451, "grad_norm": 6.300396036822842, "learning_rate": 3.1377159558708397e-06, "loss": 0.10479469299316406, "step": 112270 }, { "epoch": 0.9708087262539883, "grad_norm": 1.4352349280744574, "learning_rate": 3.1375124371594147e-06, "loss": 0.07217254638671874, "step": 112275 }, { "epoch": 0.9708519597755315, "grad_norm": 19.56731760971623, "learning_rate": 3.1373089178137937e-06, "loss": 0.11290740966796875, "step": 112280 }, { "epoch": 0.9708951932970749, "grad_norm": 0.4785584603469558, "learning_rate": 3.137105397834916e-06, "loss": 0.1267364501953125, "step": 112285 }, { "epoch": 0.9709384268186181, "grad_norm": 17.476811482966852, "learning_rate": 3.1369018772237216e-06, "loss": 0.12689056396484374, "step": 112290 }, { "epoch": 0.9709816603401613, "grad_norm": 29.09879934349467, "learning_rate": 3.136698355981147e-06, "loss": 0.20855941772460937, "step": 112295 }, { "epoch": 0.9710248938617047, "grad_norm": 3.7726764518953106, "learning_rate": 3.1364948341081323e-06, "loss": 0.02257709503173828, "step": 112300 }, { "epoch": 0.9710681273832479, "grad_norm": 10.500734817044664, "learning_rate": 3.1362913116056153e-06, "loss": 0.04398841857910156, "step": 112305 }, { "epoch": 0.9711113609047911, "grad_norm": 9.979371167001656, "learning_rate": 3.1360877884745338e-06, "loss": 0.4746044158935547, "step": 112310 }, { "epoch": 0.9711545944263344, "grad_norm": 2.4263526874420087, "learning_rate": 3.135884264715828e-06, "loss": 0.16455078125, "step": 112315 }, { "epoch": 0.9711978279478777, "grad_norm": 6.138135695113758, "learning_rate": 3.1356807403304368e-06, "loss": 0.0545623779296875, "step": 112320 }, { "epoch": 0.9712410614694209, "grad_norm": 0.4573485988696741, "learning_rate": 3.135477215319297e-06, "loss": 0.030426788330078124, "step": 112325 }, { "epoch": 0.9712842949909642, "grad_norm": 10.029917235969409, "learning_rate": 3.1352736896833485e-06, "loss": 0.307861328125, "step": 112330 }, { "epoch": 0.9713275285125075, "grad_norm": 1.583411430190494, "learning_rate": 3.13507016342353e-06, "loss": 0.08273773193359375, "step": 112335 }, { "epoch": 0.9713707620340507, "grad_norm": 3.5029106105644012, "learning_rate": 3.1348666365407792e-06, "loss": 0.07546234130859375, "step": 112340 }, { "epoch": 0.9714139955555939, "grad_norm": 4.607959913373419, "learning_rate": 3.1346631090360363e-06, "loss": 0.15836620330810547, "step": 112345 }, { "epoch": 0.9714572290771373, "grad_norm": 1.2383417070517395, "learning_rate": 3.1344595809102383e-06, "loss": 0.0877777099609375, "step": 112350 }, { "epoch": 0.9715004625986805, "grad_norm": 1.1446297632538642, "learning_rate": 3.1342560521643253e-06, "loss": 0.0344085693359375, "step": 112355 }, { "epoch": 0.9715436961202237, "grad_norm": 5.731111292275337, "learning_rate": 3.134052522799235e-06, "loss": 0.112066650390625, "step": 112360 }, { "epoch": 0.9715869296417671, "grad_norm": 6.667840541709808, "learning_rate": 3.1338489928159064e-06, "loss": 0.12794570922851561, "step": 112365 }, { "epoch": 0.9716301631633103, "grad_norm": 15.425161700231707, "learning_rate": 3.133645462215277e-06, "loss": 0.22672252655029296, "step": 112370 }, { "epoch": 0.9716733966848535, "grad_norm": 1.8016291630637076, "learning_rate": 3.1334419309982874e-06, "loss": 0.10675773620605469, "step": 112375 }, { "epoch": 0.9717166302063969, "grad_norm": 0.9165758838368898, "learning_rate": 3.133238399165875e-06, "loss": 0.12728271484375, "step": 112380 }, { "epoch": 0.9717598637279401, "grad_norm": 1.0605142398528387, "learning_rate": 3.13303486671898e-06, "loss": 0.2841835021972656, "step": 112385 }, { "epoch": 0.9718030972494833, "grad_norm": 3.751246243348125, "learning_rate": 3.1328313336585393e-06, "loss": 0.26047744750976565, "step": 112390 }, { "epoch": 0.9718463307710267, "grad_norm": 17.21059973742275, "learning_rate": 3.132627799985492e-06, "loss": 0.4910894393920898, "step": 112395 }, { "epoch": 0.9718895642925699, "grad_norm": 2.649082246263759, "learning_rate": 3.1324242657007767e-06, "loss": 0.1866546630859375, "step": 112400 }, { "epoch": 0.9719327978141131, "grad_norm": 0.1389796853535103, "learning_rate": 3.1322207308053334e-06, "loss": 0.02000885009765625, "step": 112405 }, { "epoch": 0.9719760313356565, "grad_norm": 68.12269157682525, "learning_rate": 3.1320171953000987e-06, "loss": 0.29587478637695314, "step": 112410 }, { "epoch": 0.9720192648571997, "grad_norm": 3.6785269741982543, "learning_rate": 3.131813659186013e-06, "loss": 0.06356925964355468, "step": 112415 }, { "epoch": 0.9720624983787429, "grad_norm": 8.2144882237446, "learning_rate": 3.1316101224640147e-06, "loss": 0.1302581787109375, "step": 112420 }, { "epoch": 0.9721057319002862, "grad_norm": 4.335059872814892, "learning_rate": 3.131406585135041e-06, "loss": 0.0595733642578125, "step": 112425 }, { "epoch": 0.9721489654218295, "grad_norm": 20.78220265022481, "learning_rate": 3.131203047200032e-06, "loss": 0.2497316360473633, "step": 112430 }, { "epoch": 0.9721921989433727, "grad_norm": 12.376428479030924, "learning_rate": 3.130999508659927e-06, "loss": 0.07894363403320312, "step": 112435 }, { "epoch": 0.972235432464916, "grad_norm": 28.84571803929129, "learning_rate": 3.1307959695156633e-06, "loss": 0.13666648864746095, "step": 112440 }, { "epoch": 0.9722786659864593, "grad_norm": 5.774999466959954, "learning_rate": 3.1305924297681802e-06, "loss": 0.06717529296875, "step": 112445 }, { "epoch": 0.9723218995080025, "grad_norm": 5.4726339613058235, "learning_rate": 3.1303888894184164e-06, "loss": 0.1015472412109375, "step": 112450 }, { "epoch": 0.9723651330295457, "grad_norm": 14.338801032308742, "learning_rate": 3.1301853484673105e-06, "loss": 0.20790367126464843, "step": 112455 }, { "epoch": 0.9724083665510891, "grad_norm": 11.239584748438478, "learning_rate": 3.1299818069158005e-06, "loss": 0.16177215576171874, "step": 112460 }, { "epoch": 0.9724516000726323, "grad_norm": 14.180611969325716, "learning_rate": 3.1297782647648276e-06, "loss": 0.27901554107666016, "step": 112465 }, { "epoch": 0.9724948335941755, "grad_norm": 18.148694685107206, "learning_rate": 3.1295747220153276e-06, "loss": 0.1513946533203125, "step": 112470 }, { "epoch": 0.9725380671157189, "grad_norm": 4.864213951926714, "learning_rate": 3.1293711786682407e-06, "loss": 0.1021942138671875, "step": 112475 }, { "epoch": 0.9725813006372621, "grad_norm": 7.8336238308754345, "learning_rate": 3.1291676347245047e-06, "loss": 0.056584930419921874, "step": 112480 }, { "epoch": 0.9726245341588053, "grad_norm": 1.7638397522101636, "learning_rate": 3.12896409018506e-06, "loss": 0.20341796875, "step": 112485 }, { "epoch": 0.9726677676803487, "grad_norm": 9.474593276454353, "learning_rate": 3.128760545050844e-06, "loss": 0.03511505126953125, "step": 112490 }, { "epoch": 0.9727110012018919, "grad_norm": 4.817746632550351, "learning_rate": 3.128556999322796e-06, "loss": 0.06170578002929687, "step": 112495 }, { "epoch": 0.9727542347234351, "grad_norm": 2.118174927308956, "learning_rate": 3.1283534530018546e-06, "loss": 0.10417976379394531, "step": 112500 }, { "epoch": 0.9727974682449785, "grad_norm": 37.24076807366444, "learning_rate": 3.1281499060889572e-06, "loss": 0.18379287719726561, "step": 112505 }, { "epoch": 0.9728407017665217, "grad_norm": 0.45489247856667786, "learning_rate": 3.1279463585850448e-06, "loss": 0.30265274047851565, "step": 112510 }, { "epoch": 0.9728839352880649, "grad_norm": 2.8965793489404255, "learning_rate": 3.127742810491055e-06, "loss": 0.20064697265625, "step": 112515 }, { "epoch": 0.9729271688096082, "grad_norm": 7.514834004589916, "learning_rate": 3.127539261807927e-06, "loss": 0.039080047607421876, "step": 112520 }, { "epoch": 0.9729704023311515, "grad_norm": 25.532656637848582, "learning_rate": 3.127335712536598e-06, "loss": 0.0952056884765625, "step": 112525 }, { "epoch": 0.9730136358526947, "grad_norm": 7.21954384055485, "learning_rate": 3.127132162678009e-06, "loss": 0.11132888793945313, "step": 112530 }, { "epoch": 0.973056869374238, "grad_norm": 11.53113099353969, "learning_rate": 3.126928612233097e-06, "loss": 0.10676651000976563, "step": 112535 }, { "epoch": 0.9731001028957813, "grad_norm": 3.3906367155378345, "learning_rate": 3.126725061202802e-06, "loss": 0.07578048706054688, "step": 112540 }, { "epoch": 0.9731433364173245, "grad_norm": 7.367010858078704, "learning_rate": 3.126521509588062e-06, "loss": 0.11638088226318359, "step": 112545 }, { "epoch": 0.9731865699388678, "grad_norm": 1.0234233044931669, "learning_rate": 3.126317957389816e-06, "loss": 0.14570770263671876, "step": 112550 }, { "epoch": 0.9732298034604111, "grad_norm": 5.513547819349191, "learning_rate": 3.1261144046090025e-06, "loss": 0.21710357666015626, "step": 112555 }, { "epoch": 0.9732730369819543, "grad_norm": 1.2505658098271606, "learning_rate": 3.1259108512465616e-06, "loss": 0.36048126220703125, "step": 112560 }, { "epoch": 0.9733162705034976, "grad_norm": 0.2714488120279901, "learning_rate": 3.1257072973034297e-06, "loss": 0.029947662353515626, "step": 112565 }, { "epoch": 0.9733595040250409, "grad_norm": 13.755789369836487, "learning_rate": 3.1255037427805466e-06, "loss": 0.04122314453125, "step": 112570 }, { "epoch": 0.9734027375465841, "grad_norm": 2.8855567984004433, "learning_rate": 3.1253001876788528e-06, "loss": 0.17257080078125, "step": 112575 }, { "epoch": 0.9734459710681274, "grad_norm": 27.944364863567497, "learning_rate": 3.1250966319992847e-06, "loss": 0.12941226959228516, "step": 112580 }, { "epoch": 0.9734892045896707, "grad_norm": 28.008865347292456, "learning_rate": 3.124893075742782e-06, "loss": 0.1352558135986328, "step": 112585 }, { "epoch": 0.9735324381112139, "grad_norm": 5.1104584026127, "learning_rate": 3.1246895189102837e-06, "loss": 0.05038604736328125, "step": 112590 }, { "epoch": 0.9735756716327572, "grad_norm": 10.37294968599793, "learning_rate": 3.1244859615027277e-06, "loss": 0.2496612548828125, "step": 112595 }, { "epoch": 0.9736189051543004, "grad_norm": 0.6456253422131006, "learning_rate": 3.1242824035210545e-06, "loss": 0.08853759765625, "step": 112600 }, { "epoch": 0.9736621386758437, "grad_norm": 22.993450090803567, "learning_rate": 3.124078844966201e-06, "loss": 0.09107408523559571, "step": 112605 }, { "epoch": 0.973705372197387, "grad_norm": 16.07661114990637, "learning_rate": 3.123875285839107e-06, "loss": 0.2722206115722656, "step": 112610 }, { "epoch": 0.9737486057189302, "grad_norm": 0.6405926529353335, "learning_rate": 3.123671726140712e-06, "loss": 0.055517578125, "step": 112615 }, { "epoch": 0.9737918392404735, "grad_norm": 7.234211993386434, "learning_rate": 3.123468165871953e-06, "loss": 0.0474090576171875, "step": 112620 }, { "epoch": 0.9738350727620168, "grad_norm": 0.18555459143090622, "learning_rate": 3.123264605033769e-06, "loss": 0.2745487213134766, "step": 112625 }, { "epoch": 0.97387830628356, "grad_norm": 1.3564551838417205, "learning_rate": 3.123061043627101e-06, "loss": 0.030522489547729494, "step": 112630 }, { "epoch": 0.9739215398051033, "grad_norm": 15.432300797258845, "learning_rate": 3.1228574816528854e-06, "loss": 0.07772636413574219, "step": 112635 }, { "epoch": 0.9739647733266465, "grad_norm": 0.9528505595319247, "learning_rate": 3.1226539191120633e-06, "loss": 0.0460052490234375, "step": 112640 }, { "epoch": 0.9740080068481898, "grad_norm": 0.325837430099371, "learning_rate": 3.1224503560055714e-06, "loss": 0.12710418701171874, "step": 112645 }, { "epoch": 0.9740512403697331, "grad_norm": 59.55292965697087, "learning_rate": 3.1222467923343483e-06, "loss": 0.35936279296875, "step": 112650 }, { "epoch": 0.9740944738912763, "grad_norm": 1.4511076817513449, "learning_rate": 3.1220432280993346e-06, "loss": 0.049249267578125, "step": 112655 }, { "epoch": 0.9741377074128196, "grad_norm": 3.8447043643982206, "learning_rate": 3.1218396633014687e-06, "loss": 0.1094970703125, "step": 112660 }, { "epoch": 0.9741809409343629, "grad_norm": 1.1450999715910828, "learning_rate": 3.1216360979416883e-06, "loss": 0.15178604125976564, "step": 112665 }, { "epoch": 0.9742241744559061, "grad_norm": 28.469532436735367, "learning_rate": 3.1214325320209333e-06, "loss": 0.06567153930664063, "step": 112670 }, { "epoch": 0.9742674079774494, "grad_norm": 2.787086007997781, "learning_rate": 3.1212289655401425e-06, "loss": 0.060797119140625, "step": 112675 }, { "epoch": 0.9743106414989927, "grad_norm": 1.7721360155083525, "learning_rate": 3.121025398500253e-06, "loss": 0.1663848876953125, "step": 112680 }, { "epoch": 0.9743538750205359, "grad_norm": 26.66909657059564, "learning_rate": 3.1208218309022066e-06, "loss": 0.11538238525390625, "step": 112685 }, { "epoch": 0.9743971085420792, "grad_norm": 22.768597896623806, "learning_rate": 3.120618262746941e-06, "loss": 0.1206268310546875, "step": 112690 }, { "epoch": 0.9744403420636224, "grad_norm": 4.142742444096484, "learning_rate": 3.120414694035394e-06, "loss": 0.2339771270751953, "step": 112695 }, { "epoch": 0.9744835755851657, "grad_norm": 6.97260807349374, "learning_rate": 3.1202111247685044e-06, "loss": 0.186614990234375, "step": 112700 }, { "epoch": 0.974526809106709, "grad_norm": 1.0702959565470365, "learning_rate": 3.120007554947212e-06, "loss": 0.1267253875732422, "step": 112705 }, { "epoch": 0.9745700426282522, "grad_norm": 0.5098692083595936, "learning_rate": 3.1198039845724556e-06, "loss": 0.045205307006835935, "step": 112710 }, { "epoch": 0.9746132761497955, "grad_norm": 81.22081124167059, "learning_rate": 3.119600413645174e-06, "loss": 0.3878509521484375, "step": 112715 }, { "epoch": 0.9746565096713388, "grad_norm": 2.279938464623643, "learning_rate": 3.1193968421663055e-06, "loss": 0.08589401245117187, "step": 112720 }, { "epoch": 0.974699743192882, "grad_norm": 6.757385644547692, "learning_rate": 3.1191932701367895e-06, "loss": 0.021672821044921874, "step": 112725 }, { "epoch": 0.9747429767144253, "grad_norm": 35.01263319447369, "learning_rate": 3.1189896975575644e-06, "loss": 0.5070953369140625, "step": 112730 }, { "epoch": 0.9747862102359686, "grad_norm": 0.7056123343383, "learning_rate": 3.118786124429569e-06, "loss": 0.018229293823242187, "step": 112735 }, { "epoch": 0.9748294437575118, "grad_norm": 9.017606300649136, "learning_rate": 3.118582550753744e-06, "loss": 0.594525146484375, "step": 112740 }, { "epoch": 0.9748726772790551, "grad_norm": 1.8409368403965862, "learning_rate": 3.1183789765310253e-06, "loss": 0.24191741943359374, "step": 112745 }, { "epoch": 0.9749159108005984, "grad_norm": 0.8684279188715408, "learning_rate": 3.1181754017623535e-06, "loss": 0.06087303161621094, "step": 112750 }, { "epoch": 0.9749591443221416, "grad_norm": 8.707864856765651, "learning_rate": 3.117971826448668e-06, "loss": 0.08420639038085938, "step": 112755 }, { "epoch": 0.9750023778436849, "grad_norm": 0.9454906632910026, "learning_rate": 3.117768250590906e-06, "loss": 0.009944534301757813, "step": 112760 }, { "epoch": 0.9750456113652282, "grad_norm": 2.7697685832728416, "learning_rate": 3.1175646741900073e-06, "loss": 0.0764404296875, "step": 112765 }, { "epoch": 0.9750888448867714, "grad_norm": 2.835760869697227, "learning_rate": 3.1173610972469108e-06, "loss": 0.34841766357421877, "step": 112770 }, { "epoch": 0.9751320784083146, "grad_norm": 7.070760489845002, "learning_rate": 3.1171575197625555e-06, "loss": 0.071087646484375, "step": 112775 }, { "epoch": 0.975175311929858, "grad_norm": 9.152944741408303, "learning_rate": 3.1169539417378797e-06, "loss": 0.18740692138671874, "step": 112780 }, { "epoch": 0.9752185454514012, "grad_norm": 6.524946099012612, "learning_rate": 3.116750363173823e-06, "loss": 0.06823501586914063, "step": 112785 }, { "epoch": 0.9752617789729444, "grad_norm": 7.79963902042771, "learning_rate": 3.116546784071324e-06, "loss": 0.06704330444335938, "step": 112790 }, { "epoch": 0.9753050124944878, "grad_norm": 10.152721076861141, "learning_rate": 3.116343204431321e-06, "loss": 0.1120870590209961, "step": 112795 }, { "epoch": 0.975348246016031, "grad_norm": 0.9997394849310218, "learning_rate": 3.116139624254754e-06, "loss": 0.061277008056640624, "step": 112800 }, { "epoch": 0.9753914795375742, "grad_norm": 0.9940034116206697, "learning_rate": 3.115936043542561e-06, "loss": 0.06669464111328124, "step": 112805 }, { "epoch": 0.9754347130591176, "grad_norm": 0.061896337565242474, "learning_rate": 3.1157324622956817e-06, "loss": 0.23061676025390626, "step": 112810 }, { "epoch": 0.9754779465806608, "grad_norm": 5.895125308243072, "learning_rate": 3.1155288805150543e-06, "loss": 0.05571060180664063, "step": 112815 }, { "epoch": 0.975521180102204, "grad_norm": 9.28558164586237, "learning_rate": 3.1153252982016177e-06, "loss": 0.032142257690429686, "step": 112820 }, { "epoch": 0.9755644136237474, "grad_norm": 18.514609874675482, "learning_rate": 3.1151217153563105e-06, "loss": 0.16714706420898437, "step": 112825 }, { "epoch": 0.9756076471452906, "grad_norm": 32.88759650243853, "learning_rate": 3.114918131980073e-06, "loss": 0.16970062255859375, "step": 112830 }, { "epoch": 0.9756508806668338, "grad_norm": 0.31082250274873413, "learning_rate": 3.114714548073843e-06, "loss": 0.35335617065429686, "step": 112835 }, { "epoch": 0.9756941141883771, "grad_norm": 24.92721513367181, "learning_rate": 3.114510963638559e-06, "loss": 0.0614105224609375, "step": 112840 }, { "epoch": 0.9757373477099204, "grad_norm": 0.262937985172027, "learning_rate": 3.1143073786751618e-06, "loss": 0.24828948974609374, "step": 112845 }, { "epoch": 0.9757805812314636, "grad_norm": 1.1490904646207547, "learning_rate": 3.114103793184588e-06, "loss": 0.11528701782226562, "step": 112850 }, { "epoch": 0.975823814753007, "grad_norm": 1.2324483382313287, "learning_rate": 3.1139002071677777e-06, "loss": 0.305902099609375, "step": 112855 }, { "epoch": 0.9758670482745502, "grad_norm": 2.5247406903557823, "learning_rate": 3.1136966206256706e-06, "loss": 0.1205078125, "step": 112860 }, { "epoch": 0.9759102817960934, "grad_norm": 0.19202359635065863, "learning_rate": 3.113493033559204e-06, "loss": 0.2832185745239258, "step": 112865 }, { "epoch": 0.9759535153176366, "grad_norm": 18.515079037712812, "learning_rate": 3.1132894459693183e-06, "loss": 0.111376953125, "step": 112870 }, { "epoch": 0.97599674883918, "grad_norm": 15.138205120519908, "learning_rate": 3.11308585785695e-06, "loss": 0.1045654296875, "step": 112875 }, { "epoch": 0.9760399823607232, "grad_norm": 2.522069316318466, "learning_rate": 3.112882269223041e-06, "loss": 0.08606719970703125, "step": 112880 }, { "epoch": 0.9760832158822664, "grad_norm": 2.9963447405898402, "learning_rate": 3.112678680068529e-06, "loss": 0.09769611358642578, "step": 112885 }, { "epoch": 0.9761264494038098, "grad_norm": 6.96809370809019, "learning_rate": 3.112475090394352e-06, "loss": 0.05949592590332031, "step": 112890 }, { "epoch": 0.976169682925353, "grad_norm": 0.8681953179315137, "learning_rate": 3.1122715002014513e-06, "loss": 0.0895355224609375, "step": 112895 }, { "epoch": 0.9762129164468962, "grad_norm": 30.121700405223503, "learning_rate": 3.112067909490763e-06, "loss": 0.41564407348632815, "step": 112900 }, { "epoch": 0.9762561499684396, "grad_norm": 8.882159662439612, "learning_rate": 3.111864318263227e-06, "loss": 0.08969955444335938, "step": 112905 }, { "epoch": 0.9762993834899828, "grad_norm": 16.31794416492043, "learning_rate": 3.1116607265197837e-06, "loss": 0.3362846374511719, "step": 112910 }, { "epoch": 0.976342617011526, "grad_norm": 41.77886904817739, "learning_rate": 3.111457134261372e-06, "loss": 0.22561111450195312, "step": 112915 }, { "epoch": 0.9763858505330694, "grad_norm": 1.0594446619158444, "learning_rate": 3.1112535414889274e-06, "loss": 0.0560302734375, "step": 112920 }, { "epoch": 0.9764290840546126, "grad_norm": 1.925274068729068, "learning_rate": 3.1110499482033932e-06, "loss": 0.2324737548828125, "step": 112925 }, { "epoch": 0.9764723175761558, "grad_norm": 0.6259485786826933, "learning_rate": 3.1108463544057056e-06, "loss": 0.1926250457763672, "step": 112930 }, { "epoch": 0.9765155510976992, "grad_norm": 23.537920965616802, "learning_rate": 3.1106427600968045e-06, "loss": 0.07804107666015625, "step": 112935 }, { "epoch": 0.9765587846192424, "grad_norm": 10.601173181272516, "learning_rate": 3.110439165277629e-06, "loss": 0.14169769287109374, "step": 112940 }, { "epoch": 0.9766020181407856, "grad_norm": 0.5369221053975533, "learning_rate": 3.1102355699491183e-06, "loss": 0.31604576110839844, "step": 112945 }, { "epoch": 0.9766452516623289, "grad_norm": 2.8201834723484978, "learning_rate": 3.1100319741122107e-06, "loss": 0.1348876953125, "step": 112950 }, { "epoch": 0.9766884851838722, "grad_norm": 0.9194043488602999, "learning_rate": 3.109828377767844e-06, "loss": 0.032213592529296876, "step": 112955 }, { "epoch": 0.9767317187054154, "grad_norm": 1.4988146343322484, "learning_rate": 3.1096247809169597e-06, "loss": 0.16764068603515625, "step": 112960 }, { "epoch": 0.9767749522269586, "grad_norm": 12.21415474427918, "learning_rate": 3.109421183560496e-06, "loss": 0.0635711669921875, "step": 112965 }, { "epoch": 0.976818185748502, "grad_norm": 1.4562870654670077, "learning_rate": 3.10921758569939e-06, "loss": 0.0445556640625, "step": 112970 }, { "epoch": 0.9768614192700452, "grad_norm": 25.485671348777032, "learning_rate": 3.1090139873345833e-06, "loss": 0.1498645782470703, "step": 112975 }, { "epoch": 0.9769046527915884, "grad_norm": 0.6054656425495659, "learning_rate": 3.108810388467014e-06, "loss": 0.065850830078125, "step": 112980 }, { "epoch": 0.9769478863131318, "grad_norm": 31.010641649534644, "learning_rate": 3.1086067890976194e-06, "loss": 0.19759731292724608, "step": 112985 }, { "epoch": 0.976991119834675, "grad_norm": 9.091475682955803, "learning_rate": 3.108403189227341e-06, "loss": 0.16340179443359376, "step": 112990 }, { "epoch": 0.9770343533562182, "grad_norm": 30.51277408907867, "learning_rate": 3.1081995888571164e-06, "loss": 0.1119964599609375, "step": 112995 }, { "epoch": 0.9770775868777616, "grad_norm": 0.3109150716820502, "learning_rate": 3.1079959879878845e-06, "loss": 0.32938995361328127, "step": 113000 }, { "epoch": 0.9771208203993048, "grad_norm": 10.746986093027354, "learning_rate": 3.1077923866205853e-06, "loss": 0.08715581893920898, "step": 113005 }, { "epoch": 0.977164053920848, "grad_norm": 11.978503009122026, "learning_rate": 3.1075887847561574e-06, "loss": 0.09001007080078124, "step": 113010 }, { "epoch": 0.9772072874423914, "grad_norm": 0.2830641184731976, "learning_rate": 3.107385182395539e-06, "loss": 0.18862457275390626, "step": 113015 }, { "epoch": 0.9772505209639346, "grad_norm": 1.638249883840251, "learning_rate": 3.1071815795396687e-06, "loss": 0.3741172790527344, "step": 113020 }, { "epoch": 0.9772937544854778, "grad_norm": 1.7704012073534505, "learning_rate": 3.106977976189488e-06, "loss": 0.0498016357421875, "step": 113025 }, { "epoch": 0.9773369880070212, "grad_norm": 1.031880343576725, "learning_rate": 3.1067743723459328e-06, "loss": 0.09179954528808594, "step": 113030 }, { "epoch": 0.9773802215285644, "grad_norm": 2.6202322645485197, "learning_rate": 3.1065707680099453e-06, "loss": 0.06650676727294921, "step": 113035 }, { "epoch": 0.9774234550501076, "grad_norm": 2.527275017137076, "learning_rate": 3.106367163182462e-06, "loss": 0.081707763671875, "step": 113040 }, { "epoch": 0.9774666885716509, "grad_norm": 0.5345838761231546, "learning_rate": 3.1061635578644224e-06, "loss": 0.03937606811523438, "step": 113045 }, { "epoch": 0.9775099220931942, "grad_norm": 6.246740444301408, "learning_rate": 3.105959952056766e-06, "loss": 0.3409404754638672, "step": 113050 }, { "epoch": 0.9775531556147374, "grad_norm": 9.687536389318229, "learning_rate": 3.1057563457604318e-06, "loss": 0.1642822265625, "step": 113055 }, { "epoch": 0.9775963891362807, "grad_norm": 27.127996045312024, "learning_rate": 3.105552738976359e-06, "loss": 0.14366531372070312, "step": 113060 }, { "epoch": 0.977639622657824, "grad_norm": 9.280745428064634, "learning_rate": 3.105349131705486e-06, "loss": 0.09394378662109375, "step": 113065 }, { "epoch": 0.9776828561793672, "grad_norm": 2.866402205740052, "learning_rate": 3.1051455239487525e-06, "loss": 0.09051742553710937, "step": 113070 }, { "epoch": 0.9777260897009105, "grad_norm": 0.6008258778747575, "learning_rate": 3.1049419157070966e-06, "loss": 0.2017059326171875, "step": 113075 }, { "epoch": 0.9777693232224538, "grad_norm": 6.559212810442241, "learning_rate": 3.1047383069814574e-06, "loss": 0.290869140625, "step": 113080 }, { "epoch": 0.977812556743997, "grad_norm": 3.527397884719405, "learning_rate": 3.1045346977727758e-06, "loss": 0.1387847900390625, "step": 113085 }, { "epoch": 0.9778557902655403, "grad_norm": 1.8169907335111193, "learning_rate": 3.1043310880819887e-06, "loss": 0.3062774658203125, "step": 113090 }, { "epoch": 0.9778990237870836, "grad_norm": 30.462555470707244, "learning_rate": 3.1041274779100354e-06, "loss": 0.28067779541015625, "step": 113095 }, { "epoch": 0.9779422573086268, "grad_norm": 2.6083922916123505, "learning_rate": 3.1039238672578562e-06, "loss": 0.07238597869873047, "step": 113100 }, { "epoch": 0.9779854908301701, "grad_norm": 5.245717667095015, "learning_rate": 3.103720256126388e-06, "loss": 0.054308319091796876, "step": 113105 }, { "epoch": 0.9780287243517134, "grad_norm": 1.7933500137959308, "learning_rate": 3.103516644516572e-06, "loss": 0.09646759033203126, "step": 113110 }, { "epoch": 0.9780719578732566, "grad_norm": 11.325850668263744, "learning_rate": 3.103313032429347e-06, "loss": 0.4029632568359375, "step": 113115 }, { "epoch": 0.9781151913947999, "grad_norm": 2.8735390527937295, "learning_rate": 3.1031094198656507e-06, "loss": 0.137274169921875, "step": 113120 }, { "epoch": 0.9781584249163431, "grad_norm": 0.7686509346836582, "learning_rate": 3.102905806826423e-06, "loss": 0.042218017578125, "step": 113125 }, { "epoch": 0.9782016584378864, "grad_norm": 0.8580329283212756, "learning_rate": 3.1027021933126024e-06, "loss": 0.07282524108886719, "step": 113130 }, { "epoch": 0.9782448919594297, "grad_norm": 13.195437183471672, "learning_rate": 3.102498579325129e-06, "loss": 0.05363540649414063, "step": 113135 }, { "epoch": 0.9782881254809729, "grad_norm": 1.5412565239163192, "learning_rate": 3.102294964864941e-06, "loss": 0.17107086181640624, "step": 113140 }, { "epoch": 0.9783313590025162, "grad_norm": 1.3261313552413865, "learning_rate": 3.1020913499329773e-06, "loss": 0.1138427734375, "step": 113145 }, { "epoch": 0.9783745925240594, "grad_norm": 28.57709820153421, "learning_rate": 3.1018877345301783e-06, "loss": 0.2865631103515625, "step": 113150 }, { "epoch": 0.9784178260456027, "grad_norm": 2.0323152658894585, "learning_rate": 3.101684118657481e-06, "loss": 0.036734390258789065, "step": 113155 }, { "epoch": 0.978461059567146, "grad_norm": 11.858459136207857, "learning_rate": 3.1014805023158256e-06, "loss": 0.1346282958984375, "step": 113160 }, { "epoch": 0.9785042930886892, "grad_norm": 1.3163207043120366, "learning_rate": 3.101276885506152e-06, "loss": 0.083404541015625, "step": 113165 }, { "epoch": 0.9785475266102325, "grad_norm": 21.970424589812882, "learning_rate": 3.101073268229398e-06, "loss": 0.14618988037109376, "step": 113170 }, { "epoch": 0.9785907601317758, "grad_norm": 7.470688332769162, "learning_rate": 3.1008696504865027e-06, "loss": 0.3125419616699219, "step": 113175 }, { "epoch": 0.978633993653319, "grad_norm": 15.339533032007365, "learning_rate": 3.1006660322784056e-06, "loss": 0.0590606689453125, "step": 113180 }, { "epoch": 0.9786772271748623, "grad_norm": 1.2656214466995745, "learning_rate": 3.100462413606045e-06, "loss": 0.2509613037109375, "step": 113185 }, { "epoch": 0.9787204606964056, "grad_norm": 0.4727500339249947, "learning_rate": 3.1002587944703612e-06, "loss": 0.03137359619140625, "step": 113190 }, { "epoch": 0.9787636942179488, "grad_norm": 0.6363657999715902, "learning_rate": 3.1000551748722936e-06, "loss": 0.050537109375, "step": 113195 }, { "epoch": 0.9788069277394921, "grad_norm": 7.386102897410646, "learning_rate": 3.0998515548127793e-06, "loss": 0.14073581695556642, "step": 113200 }, { "epoch": 0.9788501612610353, "grad_norm": 10.904101099243592, "learning_rate": 3.0996479342927587e-06, "loss": 0.1891021728515625, "step": 113205 }, { "epoch": 0.9788933947825786, "grad_norm": 18.30566382775224, "learning_rate": 3.0994443133131706e-06, "loss": 0.05513687133789062, "step": 113210 }, { "epoch": 0.9789366283041219, "grad_norm": 5.596948622087882, "learning_rate": 3.099240691874954e-06, "loss": 0.1907217025756836, "step": 113215 }, { "epoch": 0.9789798618256651, "grad_norm": 9.385464458380266, "learning_rate": 3.099037069979048e-06, "loss": 0.1743255615234375, "step": 113220 }, { "epoch": 0.9790230953472084, "grad_norm": 0.2509270189011642, "learning_rate": 3.0988334476263916e-06, "loss": 0.04868621826171875, "step": 113225 }, { "epoch": 0.9790663288687517, "grad_norm": 5.23091653067733, "learning_rate": 3.098629824817925e-06, "loss": 0.05811004638671875, "step": 113230 }, { "epoch": 0.9791095623902949, "grad_norm": 23.50609197416519, "learning_rate": 3.0984262015545867e-06, "loss": 0.08865966796875, "step": 113235 }, { "epoch": 0.9791527959118382, "grad_norm": 0.1949738452390753, "learning_rate": 3.0982225778373144e-06, "loss": 0.0435699462890625, "step": 113240 }, { "epoch": 0.9791960294333815, "grad_norm": 11.766167694007242, "learning_rate": 3.0980189536670476e-06, "loss": 0.21567840576171876, "step": 113245 }, { "epoch": 0.9792392629549247, "grad_norm": 15.26127225699314, "learning_rate": 3.0978153290447274e-06, "loss": 0.04685134887695312, "step": 113250 }, { "epoch": 0.979282496476468, "grad_norm": 2.94803528661919, "learning_rate": 3.097611703971291e-06, "loss": 0.1643035888671875, "step": 113255 }, { "epoch": 0.9793257299980113, "grad_norm": 6.320143458628221, "learning_rate": 3.097408078447678e-06, "loss": 0.038860321044921875, "step": 113260 }, { "epoch": 0.9793689635195545, "grad_norm": 41.330303139328116, "learning_rate": 3.0972044524748283e-06, "loss": 0.17049179077148438, "step": 113265 }, { "epoch": 0.9794121970410978, "grad_norm": 6.839074866206, "learning_rate": 3.097000826053679e-06, "loss": 0.0667510986328125, "step": 113270 }, { "epoch": 0.9794554305626411, "grad_norm": 55.579801569995205, "learning_rate": 3.0967971991851706e-06, "loss": 0.22034683227539062, "step": 113275 }, { "epoch": 0.9794986640841843, "grad_norm": 14.710065536016879, "learning_rate": 3.096593571870243e-06, "loss": 0.18559341430664061, "step": 113280 }, { "epoch": 0.9795418976057276, "grad_norm": 6.0514633944298035, "learning_rate": 3.0963899441098336e-06, "loss": 0.11884613037109375, "step": 113285 }, { "epoch": 0.9795851311272709, "grad_norm": 11.39598254867336, "learning_rate": 3.0961863159048833e-06, "loss": 0.11727218627929688, "step": 113290 }, { "epoch": 0.9796283646488141, "grad_norm": 2.1040209743336815, "learning_rate": 3.0959826872563296e-06, "loss": 0.085760498046875, "step": 113295 }, { "epoch": 0.9796715981703573, "grad_norm": 20.80806912320949, "learning_rate": 3.0957790581651114e-06, "loss": 0.13826751708984375, "step": 113300 }, { "epoch": 0.9797148316919007, "grad_norm": 22.286146680456444, "learning_rate": 3.095575428632169e-06, "loss": 0.19305267333984374, "step": 113305 }, { "epoch": 0.9797580652134439, "grad_norm": 14.928434807197954, "learning_rate": 3.0953717986584424e-06, "loss": 0.046990966796875, "step": 113310 }, { "epoch": 0.9798012987349871, "grad_norm": 9.606995373114277, "learning_rate": 3.0951681682448685e-06, "loss": 0.1168426513671875, "step": 113315 }, { "epoch": 0.9798445322565305, "grad_norm": 27.1725193580534, "learning_rate": 3.094964537392387e-06, "loss": 0.23846435546875, "step": 113320 }, { "epoch": 0.9798877657780737, "grad_norm": 2.632453223449269, "learning_rate": 3.094760906101938e-06, "loss": 0.09092864990234376, "step": 113325 }, { "epoch": 0.9799309992996169, "grad_norm": 44.353553264582395, "learning_rate": 3.0945572743744608e-06, "loss": 0.4672401428222656, "step": 113330 }, { "epoch": 0.9799742328211603, "grad_norm": 9.43193655083499, "learning_rate": 3.0943536422108923e-06, "loss": 0.09795761108398438, "step": 113335 }, { "epoch": 0.9800174663427035, "grad_norm": 2.8172669810171476, "learning_rate": 3.0941500096121746e-06, "loss": 0.097210693359375, "step": 113340 }, { "epoch": 0.9800606998642467, "grad_norm": 2.8612323788632517, "learning_rate": 3.0939463765792446e-06, "loss": 0.10818977355957031, "step": 113345 }, { "epoch": 0.98010393338579, "grad_norm": 38.88606630044984, "learning_rate": 3.093742743113042e-06, "loss": 0.16036376953125, "step": 113350 }, { "epoch": 0.9801471669073333, "grad_norm": 5.634635079591732, "learning_rate": 3.093539109214507e-06, "loss": 0.31761932373046875, "step": 113355 }, { "epoch": 0.9801904004288765, "grad_norm": 0.14105519954468063, "learning_rate": 3.093335474884577e-06, "loss": 0.18881263732910156, "step": 113360 }, { "epoch": 0.9802336339504198, "grad_norm": 0.36723941591885284, "learning_rate": 3.0931318401241924e-06, "loss": 0.10246429443359376, "step": 113365 }, { "epoch": 0.9802768674719631, "grad_norm": 3.4231142021699195, "learning_rate": 3.092928204934292e-06, "loss": 0.29431304931640623, "step": 113370 }, { "epoch": 0.9803201009935063, "grad_norm": 0.02066100580073109, "learning_rate": 3.092724569315815e-06, "loss": 0.055690956115722653, "step": 113375 }, { "epoch": 0.9803633345150495, "grad_norm": 11.265925282994152, "learning_rate": 3.0925209332697e-06, "loss": 0.04344635009765625, "step": 113380 }, { "epoch": 0.9804065680365929, "grad_norm": 6.09290670549124, "learning_rate": 3.0923172967968866e-06, "loss": 0.08572158813476563, "step": 113385 }, { "epoch": 0.9804498015581361, "grad_norm": 1.060767153577919, "learning_rate": 3.092113659898315e-06, "loss": 0.03643798828125, "step": 113390 }, { "epoch": 0.9804930350796793, "grad_norm": 41.53288694187843, "learning_rate": 3.0919100225749233e-06, "loss": 0.22684555053710936, "step": 113395 }, { "epoch": 0.9805362686012227, "grad_norm": 17.6940483547782, "learning_rate": 3.0917063848276497e-06, "loss": 0.13233184814453125, "step": 113400 }, { "epoch": 0.9805795021227659, "grad_norm": 2.477169401911379, "learning_rate": 3.0915027466574348e-06, "loss": 0.16849517822265625, "step": 113405 }, { "epoch": 0.9806227356443091, "grad_norm": 1.2582437494497958, "learning_rate": 3.0912991080652174e-06, "loss": 0.134375, "step": 113410 }, { "epoch": 0.9806659691658525, "grad_norm": 16.19294475440778, "learning_rate": 3.091095469051936e-06, "loss": 0.1268869400024414, "step": 113415 }, { "epoch": 0.9807092026873957, "grad_norm": 17.255360424247275, "learning_rate": 3.0908918296185312e-06, "loss": 0.1181243896484375, "step": 113420 }, { "epoch": 0.9807524362089389, "grad_norm": 6.007877327455367, "learning_rate": 3.0906881897659416e-06, "loss": 0.08400726318359375, "step": 113425 }, { "epoch": 0.9807956697304823, "grad_norm": 16.197528455235776, "learning_rate": 3.090484549495105e-06, "loss": 0.35376739501953125, "step": 113430 }, { "epoch": 0.9808389032520255, "grad_norm": 2.9715287040596383, "learning_rate": 3.0902809088069624e-06, "loss": 0.6183753967285156, "step": 113435 }, { "epoch": 0.9808821367735687, "grad_norm": 11.121040781512296, "learning_rate": 3.0900772677024513e-06, "loss": 0.11452178955078125, "step": 113440 }, { "epoch": 0.9809253702951121, "grad_norm": 1.5093972328386613, "learning_rate": 3.0898736261825124e-06, "loss": 0.03737564086914062, "step": 113445 }, { "epoch": 0.9809686038166553, "grad_norm": 10.153569071086057, "learning_rate": 3.0896699842480843e-06, "loss": 0.08190345764160156, "step": 113450 }, { "epoch": 0.9810118373381985, "grad_norm": 1.0152588714156556, "learning_rate": 3.0894663419001063e-06, "loss": 0.0566558837890625, "step": 113455 }, { "epoch": 0.9810550708597419, "grad_norm": 26.236589222904037, "learning_rate": 3.089262699139517e-06, "loss": 0.16392250061035157, "step": 113460 }, { "epoch": 0.9810983043812851, "grad_norm": 1.8275861851554585, "learning_rate": 3.0890590559672566e-06, "loss": 0.09498062133789062, "step": 113465 }, { "epoch": 0.9811415379028283, "grad_norm": 63.236723759389115, "learning_rate": 3.0888554123842635e-06, "loss": 0.4109954833984375, "step": 113470 }, { "epoch": 0.9811847714243715, "grad_norm": 22.99126391029818, "learning_rate": 3.088651768391477e-06, "loss": 0.17913036346435546, "step": 113475 }, { "epoch": 0.9812280049459149, "grad_norm": 48.24878370043301, "learning_rate": 3.088448123989836e-06, "loss": 0.19270553588867187, "step": 113480 }, { "epoch": 0.9812712384674581, "grad_norm": 0.6788680268659988, "learning_rate": 3.0882444791802812e-06, "loss": 0.02631988525390625, "step": 113485 }, { "epoch": 0.9813144719890013, "grad_norm": 10.169628737774511, "learning_rate": 3.08804083396375e-06, "loss": 0.05835113525390625, "step": 113490 }, { "epoch": 0.9813577055105447, "grad_norm": 9.608440058460273, "learning_rate": 3.087837188341182e-06, "loss": 0.09548873901367187, "step": 113495 }, { "epoch": 0.9814009390320879, "grad_norm": 3.2176637764568516, "learning_rate": 3.0876335423135163e-06, "loss": 0.06082706451416016, "step": 113500 }, { "epoch": 0.9814441725536311, "grad_norm": 8.33755125458627, "learning_rate": 3.0874298958816935e-06, "loss": 0.053765869140625, "step": 113505 }, { "epoch": 0.9814874060751745, "grad_norm": 3.456105435520941, "learning_rate": 3.087226249046651e-06, "loss": 0.030917930603027343, "step": 113510 }, { "epoch": 0.9815306395967177, "grad_norm": 15.556121917728841, "learning_rate": 3.0870226018093294e-06, "loss": 0.212591552734375, "step": 113515 }, { "epoch": 0.9815738731182609, "grad_norm": 8.762299296438085, "learning_rate": 3.0868189541706667e-06, "loss": 0.23563232421875, "step": 113520 }, { "epoch": 0.9816171066398043, "grad_norm": 18.692987667275343, "learning_rate": 3.086615306131602e-06, "loss": 0.0998138427734375, "step": 113525 }, { "epoch": 0.9816603401613475, "grad_norm": 1.4861359786103008, "learning_rate": 3.0864116576930763e-06, "loss": 0.06045989990234375, "step": 113530 }, { "epoch": 0.9817035736828907, "grad_norm": 6.094023965393822, "learning_rate": 3.0862080088560276e-06, "loss": 0.108270263671875, "step": 113535 }, { "epoch": 0.9817468072044341, "grad_norm": 1.7799553443559484, "learning_rate": 3.0860043596213944e-06, "loss": 0.15811004638671874, "step": 113540 }, { "epoch": 0.9817900407259773, "grad_norm": 0.09145428960250566, "learning_rate": 3.0858007099901172e-06, "loss": 0.19831619262695313, "step": 113545 }, { "epoch": 0.9818332742475205, "grad_norm": 29.064759253006564, "learning_rate": 3.0855970599631347e-06, "loss": 0.24008750915527344, "step": 113550 }, { "epoch": 0.9818765077690638, "grad_norm": 5.018414169676406, "learning_rate": 3.0853934095413865e-06, "loss": 0.029775238037109374, "step": 113555 }, { "epoch": 0.9819197412906071, "grad_norm": 52.43495957386796, "learning_rate": 3.0851897587258107e-06, "loss": 0.0793243408203125, "step": 113560 }, { "epoch": 0.9819629748121503, "grad_norm": 16.319719424646074, "learning_rate": 3.0849861075173476e-06, "loss": 0.15460433959960937, "step": 113565 }, { "epoch": 0.9820062083336936, "grad_norm": 1.724038650286398, "learning_rate": 3.0847824559169365e-06, "loss": 0.085748291015625, "step": 113570 }, { "epoch": 0.9820494418552369, "grad_norm": 3.681294385408634, "learning_rate": 3.084578803925515e-06, "loss": 0.08235893249511719, "step": 113575 }, { "epoch": 0.9820926753767801, "grad_norm": 2.670186179084016, "learning_rate": 3.084375151544024e-06, "loss": 0.055147552490234376, "step": 113580 }, { "epoch": 0.9821359088983234, "grad_norm": 54.25770674445339, "learning_rate": 3.084171498773403e-06, "loss": 0.6874923706054688, "step": 113585 }, { "epoch": 0.9821791424198667, "grad_norm": 12.361355220147802, "learning_rate": 3.08396784561459e-06, "loss": 0.32918624877929686, "step": 113590 }, { "epoch": 0.9822223759414099, "grad_norm": 3.1369324271515047, "learning_rate": 3.0837641920685247e-06, "loss": 0.20720367431640624, "step": 113595 }, { "epoch": 0.9822656094629532, "grad_norm": 2.780320090839978, "learning_rate": 3.0835605381361468e-06, "loss": 0.12471694946289062, "step": 113600 }, { "epoch": 0.9823088429844965, "grad_norm": 0.15712995194870383, "learning_rate": 3.0833568838183947e-06, "loss": 0.059468841552734374, "step": 113605 }, { "epoch": 0.9823520765060397, "grad_norm": 5.191442377419283, "learning_rate": 3.083153229116208e-06, "loss": 0.09957733154296874, "step": 113610 }, { "epoch": 0.982395310027583, "grad_norm": 3.832462405308464, "learning_rate": 3.0829495740305256e-06, "loss": 0.048410797119140626, "step": 113615 }, { "epoch": 0.9824385435491263, "grad_norm": 7.303713897580179, "learning_rate": 3.0827459185622876e-06, "loss": 0.16457748413085938, "step": 113620 }, { "epoch": 0.9824817770706695, "grad_norm": 0.9715444642001076, "learning_rate": 3.0825422627124325e-06, "loss": 0.05549774169921875, "step": 113625 }, { "epoch": 0.9825250105922128, "grad_norm": 0.8430111643942766, "learning_rate": 3.0823386064819005e-06, "loss": 0.02758636474609375, "step": 113630 }, { "epoch": 0.9825682441137561, "grad_norm": 34.237175952945435, "learning_rate": 3.0821349498716292e-06, "loss": 0.11532135009765625, "step": 113635 }, { "epoch": 0.9826114776352993, "grad_norm": 0.11858171791918459, "learning_rate": 3.0819312928825585e-06, "loss": 0.22184677124023439, "step": 113640 }, { "epoch": 0.9826547111568426, "grad_norm": 51.768549928466214, "learning_rate": 3.081727635515629e-06, "loss": 0.34546289443969724, "step": 113645 }, { "epoch": 0.9826979446783858, "grad_norm": 10.481452917850268, "learning_rate": 3.0815239777717784e-06, "loss": 0.14446563720703126, "step": 113650 }, { "epoch": 0.9827411781999291, "grad_norm": 21.094449655007978, "learning_rate": 3.081320319651946e-06, "loss": 0.17432327270507814, "step": 113655 }, { "epoch": 0.9827844117214724, "grad_norm": 23.736021247316906, "learning_rate": 3.0811166611570723e-06, "loss": 0.15674514770507814, "step": 113660 }, { "epoch": 0.9828276452430156, "grad_norm": 1.0824840528672321, "learning_rate": 3.0809130022880945e-06, "loss": 0.18484649658203126, "step": 113665 }, { "epoch": 0.9828708787645589, "grad_norm": 1.15659694079943, "learning_rate": 3.080709343045954e-06, "loss": 0.11249847412109375, "step": 113670 }, { "epoch": 0.9829141122861021, "grad_norm": 16.817150253980117, "learning_rate": 3.080505683431589e-06, "loss": 0.22796401977539063, "step": 113675 }, { "epoch": 0.9829573458076454, "grad_norm": 4.435109324022531, "learning_rate": 3.0803020234459393e-06, "loss": 0.17384986877441405, "step": 113680 }, { "epoch": 0.9830005793291887, "grad_norm": 13.414865411871286, "learning_rate": 3.080098363089943e-06, "loss": 0.418853759765625, "step": 113685 }, { "epoch": 0.983043812850732, "grad_norm": 0.48815356462725495, "learning_rate": 3.079894702364541e-06, "loss": 0.0185333251953125, "step": 113690 }, { "epoch": 0.9830870463722752, "grad_norm": 31.962948586489464, "learning_rate": 3.079691041270671e-06, "loss": 0.23176422119140624, "step": 113695 }, { "epoch": 0.9831302798938185, "grad_norm": 0.7553125575324102, "learning_rate": 3.079487379809273e-06, "loss": 0.11001968383789062, "step": 113700 }, { "epoch": 0.9831735134153617, "grad_norm": 3.5788990482150536, "learning_rate": 3.079283717981286e-06, "loss": 0.0368011474609375, "step": 113705 }, { "epoch": 0.983216746936905, "grad_norm": 10.744719482881418, "learning_rate": 3.0790800557876505e-06, "loss": 0.03855438232421875, "step": 113710 }, { "epoch": 0.9832599804584483, "grad_norm": 17.038326117622645, "learning_rate": 3.0788763932293038e-06, "loss": 0.20519332885742186, "step": 113715 }, { "epoch": 0.9833032139799915, "grad_norm": 6.91143883376352, "learning_rate": 3.078672730307187e-06, "loss": 0.048413848876953124, "step": 113720 }, { "epoch": 0.9833464475015348, "grad_norm": 1.9103692299677173, "learning_rate": 3.0784690670222375e-06, "loss": 0.037311553955078125, "step": 113725 }, { "epoch": 0.983389681023078, "grad_norm": 6.06839173685411, "learning_rate": 3.0782654033753963e-06, "loss": 0.066015625, "step": 113730 }, { "epoch": 0.9834329145446213, "grad_norm": 11.471237190589262, "learning_rate": 3.0780617393676013e-06, "loss": 0.086395263671875, "step": 113735 }, { "epoch": 0.9834761480661646, "grad_norm": 4.947968967471392, "learning_rate": 3.077858074999793e-06, "loss": 0.22220268249511718, "step": 113740 }, { "epoch": 0.9835193815877078, "grad_norm": 3.0635314506092186, "learning_rate": 3.077654410272911e-06, "loss": 0.12761917114257812, "step": 113745 }, { "epoch": 0.9835626151092511, "grad_norm": 8.507525767809371, "learning_rate": 3.077450745187891e-06, "loss": 0.524871826171875, "step": 113750 }, { "epoch": 0.9836058486307944, "grad_norm": 1.0094476002117558, "learning_rate": 3.0772470797456776e-06, "loss": 0.2828033447265625, "step": 113755 }, { "epoch": 0.9836490821523376, "grad_norm": 8.607662995096565, "learning_rate": 3.0770434139472066e-06, "loss": 0.156573486328125, "step": 113760 }, { "epoch": 0.9836923156738809, "grad_norm": 2.34717420889378, "learning_rate": 3.076839747793418e-06, "loss": 0.06650161743164062, "step": 113765 }, { "epoch": 0.9837355491954242, "grad_norm": 1.4429545682734903, "learning_rate": 3.0766360812852517e-06, "loss": 0.08545455932617188, "step": 113770 }, { "epoch": 0.9837787827169674, "grad_norm": 0.5724874820614917, "learning_rate": 3.076432414423646e-06, "loss": 0.34602785110473633, "step": 113775 }, { "epoch": 0.9838220162385107, "grad_norm": 56.78857801479526, "learning_rate": 3.076228747209541e-06, "loss": 0.427874755859375, "step": 113780 }, { "epoch": 0.983865249760054, "grad_norm": 0.10936671543294836, "learning_rate": 3.0760250796438766e-06, "loss": 0.04091625213623047, "step": 113785 }, { "epoch": 0.9839084832815972, "grad_norm": 13.263817933827431, "learning_rate": 3.075821411727591e-06, "loss": 0.08172149658203125, "step": 113790 }, { "epoch": 0.9839517168031405, "grad_norm": 13.77486573612989, "learning_rate": 3.075617743461623e-06, "loss": 0.036672019958496095, "step": 113795 }, { "epoch": 0.9839949503246838, "grad_norm": 19.470087470334942, "learning_rate": 3.075414074846913e-06, "loss": 0.13894500732421874, "step": 113800 }, { "epoch": 0.984038183846227, "grad_norm": 12.514994071777828, "learning_rate": 3.0752104058844e-06, "loss": 0.04506683349609375, "step": 113805 }, { "epoch": 0.9840814173677703, "grad_norm": 12.843496130451173, "learning_rate": 3.0750067365750224e-06, "loss": 0.11171340942382812, "step": 113810 }, { "epoch": 0.9841246508893136, "grad_norm": 2.1675427807455407, "learning_rate": 3.074803066919721e-06, "loss": 0.14706802368164062, "step": 113815 }, { "epoch": 0.9841678844108568, "grad_norm": 0.7428173030059015, "learning_rate": 3.0745993969194357e-06, "loss": 0.08760528564453125, "step": 113820 }, { "epoch": 0.9842111179324, "grad_norm": 1.2527847087181099, "learning_rate": 3.0743957265751033e-06, "loss": 0.0944814682006836, "step": 113825 }, { "epoch": 0.9842543514539434, "grad_norm": 3.322717894893897, "learning_rate": 3.0741920558876645e-06, "loss": 0.0833892822265625, "step": 113830 }, { "epoch": 0.9842975849754866, "grad_norm": 1.0863196018489765, "learning_rate": 3.0739883848580583e-06, "loss": 0.09117202758789063, "step": 113835 }, { "epoch": 0.9843408184970298, "grad_norm": 3.4574508613798747, "learning_rate": 3.0737847134872244e-06, "loss": 0.08027191162109375, "step": 113840 }, { "epoch": 0.9843840520185732, "grad_norm": 0.28872983359975296, "learning_rate": 3.073581041776102e-06, "loss": 0.12813873291015626, "step": 113845 }, { "epoch": 0.9844272855401164, "grad_norm": 12.794143096657656, "learning_rate": 3.0733773697256303e-06, "loss": 0.12908859252929689, "step": 113850 }, { "epoch": 0.9844705190616596, "grad_norm": 16.573567158481943, "learning_rate": 3.0731736973367492e-06, "loss": 0.09458465576171875, "step": 113855 }, { "epoch": 0.984513752583203, "grad_norm": 0.6929948689334213, "learning_rate": 3.0729700246103962e-06, "loss": 0.1429351806640625, "step": 113860 }, { "epoch": 0.9845569861047462, "grad_norm": 6.175898335640306, "learning_rate": 3.0727663515475124e-06, "loss": 0.3160400390625, "step": 113865 }, { "epoch": 0.9846002196262894, "grad_norm": 0.5381793319804831, "learning_rate": 3.0725626781490375e-06, "loss": 0.0465240478515625, "step": 113870 }, { "epoch": 0.9846434531478327, "grad_norm": 7.275739932053156, "learning_rate": 3.072359004415909e-06, "loss": 0.1289337158203125, "step": 113875 }, { "epoch": 0.984686686669376, "grad_norm": 2.4114702080813464, "learning_rate": 3.0721553303490677e-06, "loss": 0.05478057861328125, "step": 113880 }, { "epoch": 0.9847299201909192, "grad_norm": 1.4650269409978165, "learning_rate": 3.071951655949452e-06, "loss": 0.10737037658691406, "step": 113885 }, { "epoch": 0.9847731537124625, "grad_norm": 6.339496317875203, "learning_rate": 3.0717479812180018e-06, "loss": 0.05306167602539062, "step": 113890 }, { "epoch": 0.9848163872340058, "grad_norm": 40.43928813370783, "learning_rate": 3.071544306155656e-06, "loss": 0.368524169921875, "step": 113895 }, { "epoch": 0.984859620755549, "grad_norm": 1.6880242038768896, "learning_rate": 3.0713406307633545e-06, "loss": 0.13928680419921874, "step": 113900 }, { "epoch": 0.9849028542770922, "grad_norm": 0.05583760380940721, "learning_rate": 3.0711369550420366e-06, "loss": 0.26377410888671876, "step": 113905 }, { "epoch": 0.9849460877986356, "grad_norm": 1.4243407602981817, "learning_rate": 3.0709332789926404e-06, "loss": 0.06943511962890625, "step": 113910 }, { "epoch": 0.9849893213201788, "grad_norm": 0.8736352453068464, "learning_rate": 3.0707296026161073e-06, "loss": 0.043239593505859375, "step": 113915 }, { "epoch": 0.985032554841722, "grad_norm": 7.967505607490444, "learning_rate": 3.0705259259133743e-06, "loss": 0.2726470947265625, "step": 113920 }, { "epoch": 0.9850757883632654, "grad_norm": 0.5567149280768591, "learning_rate": 3.0703222488853822e-06, "loss": 0.1448455810546875, "step": 113925 }, { "epoch": 0.9851190218848086, "grad_norm": 0.4960404314855453, "learning_rate": 3.070118571533071e-06, "loss": 0.1189422607421875, "step": 113930 }, { "epoch": 0.9851622554063518, "grad_norm": 57.044705401173744, "learning_rate": 3.069914893857379e-06, "loss": 0.20055999755859374, "step": 113935 }, { "epoch": 0.9852054889278952, "grad_norm": 16.594914592693513, "learning_rate": 3.069711215859245e-06, "loss": 0.2003772735595703, "step": 113940 }, { "epoch": 0.9852487224494384, "grad_norm": 7.512209214610338, "learning_rate": 3.06950753753961e-06, "loss": 0.3319427490234375, "step": 113945 }, { "epoch": 0.9852919559709816, "grad_norm": 0.4075114124634373, "learning_rate": 3.069303858899411e-06, "loss": 0.23476486206054686, "step": 113950 }, { "epoch": 0.985335189492525, "grad_norm": 3.0193091271099597, "learning_rate": 3.06910017993959e-06, "loss": 0.347576904296875, "step": 113955 }, { "epoch": 0.9853784230140682, "grad_norm": 3.419600212125369, "learning_rate": 3.0688965006610846e-06, "loss": 0.05924568176269531, "step": 113960 }, { "epoch": 0.9854216565356114, "grad_norm": 2.0561674907294507, "learning_rate": 3.068692821064835e-06, "loss": 0.056681060791015626, "step": 113965 }, { "epoch": 0.9854648900571548, "grad_norm": 0.2386544396591173, "learning_rate": 3.0684891411517792e-06, "loss": 0.551019287109375, "step": 113970 }, { "epoch": 0.985508123578698, "grad_norm": 1.799381572278513, "learning_rate": 3.0682854609228578e-06, "loss": 0.179400634765625, "step": 113975 }, { "epoch": 0.9855513571002412, "grad_norm": 0.6741653198003777, "learning_rate": 3.0680817803790107e-06, "loss": 0.2367870330810547, "step": 113980 }, { "epoch": 0.9855945906217846, "grad_norm": 8.48559943711084, "learning_rate": 3.0678780995211753e-06, "loss": 0.18411102294921874, "step": 113985 }, { "epoch": 0.9856378241433278, "grad_norm": 1.416840430577512, "learning_rate": 3.0676744183502935e-06, "loss": 0.19022598266601562, "step": 113990 }, { "epoch": 0.985681057664871, "grad_norm": 1.3646488537597625, "learning_rate": 3.0674707368673024e-06, "loss": 0.25079193115234377, "step": 113995 }, { "epoch": 0.9857242911864142, "grad_norm": 1.4171192955918341, "learning_rate": 3.067267055073142e-06, "loss": 0.131005859375, "step": 114000 }, { "epoch": 0.9857675247079576, "grad_norm": 0.30488901211534825, "learning_rate": 3.067063372968752e-06, "loss": 0.31568222045898436, "step": 114005 }, { "epoch": 0.9858107582295008, "grad_norm": 20.689574867907996, "learning_rate": 3.066859690555072e-06, "loss": 0.08788604736328125, "step": 114010 }, { "epoch": 0.985853991751044, "grad_norm": 5.855714088411563, "learning_rate": 3.0666560078330416e-06, "loss": 0.11479415893554687, "step": 114015 }, { "epoch": 0.9858972252725874, "grad_norm": 15.459313346563782, "learning_rate": 3.066452324803598e-06, "loss": 0.162646484375, "step": 114020 }, { "epoch": 0.9859404587941306, "grad_norm": 0.7732462168723895, "learning_rate": 3.066248641467684e-06, "loss": 0.0559051513671875, "step": 114025 }, { "epoch": 0.9859836923156738, "grad_norm": 6.258044487004343, "learning_rate": 3.066044957826235e-06, "loss": 0.26737632751464846, "step": 114030 }, { "epoch": 0.9860269258372172, "grad_norm": 20.447061344199756, "learning_rate": 3.065841273880194e-06, "loss": 0.13558349609375, "step": 114035 }, { "epoch": 0.9860701593587604, "grad_norm": 9.554373396075661, "learning_rate": 3.0656375896304985e-06, "loss": 0.13272514343261718, "step": 114040 }, { "epoch": 0.9861133928803036, "grad_norm": 2.0359427691602083, "learning_rate": 3.0654339050780888e-06, "loss": 0.2102447509765625, "step": 114045 }, { "epoch": 0.986156626401847, "grad_norm": 0.2923587547532228, "learning_rate": 3.0652302202239024e-06, "loss": 0.06790008544921874, "step": 114050 }, { "epoch": 0.9861998599233902, "grad_norm": 0.14299412599083702, "learning_rate": 3.0650265350688812e-06, "loss": 0.036434173583984375, "step": 114055 }, { "epoch": 0.9862430934449334, "grad_norm": 45.013913603684436, "learning_rate": 3.0648228496139626e-06, "loss": 0.26714439392089845, "step": 114060 }, { "epoch": 0.9862863269664768, "grad_norm": 8.368446767638375, "learning_rate": 3.0646191638600866e-06, "loss": 0.34617252349853517, "step": 114065 }, { "epoch": 0.98632956048802, "grad_norm": 1.7829917273034162, "learning_rate": 3.0644154778081935e-06, "loss": 0.172857666015625, "step": 114070 }, { "epoch": 0.9863727940095632, "grad_norm": 0.35057754524974005, "learning_rate": 3.0642117914592222e-06, "loss": 0.126910400390625, "step": 114075 }, { "epoch": 0.9864160275311065, "grad_norm": 4.040601105325127, "learning_rate": 3.064008104814111e-06, "loss": 0.0715057373046875, "step": 114080 }, { "epoch": 0.9864592610526498, "grad_norm": 7.296368073857016, "learning_rate": 3.0638044178737996e-06, "loss": 0.171234130859375, "step": 114085 }, { "epoch": 0.986502494574193, "grad_norm": 16.873452567317646, "learning_rate": 3.0636007306392288e-06, "loss": 0.1749879837036133, "step": 114090 }, { "epoch": 0.9865457280957363, "grad_norm": 3.05657989541057, "learning_rate": 3.063397043111337e-06, "loss": 0.05895729064941406, "step": 114095 }, { "epoch": 0.9865889616172796, "grad_norm": 1.919766614569061, "learning_rate": 3.063193355291063e-06, "loss": 0.028980255126953125, "step": 114100 }, { "epoch": 0.9866321951388228, "grad_norm": 4.896568794833659, "learning_rate": 3.0629896671793474e-06, "loss": 0.224957275390625, "step": 114105 }, { "epoch": 0.9866754286603661, "grad_norm": 20.77431277153977, "learning_rate": 3.062785978777129e-06, "loss": 0.14117660522460937, "step": 114110 }, { "epoch": 0.9867186621819094, "grad_norm": 1.40894423848233, "learning_rate": 3.062582290085347e-06, "loss": 0.030440521240234376, "step": 114115 }, { "epoch": 0.9867618957034526, "grad_norm": 6.325349982039012, "learning_rate": 3.0623786011049405e-06, "loss": 0.040603256225585936, "step": 114120 }, { "epoch": 0.9868051292249959, "grad_norm": 1.9601073723977964, "learning_rate": 3.06217491183685e-06, "loss": 0.14087142944335937, "step": 114125 }, { "epoch": 0.9868483627465392, "grad_norm": 13.55403651515942, "learning_rate": 3.0619712222820146e-06, "loss": 0.20276145935058593, "step": 114130 }, { "epoch": 0.9868915962680824, "grad_norm": 14.243174175010662, "learning_rate": 3.061767532441373e-06, "loss": 0.22497787475585937, "step": 114135 }, { "epoch": 0.9869348297896257, "grad_norm": 30.629523504595678, "learning_rate": 3.0615638423158653e-06, "loss": 0.09141464233398437, "step": 114140 }, { "epoch": 0.986978063311169, "grad_norm": 2.8104709629838043, "learning_rate": 3.0613601519064303e-06, "loss": 0.05905303955078125, "step": 114145 }, { "epoch": 0.9870212968327122, "grad_norm": 3.4547717014121826, "learning_rate": 3.0611564612140075e-06, "loss": 0.119134521484375, "step": 114150 }, { "epoch": 0.9870645303542555, "grad_norm": 1.041954649446427, "learning_rate": 3.0609527702395367e-06, "loss": 0.17832069396972655, "step": 114155 }, { "epoch": 0.9871077638757988, "grad_norm": 0.4478672910806166, "learning_rate": 3.060749078983957e-06, "loss": 0.011145782470703126, "step": 114160 }, { "epoch": 0.987150997397342, "grad_norm": 16.076008238434444, "learning_rate": 3.0605453874482085e-06, "loss": 0.07681007385253906, "step": 114165 }, { "epoch": 0.9871942309188853, "grad_norm": 0.7743099398766056, "learning_rate": 3.0603416956332294e-06, "loss": 0.018278121948242188, "step": 114170 }, { "epoch": 0.9872374644404285, "grad_norm": 0.8815151957839785, "learning_rate": 3.0601380035399602e-06, "loss": 0.3253143310546875, "step": 114175 }, { "epoch": 0.9872806979619718, "grad_norm": 43.04391701136089, "learning_rate": 3.059934311169339e-06, "loss": 0.248779296875, "step": 114180 }, { "epoch": 0.987323931483515, "grad_norm": 16.217390113011927, "learning_rate": 3.059730618522307e-06, "loss": 0.06188201904296875, "step": 114185 }, { "epoch": 0.9873671650050583, "grad_norm": 0.7611601741683853, "learning_rate": 3.0595269255998025e-06, "loss": 0.0753173828125, "step": 114190 }, { "epoch": 0.9874103985266016, "grad_norm": 0.8685598868042279, "learning_rate": 3.0593232324027647e-06, "loss": 0.03798370361328125, "step": 114195 }, { "epoch": 0.9874536320481448, "grad_norm": 4.3341656610049535, "learning_rate": 3.059119538932133e-06, "loss": 0.1008514404296875, "step": 114200 }, { "epoch": 0.9874968655696881, "grad_norm": 11.167438974370233, "learning_rate": 3.0589158451888488e-06, "loss": 0.1134674072265625, "step": 114205 }, { "epoch": 0.9875400990912314, "grad_norm": 67.17031770004996, "learning_rate": 3.058712151173848e-06, "loss": 0.3464225769042969, "step": 114210 }, { "epoch": 0.9875833326127746, "grad_norm": 13.289510727215788, "learning_rate": 3.0585084568880736e-06, "loss": 0.0895416259765625, "step": 114215 }, { "epoch": 0.9876265661343179, "grad_norm": 21.164398835088, "learning_rate": 3.0583047623324627e-06, "loss": 0.14486236572265626, "step": 114220 }, { "epoch": 0.9876697996558612, "grad_norm": 0.1378371104738897, "learning_rate": 3.0581010675079552e-06, "loss": 0.33060569763183595, "step": 114225 }, { "epoch": 0.9877130331774044, "grad_norm": 9.075469954184424, "learning_rate": 3.0578973724154906e-06, "loss": 0.1537200927734375, "step": 114230 }, { "epoch": 0.9877562666989477, "grad_norm": 6.493011855063489, "learning_rate": 3.0576936770560086e-06, "loss": 0.17044525146484374, "step": 114235 }, { "epoch": 0.987799500220491, "grad_norm": 2.2844516147166845, "learning_rate": 3.0574899814304485e-06, "loss": 0.05509796142578125, "step": 114240 }, { "epoch": 0.9878427337420342, "grad_norm": 7.509828588760094, "learning_rate": 3.0572862855397498e-06, "loss": 0.085308837890625, "step": 114245 }, { "epoch": 0.9878859672635775, "grad_norm": 14.75390250489835, "learning_rate": 3.057082589384852e-06, "loss": 0.09878997802734375, "step": 114250 }, { "epoch": 0.9879292007851207, "grad_norm": 4.232166075921649, "learning_rate": 3.056878892966694e-06, "loss": 0.153851318359375, "step": 114255 }, { "epoch": 0.987972434306664, "grad_norm": 0.021955011860349396, "learning_rate": 3.056675196286215e-06, "loss": 0.10652236938476563, "step": 114260 }, { "epoch": 0.9880156678282073, "grad_norm": 23.098289030069978, "learning_rate": 3.0564714993443565e-06, "loss": 0.2815546035766602, "step": 114265 }, { "epoch": 0.9880589013497505, "grad_norm": 23.744873693172764, "learning_rate": 3.056267802142055e-06, "loss": 0.10201454162597656, "step": 114270 }, { "epoch": 0.9881021348712938, "grad_norm": 6.253237638233143, "learning_rate": 3.056064104680252e-06, "loss": 0.2523681640625, "step": 114275 }, { "epoch": 0.9881453683928371, "grad_norm": 18.622246471167532, "learning_rate": 3.055860406959887e-06, "loss": 0.1009246826171875, "step": 114280 }, { "epoch": 0.9881886019143803, "grad_norm": 0.0732436309691742, "learning_rate": 3.055656708981897e-06, "loss": 0.013238143920898438, "step": 114285 }, { "epoch": 0.9882318354359236, "grad_norm": 0.5763096017233615, "learning_rate": 3.055453010747224e-06, "loss": 0.106390380859375, "step": 114290 }, { "epoch": 0.9882750689574669, "grad_norm": 6.543370844759797, "learning_rate": 3.0552493122568074e-06, "loss": 0.12620086669921876, "step": 114295 }, { "epoch": 0.9883183024790101, "grad_norm": 17.523503036419847, "learning_rate": 3.055045613511585e-06, "loss": 0.24175262451171875, "step": 114300 }, { "epoch": 0.9883615360005534, "grad_norm": 1.368027179672835, "learning_rate": 3.0548419145124975e-06, "loss": 0.3582908630371094, "step": 114305 }, { "epoch": 0.9884047695220967, "grad_norm": 8.769748160713544, "learning_rate": 3.0546382152604836e-06, "loss": 0.0613372802734375, "step": 114310 }, { "epoch": 0.9884480030436399, "grad_norm": 18.579828892544665, "learning_rate": 3.054434515756483e-06, "loss": 0.08806915283203125, "step": 114315 }, { "epoch": 0.9884912365651832, "grad_norm": 3.141795664927369, "learning_rate": 3.054230816001435e-06, "loss": 0.1051065444946289, "step": 114320 }, { "epoch": 0.9885344700867265, "grad_norm": 2.8782590781351844, "learning_rate": 3.0540271159962807e-06, "loss": 0.1919342041015625, "step": 114325 }, { "epoch": 0.9885777036082697, "grad_norm": 37.89202376846093, "learning_rate": 3.053823415741957e-06, "loss": 0.1388824462890625, "step": 114330 }, { "epoch": 0.988620937129813, "grad_norm": 4.368315974119807, "learning_rate": 3.0536197152394045e-06, "loss": 0.03195953369140625, "step": 114335 }, { "epoch": 0.9886641706513563, "grad_norm": 33.16147544038433, "learning_rate": 3.0534160144895627e-06, "loss": 0.24441070556640626, "step": 114340 }, { "epoch": 0.9887074041728995, "grad_norm": 16.606745479866223, "learning_rate": 3.0532123134933704e-06, "loss": 0.1124908447265625, "step": 114345 }, { "epoch": 0.9887506376944427, "grad_norm": 3.8410376149531262, "learning_rate": 3.053008612251769e-06, "loss": 0.19503860473632811, "step": 114350 }, { "epoch": 0.988793871215986, "grad_norm": 0.42396056485104044, "learning_rate": 3.052804910765695e-06, "loss": 0.18733978271484375, "step": 114355 }, { "epoch": 0.9888371047375293, "grad_norm": 26.891781773690138, "learning_rate": 3.052601209036091e-06, "loss": 0.19661598205566405, "step": 114360 }, { "epoch": 0.9888803382590725, "grad_norm": 2.2868776359322935, "learning_rate": 3.052397507063894e-06, "loss": 0.065960693359375, "step": 114365 }, { "epoch": 0.9889235717806159, "grad_norm": 11.45822610718832, "learning_rate": 3.0521938048500435e-06, "loss": 0.2322418212890625, "step": 114370 }, { "epoch": 0.9889668053021591, "grad_norm": 36.54191863352815, "learning_rate": 3.051990102395481e-06, "loss": 0.20747528076171876, "step": 114375 }, { "epoch": 0.9890100388237023, "grad_norm": 12.827079079613126, "learning_rate": 3.0517863997011447e-06, "loss": 0.15323486328125, "step": 114380 }, { "epoch": 0.9890532723452456, "grad_norm": 0.7678260606853788, "learning_rate": 3.051582696767973e-06, "loss": 0.0530853271484375, "step": 114385 }, { "epoch": 0.9890965058667889, "grad_norm": 0.06540488911907633, "learning_rate": 3.051378993596908e-06, "loss": 0.42976837158203124, "step": 114390 }, { "epoch": 0.9891397393883321, "grad_norm": 5.387718328159541, "learning_rate": 3.0511752901888867e-06, "loss": 0.03822021484375, "step": 114395 }, { "epoch": 0.9891829729098754, "grad_norm": 1.9234799946626877, "learning_rate": 3.050971586544849e-06, "loss": 0.09473495483398438, "step": 114400 }, { "epoch": 0.9892262064314187, "grad_norm": 2.0624425604712138, "learning_rate": 3.0507678826657355e-06, "loss": 0.48941497802734374, "step": 114405 }, { "epoch": 0.9892694399529619, "grad_norm": 27.11662382668905, "learning_rate": 3.0505641785524856e-06, "loss": 0.20417327880859376, "step": 114410 }, { "epoch": 0.9893126734745052, "grad_norm": 39.7370806200111, "learning_rate": 3.0503604742060374e-06, "loss": 0.18545989990234374, "step": 114415 }, { "epoch": 0.9893559069960485, "grad_norm": 13.10926636659338, "learning_rate": 3.0501567696273315e-06, "loss": 0.22604217529296874, "step": 114420 }, { "epoch": 0.9893991405175917, "grad_norm": 6.814662934913133, "learning_rate": 3.0499530648173068e-06, "loss": 0.3620635986328125, "step": 114425 }, { "epoch": 0.9894423740391349, "grad_norm": 23.3568877327269, "learning_rate": 3.049749359776903e-06, "loss": 0.3512939453125, "step": 114430 }, { "epoch": 0.9894856075606783, "grad_norm": 30.777614525395126, "learning_rate": 3.0495456545070593e-06, "loss": 0.2811727523803711, "step": 114435 }, { "epoch": 0.9895288410822215, "grad_norm": 42.44521831699633, "learning_rate": 3.049341949008716e-06, "loss": 0.207366943359375, "step": 114440 }, { "epoch": 0.9895720746037647, "grad_norm": 33.99062717701267, "learning_rate": 3.0491382432828125e-06, "loss": 0.18852996826171875, "step": 114445 }, { "epoch": 0.9896153081253081, "grad_norm": 1.4210645205656545, "learning_rate": 3.0489345373302865e-06, "loss": 0.41611328125, "step": 114450 }, { "epoch": 0.9896585416468513, "grad_norm": 5.092346888235715, "learning_rate": 3.0487308311520787e-06, "loss": 0.1650390625, "step": 114455 }, { "epoch": 0.9897017751683945, "grad_norm": 1.9803641870133482, "learning_rate": 3.0485271247491293e-06, "loss": 0.11529617309570313, "step": 114460 }, { "epoch": 0.9897450086899379, "grad_norm": 6.696116350763504, "learning_rate": 3.0483234181223765e-06, "loss": 0.2191802978515625, "step": 114465 }, { "epoch": 0.9897882422114811, "grad_norm": 2.0685546536686714, "learning_rate": 3.0481197112727612e-06, "loss": 0.21596755981445312, "step": 114470 }, { "epoch": 0.9898314757330243, "grad_norm": 3.444698372917628, "learning_rate": 3.0479160042012217e-06, "loss": 0.037586402893066403, "step": 114475 }, { "epoch": 0.9898747092545677, "grad_norm": 0.74799610999262, "learning_rate": 3.0477122969086976e-06, "loss": 0.032065963745117186, "step": 114480 }, { "epoch": 0.9899179427761109, "grad_norm": 21.098754352693792, "learning_rate": 3.0475085893961287e-06, "loss": 0.10758209228515625, "step": 114485 }, { "epoch": 0.9899611762976541, "grad_norm": 10.556595347946635, "learning_rate": 3.047304881664454e-06, "loss": 0.27962646484375, "step": 114490 }, { "epoch": 0.9900044098191975, "grad_norm": 1.9720324561283702, "learning_rate": 3.0471011737146137e-06, "loss": 0.13972625732421876, "step": 114495 }, { "epoch": 0.9900476433407407, "grad_norm": 17.44108654242275, "learning_rate": 3.0468974655475475e-06, "loss": 0.2107574462890625, "step": 114500 }, { "epoch": 0.9900908768622839, "grad_norm": 17.962394303085144, "learning_rate": 3.046693757164194e-06, "loss": 0.14551849365234376, "step": 114505 }, { "epoch": 0.9901341103838273, "grad_norm": 28.303951052655655, "learning_rate": 3.0464900485654926e-06, "loss": 0.2161712646484375, "step": 114510 }, { "epoch": 0.9901773439053705, "grad_norm": 2.228641235075412, "learning_rate": 3.0462863397523834e-06, "loss": 0.04885292053222656, "step": 114515 }, { "epoch": 0.9902205774269137, "grad_norm": 40.929149196158114, "learning_rate": 3.0460826307258063e-06, "loss": 0.2744293212890625, "step": 114520 }, { "epoch": 0.990263810948457, "grad_norm": 0.34261437481507245, "learning_rate": 3.0458789214866996e-06, "loss": 0.03377685546875, "step": 114525 }, { "epoch": 0.9903070444700003, "grad_norm": 0.18847431708604595, "learning_rate": 3.045675212036003e-06, "loss": 0.09259452819824218, "step": 114530 }, { "epoch": 0.9903502779915435, "grad_norm": 6.003262572889806, "learning_rate": 3.0454715023746576e-06, "loss": 0.13561248779296875, "step": 114535 }, { "epoch": 0.9903935115130867, "grad_norm": 0.6341397992014134, "learning_rate": 3.0452677925036e-06, "loss": 0.06952056884765626, "step": 114540 }, { "epoch": 0.9904367450346301, "grad_norm": 18.970225331964972, "learning_rate": 3.045064082423772e-06, "loss": 0.172119140625, "step": 114545 }, { "epoch": 0.9904799785561733, "grad_norm": 4.671691061663101, "learning_rate": 3.0448603721361128e-06, "loss": 0.05798664093017578, "step": 114550 }, { "epoch": 0.9905232120777165, "grad_norm": 5.285772870787945, "learning_rate": 3.044656661641562e-06, "loss": 0.0912200927734375, "step": 114555 }, { "epoch": 0.9905664455992599, "grad_norm": 8.990569275639801, "learning_rate": 3.044452950941057e-06, "loss": 0.172967529296875, "step": 114560 }, { "epoch": 0.9906096791208031, "grad_norm": 26.157914492540772, "learning_rate": 3.044249240035541e-06, "loss": 0.1857147216796875, "step": 114565 }, { "epoch": 0.9906529126423463, "grad_norm": 8.87313171853262, "learning_rate": 3.0440455289259493e-06, "loss": 0.13659210205078126, "step": 114570 }, { "epoch": 0.9906961461638897, "grad_norm": 8.755216544935474, "learning_rate": 3.0438418176132243e-06, "loss": 0.0559600830078125, "step": 114575 }, { "epoch": 0.9907393796854329, "grad_norm": 46.654674315716434, "learning_rate": 3.0436381060983053e-06, "loss": 0.31856765747070315, "step": 114580 }, { "epoch": 0.9907826132069761, "grad_norm": 0.8780623032839789, "learning_rate": 3.043434394382131e-06, "loss": 0.031183624267578126, "step": 114585 }, { "epoch": 0.9908258467285195, "grad_norm": 6.969828297401578, "learning_rate": 3.0432306824656408e-06, "loss": 0.2376007080078125, "step": 114590 }, { "epoch": 0.9908690802500627, "grad_norm": 17.422587865289852, "learning_rate": 3.0430269703497743e-06, "loss": 0.16463623046875, "step": 114595 }, { "epoch": 0.9909123137716059, "grad_norm": 32.10788480178292, "learning_rate": 3.0428232580354714e-06, "loss": 0.12865142822265624, "step": 114600 }, { "epoch": 0.9909555472931492, "grad_norm": 0.8263729370615851, "learning_rate": 3.042619545523672e-06, "loss": 0.07789802551269531, "step": 114605 }, { "epoch": 0.9909987808146925, "grad_norm": 9.018566131817694, "learning_rate": 3.0424158328153142e-06, "loss": 0.05547637939453125, "step": 114610 }, { "epoch": 0.9910420143362357, "grad_norm": 48.090975573124275, "learning_rate": 3.042212119911339e-06, "loss": 0.3650909423828125, "step": 114615 }, { "epoch": 0.991085247857779, "grad_norm": 7.199527501167117, "learning_rate": 3.042008406812685e-06, "loss": 0.1751922607421875, "step": 114620 }, { "epoch": 0.9911284813793223, "grad_norm": 18.381965937559183, "learning_rate": 3.041804693520291e-06, "loss": 0.087225341796875, "step": 114625 }, { "epoch": 0.9911717149008655, "grad_norm": 0.2784896695308265, "learning_rate": 3.0416009800350993e-06, "loss": 0.08599853515625, "step": 114630 }, { "epoch": 0.9912149484224088, "grad_norm": 5.853564547394445, "learning_rate": 3.0413972663580466e-06, "loss": 0.04118671417236328, "step": 114635 }, { "epoch": 0.9912581819439521, "grad_norm": 2.9764137201805174, "learning_rate": 3.041193552490073e-06, "loss": 0.09620399475097656, "step": 114640 }, { "epoch": 0.9913014154654953, "grad_norm": 10.68682859023121, "learning_rate": 3.040989838432119e-06, "loss": 0.2029500961303711, "step": 114645 }, { "epoch": 0.9913446489870386, "grad_norm": 2.3129856535563085, "learning_rate": 3.040786124185123e-06, "loss": 0.10045547485351562, "step": 114650 }, { "epoch": 0.9913878825085819, "grad_norm": 53.5449283596958, "learning_rate": 3.040582409750025e-06, "loss": 0.382568359375, "step": 114655 }, { "epoch": 0.9914311160301251, "grad_norm": 11.463612472654967, "learning_rate": 3.0403786951277652e-06, "loss": 0.1993408203125, "step": 114660 }, { "epoch": 0.9914743495516684, "grad_norm": 59.68742282677593, "learning_rate": 3.0401749803192823e-06, "loss": 0.42737808227539065, "step": 114665 }, { "epoch": 0.9915175830732117, "grad_norm": 17.55208920950106, "learning_rate": 3.039971265325515e-06, "loss": 0.17974853515625, "step": 114670 }, { "epoch": 0.9915608165947549, "grad_norm": 0.435740577653851, "learning_rate": 3.039767550147405e-06, "loss": 0.026369094848632812, "step": 114675 }, { "epoch": 0.9916040501162982, "grad_norm": 3.605063408116942, "learning_rate": 3.0395638347858894e-06, "loss": 0.10190811157226562, "step": 114680 }, { "epoch": 0.9916472836378414, "grad_norm": 0.2044602266642272, "learning_rate": 3.03936011924191e-06, "loss": 0.2857452392578125, "step": 114685 }, { "epoch": 0.9916905171593847, "grad_norm": 24.346164616291418, "learning_rate": 3.0391564035164038e-06, "loss": 0.29339599609375, "step": 114690 }, { "epoch": 0.991733750680928, "grad_norm": 0.042948743963064716, "learning_rate": 3.038952687610313e-06, "loss": 0.03937454223632812, "step": 114695 }, { "epoch": 0.9917769842024712, "grad_norm": 32.77382341267512, "learning_rate": 3.038748971524576e-06, "loss": 0.30222091674804685, "step": 114700 }, { "epoch": 0.9918202177240145, "grad_norm": 15.11280166284759, "learning_rate": 3.038545255260131e-06, "loss": 0.4269418716430664, "step": 114705 }, { "epoch": 0.9918634512455577, "grad_norm": 5.840367898679953, "learning_rate": 3.038341538817919e-06, "loss": 0.05054054260253906, "step": 114710 }, { "epoch": 0.991906684767101, "grad_norm": 71.49738337914337, "learning_rate": 3.03813782219888e-06, "loss": 0.5461700439453125, "step": 114715 }, { "epoch": 0.9919499182886443, "grad_norm": 0.08176619950669221, "learning_rate": 3.0379341054039514e-06, "loss": 0.053089332580566403, "step": 114720 }, { "epoch": 0.9919931518101875, "grad_norm": 2.3487967986701395, "learning_rate": 3.037730388434075e-06, "loss": 0.13270034790039062, "step": 114725 }, { "epoch": 0.9920363853317308, "grad_norm": 12.115693682718035, "learning_rate": 3.0375266712901897e-06, "loss": 0.15117912292480468, "step": 114730 }, { "epoch": 0.9920796188532741, "grad_norm": 2.3999237637370614, "learning_rate": 3.037322953973234e-06, "loss": 0.3774444580078125, "step": 114735 }, { "epoch": 0.9921228523748173, "grad_norm": 24.610566270901277, "learning_rate": 3.0371192364841484e-06, "loss": 0.115216064453125, "step": 114740 }, { "epoch": 0.9921660858963606, "grad_norm": 0.28460478357189684, "learning_rate": 3.036915518823872e-06, "loss": 0.020735931396484376, "step": 114745 }, { "epoch": 0.9922093194179039, "grad_norm": 6.393872473052746, "learning_rate": 3.0367118009933444e-06, "loss": 0.11069564819335938, "step": 114750 }, { "epoch": 0.9922525529394471, "grad_norm": 1.0520884055604085, "learning_rate": 3.036508082993506e-06, "loss": 0.08337020874023438, "step": 114755 }, { "epoch": 0.9922957864609904, "grad_norm": 4.326138467026946, "learning_rate": 3.0363043648252953e-06, "loss": 0.14603271484375, "step": 114760 }, { "epoch": 0.9923390199825337, "grad_norm": 17.25558193159672, "learning_rate": 3.036100646489651e-06, "loss": 0.18170166015625, "step": 114765 }, { "epoch": 0.9923822535040769, "grad_norm": 2.413945239356397, "learning_rate": 3.035896927987514e-06, "loss": 0.03250608444213867, "step": 114770 }, { "epoch": 0.9924254870256202, "grad_norm": 12.442505643529715, "learning_rate": 3.0356932093198246e-06, "loss": 0.1647897720336914, "step": 114775 }, { "epoch": 0.9924687205471634, "grad_norm": 28.078027356902066, "learning_rate": 3.035489490487521e-06, "loss": 0.1170989990234375, "step": 114780 }, { "epoch": 0.9925119540687067, "grad_norm": 3.5367603348040335, "learning_rate": 3.0352857714915427e-06, "loss": 0.0694671630859375, "step": 114785 }, { "epoch": 0.99255518759025, "grad_norm": 17.91033949647662, "learning_rate": 3.03508205233283e-06, "loss": 0.20400543212890626, "step": 114790 }, { "epoch": 0.9925984211117932, "grad_norm": 12.841880293935192, "learning_rate": 3.034878333012321e-06, "loss": 0.32299118041992186, "step": 114795 }, { "epoch": 0.9926416546333365, "grad_norm": 2.1473553362462, "learning_rate": 3.0346746135309567e-06, "loss": 0.12221107482910157, "step": 114800 }, { "epoch": 0.9926848881548798, "grad_norm": 0.7923614697541463, "learning_rate": 3.0344708938896765e-06, "loss": 0.0651123046875, "step": 114805 }, { "epoch": 0.992728121676423, "grad_norm": 0.5268505318122232, "learning_rate": 3.034267174089419e-06, "loss": 0.100128173828125, "step": 114810 }, { "epoch": 0.9927713551979663, "grad_norm": 0.37951431187441154, "learning_rate": 3.0340634541311246e-06, "loss": 0.031547164916992186, "step": 114815 }, { "epoch": 0.9928145887195096, "grad_norm": 3.9315198777722973, "learning_rate": 3.033859734015732e-06, "loss": 0.08168792724609375, "step": 114820 }, { "epoch": 0.9928578222410528, "grad_norm": 28.632893131706805, "learning_rate": 3.033656013744182e-06, "loss": 0.228656005859375, "step": 114825 }, { "epoch": 0.9929010557625961, "grad_norm": 19.773734874021173, "learning_rate": 3.033452293317413e-06, "loss": 0.16737022399902343, "step": 114830 }, { "epoch": 0.9929442892841394, "grad_norm": 8.330404820411768, "learning_rate": 3.0332485727363656e-06, "loss": 0.20922393798828126, "step": 114835 }, { "epoch": 0.9929875228056826, "grad_norm": 8.884646633612363, "learning_rate": 3.0330448520019788e-06, "loss": 0.08915634155273437, "step": 114840 }, { "epoch": 0.9930307563272259, "grad_norm": 54.122605568679454, "learning_rate": 3.0328411311151916e-06, "loss": 0.21183700561523439, "step": 114845 }, { "epoch": 0.9930739898487692, "grad_norm": 0.6837073465996594, "learning_rate": 3.0326374100769436e-06, "loss": 0.02859954833984375, "step": 114850 }, { "epoch": 0.9931172233703124, "grad_norm": 9.032919886593515, "learning_rate": 3.0324336888881757e-06, "loss": 0.214990234375, "step": 114855 }, { "epoch": 0.9931604568918556, "grad_norm": 3.927183207280165, "learning_rate": 3.0322299675498264e-06, "loss": 0.05830116271972656, "step": 114860 }, { "epoch": 0.993203690413399, "grad_norm": 1.680221044439511, "learning_rate": 3.0320262460628342e-06, "loss": 0.14867668151855468, "step": 114865 }, { "epoch": 0.9932469239349422, "grad_norm": 1.1591619642927782, "learning_rate": 3.0318225244281407e-06, "loss": 0.218304443359375, "step": 114870 }, { "epoch": 0.9932901574564854, "grad_norm": 33.39814101671175, "learning_rate": 3.0316188026466844e-06, "loss": 0.45239715576171874, "step": 114875 }, { "epoch": 0.9933333909780288, "grad_norm": 0.9111673368892224, "learning_rate": 3.0314150807194037e-06, "loss": 0.3207826614379883, "step": 114880 }, { "epoch": 0.993376624499572, "grad_norm": 1.5097828877195483, "learning_rate": 3.0312113586472414e-06, "loss": 0.19882659912109374, "step": 114885 }, { "epoch": 0.9934198580211152, "grad_norm": 0.7751333494573255, "learning_rate": 3.0310076364311347e-06, "loss": 0.07925796508789062, "step": 114890 }, { "epoch": 0.9934630915426585, "grad_norm": 1.4780662221276042, "learning_rate": 3.0308039140720226e-06, "loss": 0.261834716796875, "step": 114895 }, { "epoch": 0.9935063250642018, "grad_norm": 3.5935155580797233, "learning_rate": 3.0306001915708466e-06, "loss": 0.06441154479980468, "step": 114900 }, { "epoch": 0.993549558585745, "grad_norm": 0.051323130070438615, "learning_rate": 3.0303964689285443e-06, "loss": 0.07768402099609376, "step": 114905 }, { "epoch": 0.9935927921072883, "grad_norm": 2.8598949251235797, "learning_rate": 3.0301927461460564e-06, "loss": 0.04612555503845215, "step": 114910 }, { "epoch": 0.9936360256288316, "grad_norm": 3.6902876350566687, "learning_rate": 3.029989023224322e-06, "loss": 0.2477020263671875, "step": 114915 }, { "epoch": 0.9936792591503748, "grad_norm": 10.26599594310589, "learning_rate": 3.0297853001642823e-06, "loss": 0.296875, "step": 114920 }, { "epoch": 0.9937224926719181, "grad_norm": 15.42265659595996, "learning_rate": 3.029581576966874e-06, "loss": 0.5378128051757812, "step": 114925 }, { "epoch": 0.9937657261934614, "grad_norm": 18.44173107726582, "learning_rate": 3.0293778536330387e-06, "loss": 0.12224502563476562, "step": 114930 }, { "epoch": 0.9938089597150046, "grad_norm": 4.309233608749839, "learning_rate": 3.029174130163715e-06, "loss": 0.0904541015625, "step": 114935 }, { "epoch": 0.9938521932365479, "grad_norm": 0.5501940682197232, "learning_rate": 3.0289704065598423e-06, "loss": 0.26346893310546876, "step": 114940 }, { "epoch": 0.9938954267580912, "grad_norm": 7.060493004133441, "learning_rate": 3.0287666828223616e-06, "loss": 0.1617645263671875, "step": 114945 }, { "epoch": 0.9939386602796344, "grad_norm": 3.7600876377655634, "learning_rate": 3.0285629589522118e-06, "loss": 0.27260780334472656, "step": 114950 }, { "epoch": 0.9939818938011776, "grad_norm": 1.629195627477132, "learning_rate": 3.028359234950332e-06, "loss": 0.015891265869140626, "step": 114955 }, { "epoch": 0.994025127322721, "grad_norm": 0.08062396944489517, "learning_rate": 3.0281555108176606e-06, "loss": 0.12454948425292969, "step": 114960 }, { "epoch": 0.9940683608442642, "grad_norm": 2.3025138529415017, "learning_rate": 3.0279517865551393e-06, "loss": 0.21549072265625, "step": 114965 }, { "epoch": 0.9941115943658074, "grad_norm": 0.89296084826385, "learning_rate": 3.0277480621637072e-06, "loss": 0.05999755859375, "step": 114970 }, { "epoch": 0.9941548278873508, "grad_norm": 2.2616825384615495, "learning_rate": 3.0275443376443024e-06, "loss": 0.299749755859375, "step": 114975 }, { "epoch": 0.994198061408894, "grad_norm": 3.1411118652394503, "learning_rate": 3.0273406129978672e-06, "loss": 0.03578681945800781, "step": 114980 }, { "epoch": 0.9942412949304372, "grad_norm": 17.73468222763631, "learning_rate": 3.027136888225339e-06, "loss": 0.13136844635009765, "step": 114985 }, { "epoch": 0.9942845284519806, "grad_norm": 9.265366321263675, "learning_rate": 3.0269331633276564e-06, "loss": 0.05898361206054688, "step": 114990 }, { "epoch": 0.9943277619735238, "grad_norm": 1.7624430998889038, "learning_rate": 3.0267294383057616e-06, "loss": 0.12964019775390626, "step": 114995 }, { "epoch": 0.994370995495067, "grad_norm": 0.6805852729917452, "learning_rate": 3.0265257131605934e-06, "loss": 0.276007080078125, "step": 115000 }, { "epoch": 0.9944142290166104, "grad_norm": 35.95063643153007, "learning_rate": 3.0263219878930895e-06, "loss": 0.7885177612304688, "step": 115005 }, { "epoch": 0.9944574625381536, "grad_norm": 3.402580090204198, "learning_rate": 3.0261182625041922e-06, "loss": 0.20973358154296876, "step": 115010 }, { "epoch": 0.9945006960596968, "grad_norm": 66.34422025040321, "learning_rate": 3.0259145369948397e-06, "loss": 0.4408561706542969, "step": 115015 }, { "epoch": 0.9945439295812402, "grad_norm": 2.9439919394146665, "learning_rate": 3.0257108113659704e-06, "loss": 0.042205810546875, "step": 115020 }, { "epoch": 0.9945871631027834, "grad_norm": 1.0915886463553375, "learning_rate": 3.0255070856185266e-06, "loss": 0.039861297607421874, "step": 115025 }, { "epoch": 0.9946303966243266, "grad_norm": 0.21349200273131605, "learning_rate": 3.025303359753446e-06, "loss": 0.18830604553222657, "step": 115030 }, { "epoch": 0.9946736301458698, "grad_norm": 2.688191102893541, "learning_rate": 3.025099633771669e-06, "loss": 0.0511688232421875, "step": 115035 }, { "epoch": 0.9947168636674132, "grad_norm": 0.2770693380307538, "learning_rate": 3.0248959076741337e-06, "loss": 0.21676177978515626, "step": 115040 }, { "epoch": 0.9947600971889564, "grad_norm": 0.5805662154197252, "learning_rate": 3.0246921814617807e-06, "loss": 0.055875396728515624, "step": 115045 }, { "epoch": 0.9948033307104996, "grad_norm": 3.5727195721690244, "learning_rate": 3.0244884551355504e-06, "loss": 0.04326171875, "step": 115050 }, { "epoch": 0.994846564232043, "grad_norm": 10.020303587681274, "learning_rate": 3.0242847286963806e-06, "loss": 0.1014984130859375, "step": 115055 }, { "epoch": 0.9948897977535862, "grad_norm": 9.431497889690734, "learning_rate": 3.0240810021452123e-06, "loss": 0.255517578125, "step": 115060 }, { "epoch": 0.9949330312751294, "grad_norm": 10.986449798379853, "learning_rate": 3.0238772754829848e-06, "loss": 0.16877288818359376, "step": 115065 }, { "epoch": 0.9949762647966728, "grad_norm": 15.388365532653756, "learning_rate": 3.0236735487106366e-06, "loss": 0.046649169921875, "step": 115070 }, { "epoch": 0.995019498318216, "grad_norm": 0.9002339845018882, "learning_rate": 3.0234698218291085e-06, "loss": 0.16127777099609375, "step": 115075 }, { "epoch": 0.9950627318397592, "grad_norm": 1.3543960763434977, "learning_rate": 3.0232660948393397e-06, "loss": 0.16122970581054688, "step": 115080 }, { "epoch": 0.9951059653613026, "grad_norm": 11.227162896404126, "learning_rate": 3.0230623677422697e-06, "loss": 0.0699615478515625, "step": 115085 }, { "epoch": 0.9951491988828458, "grad_norm": 0.013157449353152016, "learning_rate": 3.0228586405388385e-06, "loss": 0.12421150207519531, "step": 115090 }, { "epoch": 0.995192432404389, "grad_norm": 7.546160839144623, "learning_rate": 3.0226549132299852e-06, "loss": 0.05322418212890625, "step": 115095 }, { "epoch": 0.9952356659259324, "grad_norm": 5.054171643859436, "learning_rate": 3.0224511858166487e-06, "loss": 0.055298995971679685, "step": 115100 }, { "epoch": 0.9952788994474756, "grad_norm": 0.38605575399503184, "learning_rate": 3.0222474582997697e-06, "loss": 0.05624847412109375, "step": 115105 }, { "epoch": 0.9953221329690188, "grad_norm": 5.3769445479574385, "learning_rate": 3.0220437306802874e-06, "loss": 0.06226024627685547, "step": 115110 }, { "epoch": 0.9953653664905622, "grad_norm": 1.901568656131525, "learning_rate": 3.021840002959141e-06, "loss": 0.27166595458984377, "step": 115115 }, { "epoch": 0.9954086000121054, "grad_norm": 31.903644328600176, "learning_rate": 3.021636275137271e-06, "loss": 0.1867136001586914, "step": 115120 }, { "epoch": 0.9954518335336486, "grad_norm": 1.3266924820979356, "learning_rate": 3.021432547215617e-06, "loss": 0.12850570678710938, "step": 115125 }, { "epoch": 0.9954950670551919, "grad_norm": 4.751758983715946, "learning_rate": 3.0212288191951166e-06, "loss": 0.12078475952148438, "step": 115130 }, { "epoch": 0.9955383005767352, "grad_norm": 2.9829374075700614, "learning_rate": 3.0210250910767116e-06, "loss": 0.5406982421875, "step": 115135 }, { "epoch": 0.9955815340982784, "grad_norm": 4.535782265654195, "learning_rate": 3.0208213628613403e-06, "loss": 0.13736419677734374, "step": 115140 }, { "epoch": 0.9956247676198217, "grad_norm": 19.456202402985323, "learning_rate": 3.0206176345499435e-06, "loss": 0.15710678100585937, "step": 115145 }, { "epoch": 0.995668001141365, "grad_norm": 0.8206117157481664, "learning_rate": 3.020413906143459e-06, "loss": 0.27533836364746095, "step": 115150 }, { "epoch": 0.9957112346629082, "grad_norm": 2.429852990149431, "learning_rate": 3.0202101776428284e-06, "loss": 0.01655120849609375, "step": 115155 }, { "epoch": 0.9957544681844515, "grad_norm": 41.12053000181403, "learning_rate": 3.0200064490489893e-06, "loss": 0.16640090942382812, "step": 115160 }, { "epoch": 0.9957977017059948, "grad_norm": 2.707010442842109, "learning_rate": 3.019802720362882e-06, "loss": 0.15274372100830078, "step": 115165 }, { "epoch": 0.995840935227538, "grad_norm": 9.134010426435477, "learning_rate": 3.0195989915854473e-06, "loss": 0.034967994689941405, "step": 115170 }, { "epoch": 0.9958841687490813, "grad_norm": 7.669359634068302, "learning_rate": 3.0193952627176235e-06, "loss": 0.0812042236328125, "step": 115175 }, { "epoch": 0.9959274022706246, "grad_norm": 2.943688314819258, "learning_rate": 3.01919153376035e-06, "loss": 0.055828857421875, "step": 115180 }, { "epoch": 0.9959706357921678, "grad_norm": 1.2581361994874065, "learning_rate": 3.0189878047145676e-06, "loss": 0.022061920166015624, "step": 115185 }, { "epoch": 0.996013869313711, "grad_norm": 4.5567595731976525, "learning_rate": 3.0187840755812143e-06, "loss": 0.07704544067382812, "step": 115190 }, { "epoch": 0.9960571028352544, "grad_norm": 49.31638260626821, "learning_rate": 3.018580346361231e-06, "loss": 0.29209136962890625, "step": 115195 }, { "epoch": 0.9961003363567976, "grad_norm": 0.9445847181681825, "learning_rate": 3.018376617055557e-06, "loss": 0.28375701904296874, "step": 115200 }, { "epoch": 0.9961435698783409, "grad_norm": 7.531825303637826, "learning_rate": 3.0181728876651315e-06, "loss": 0.090289306640625, "step": 115205 }, { "epoch": 0.9961868033998841, "grad_norm": 3.7701370526739515, "learning_rate": 3.017969158190894e-06, "loss": 0.2648448944091797, "step": 115210 }, { "epoch": 0.9962300369214274, "grad_norm": 2.481894603131225, "learning_rate": 3.0177654286337844e-06, "loss": 0.18519744873046876, "step": 115215 }, { "epoch": 0.9962732704429706, "grad_norm": 0.8894765294406981, "learning_rate": 3.0175616989947423e-06, "loss": 0.04310150146484375, "step": 115220 }, { "epoch": 0.9963165039645139, "grad_norm": 1.0382701943684023, "learning_rate": 3.017357969274707e-06, "loss": 0.1104949951171875, "step": 115225 }, { "epoch": 0.9963597374860572, "grad_norm": 27.76236504754322, "learning_rate": 3.0171542394746186e-06, "loss": 0.10373134613037109, "step": 115230 }, { "epoch": 0.9964029710076004, "grad_norm": 6.92807964243805, "learning_rate": 3.0169505095954163e-06, "loss": 0.031571578979492185, "step": 115235 }, { "epoch": 0.9964462045291437, "grad_norm": 2.028801173942645, "learning_rate": 3.016746779638039e-06, "loss": 0.019205474853515626, "step": 115240 }, { "epoch": 0.996489438050687, "grad_norm": 7.723206978695485, "learning_rate": 3.0165430496034276e-06, "loss": 0.10289459228515625, "step": 115245 }, { "epoch": 0.9965326715722302, "grad_norm": 0.3967979319556954, "learning_rate": 3.0163393194925215e-06, "loss": 0.12915477752685547, "step": 115250 }, { "epoch": 0.9965759050937735, "grad_norm": 29.333440905089425, "learning_rate": 3.0161355893062596e-06, "loss": 0.15834884643554686, "step": 115255 }, { "epoch": 0.9966191386153168, "grad_norm": 0.926412861163609, "learning_rate": 3.0159318590455812e-06, "loss": 0.07050933837890624, "step": 115260 }, { "epoch": 0.99666237213686, "grad_norm": 9.119104026213295, "learning_rate": 3.015728128711428e-06, "loss": 0.06990165710449218, "step": 115265 }, { "epoch": 0.9967056056584033, "grad_norm": 38.52133272407317, "learning_rate": 3.015524398304736e-06, "loss": 0.5751903533935547, "step": 115270 }, { "epoch": 0.9967488391799466, "grad_norm": 10.437195205731483, "learning_rate": 3.0153206678264478e-06, "loss": 0.17250351905822753, "step": 115275 }, { "epoch": 0.9967920727014898, "grad_norm": 6.315089328877276, "learning_rate": 3.015116937277503e-06, "loss": 0.12912874221801757, "step": 115280 }, { "epoch": 0.9968353062230331, "grad_norm": 1.5242596079862887, "learning_rate": 3.0149132066588396e-06, "loss": 0.15504875183105468, "step": 115285 }, { "epoch": 0.9968785397445764, "grad_norm": 0.13200621239156987, "learning_rate": 3.014709475971397e-06, "loss": 0.027336883544921874, "step": 115290 }, { "epoch": 0.9969217732661196, "grad_norm": 25.187210403940174, "learning_rate": 3.0145057452161167e-06, "loss": 0.131317138671875, "step": 115295 }, { "epoch": 0.9969650067876629, "grad_norm": 38.22743080956001, "learning_rate": 3.014302014393936e-06, "loss": 0.2380645751953125, "step": 115300 }, { "epoch": 0.9970082403092061, "grad_norm": 16.30414751841742, "learning_rate": 3.014098283505797e-06, "loss": 0.3103424072265625, "step": 115305 }, { "epoch": 0.9970514738307494, "grad_norm": 8.930743103486623, "learning_rate": 3.0138945525526375e-06, "loss": 0.251434326171875, "step": 115310 }, { "epoch": 0.9970947073522927, "grad_norm": 17.19887681118665, "learning_rate": 3.0136908215353973e-06, "loss": 0.0798309326171875, "step": 115315 }, { "epoch": 0.9971379408738359, "grad_norm": 16.29094186319908, "learning_rate": 3.0134870904550168e-06, "loss": 0.15538101196289061, "step": 115320 }, { "epoch": 0.9971811743953792, "grad_norm": 13.086941253447018, "learning_rate": 3.013283359312434e-06, "loss": 0.09097061157226563, "step": 115325 }, { "epoch": 0.9972244079169225, "grad_norm": 10.53459148682645, "learning_rate": 3.0130796281085894e-06, "loss": 0.11506729125976563, "step": 115330 }, { "epoch": 0.9972676414384657, "grad_norm": 4.160974779344446, "learning_rate": 3.012875896844424e-06, "loss": 0.23243846893310546, "step": 115335 }, { "epoch": 0.997310874960009, "grad_norm": 0.24056127778012487, "learning_rate": 3.012672165520875e-06, "loss": 0.07465667724609375, "step": 115340 }, { "epoch": 0.9973541084815523, "grad_norm": 16.160848415709786, "learning_rate": 3.012468434138884e-06, "loss": 0.5549155235290527, "step": 115345 }, { "epoch": 0.9973973420030955, "grad_norm": 0.43389251741674223, "learning_rate": 3.0122647026993896e-06, "loss": 0.1444427490234375, "step": 115350 }, { "epoch": 0.9974405755246388, "grad_norm": 0.25729238860977277, "learning_rate": 3.012060971203331e-06, "loss": 0.08394126892089844, "step": 115355 }, { "epoch": 0.9974838090461821, "grad_norm": 2.19415088836722, "learning_rate": 3.0118572396516478e-06, "loss": 0.2294321060180664, "step": 115360 }, { "epoch": 0.9975270425677253, "grad_norm": 1.1829681665410832, "learning_rate": 3.0116535080452813e-06, "loss": 0.0911376953125, "step": 115365 }, { "epoch": 0.9975702760892686, "grad_norm": 11.242705452905572, "learning_rate": 3.0114497763851686e-06, "loss": 0.12275314331054688, "step": 115370 }, { "epoch": 0.9976135096108119, "grad_norm": 17.258774313026432, "learning_rate": 3.0112460446722517e-06, "loss": 0.43123817443847656, "step": 115375 }, { "epoch": 0.9976567431323551, "grad_norm": 9.812822076022334, "learning_rate": 3.0110423129074686e-06, "loss": 0.172894287109375, "step": 115380 }, { "epoch": 0.9976999766538983, "grad_norm": 13.806170626418496, "learning_rate": 3.0108385810917588e-06, "loss": 0.06697845458984375, "step": 115385 }, { "epoch": 0.9977432101754417, "grad_norm": 4.082671801498656, "learning_rate": 3.0106348492260626e-06, "loss": 0.0717987060546875, "step": 115390 }, { "epoch": 0.9977864436969849, "grad_norm": 13.643829730165121, "learning_rate": 3.01043111731132e-06, "loss": 0.1723102569580078, "step": 115395 }, { "epoch": 0.9978296772185281, "grad_norm": 24.562995908471798, "learning_rate": 3.01022738534847e-06, "loss": 0.2119384765625, "step": 115400 }, { "epoch": 0.9978729107400715, "grad_norm": 8.003656211272007, "learning_rate": 3.0100236533384516e-06, "loss": 0.27822170257568357, "step": 115405 }, { "epoch": 0.9979161442616147, "grad_norm": 9.111608227024155, "learning_rate": 3.0098199212822057e-06, "loss": 0.09765090942382812, "step": 115410 }, { "epoch": 0.9979593777831579, "grad_norm": 5.886479456108226, "learning_rate": 3.0096161891806706e-06, "loss": 0.0339752197265625, "step": 115415 }, { "epoch": 0.9980026113047012, "grad_norm": 6.549914416463642, "learning_rate": 3.009412457034786e-06, "loss": 0.266973876953125, "step": 115420 }, { "epoch": 0.9980458448262445, "grad_norm": 1.2592215520945373, "learning_rate": 3.0092087248454933e-06, "loss": 0.14356727600097657, "step": 115425 }, { "epoch": 0.9980890783477877, "grad_norm": 8.96826870186894, "learning_rate": 3.0090049926137302e-06, "loss": 0.18947601318359375, "step": 115430 }, { "epoch": 0.998132311869331, "grad_norm": 0.4269550302590774, "learning_rate": 3.0088012603404363e-06, "loss": 0.06720428466796875, "step": 115435 }, { "epoch": 0.9981755453908743, "grad_norm": 0.504417156595841, "learning_rate": 3.0085975280265517e-06, "loss": 0.05635080337524414, "step": 115440 }, { "epoch": 0.9982187789124175, "grad_norm": 3.6150698793502842, "learning_rate": 3.008393795673017e-06, "loss": 0.24784774780273439, "step": 115445 }, { "epoch": 0.9982620124339608, "grad_norm": 2.600614121914898, "learning_rate": 3.0081900632807706e-06, "loss": 0.0707977294921875, "step": 115450 }, { "epoch": 0.9983052459555041, "grad_norm": 14.548678233730906, "learning_rate": 3.007986330850752e-06, "loss": 0.13570709228515626, "step": 115455 }, { "epoch": 0.9983484794770473, "grad_norm": 0.5794810202881597, "learning_rate": 3.0077825983839016e-06, "loss": 0.19480514526367188, "step": 115460 }, { "epoch": 0.9983917129985906, "grad_norm": 0.698911511095094, "learning_rate": 3.0075788658811583e-06, "loss": 0.2568817138671875, "step": 115465 }, { "epoch": 0.9984349465201339, "grad_norm": 1.104121596649478, "learning_rate": 3.0073751333434614e-06, "loss": 0.01998291015625, "step": 115470 }, { "epoch": 0.9984781800416771, "grad_norm": 17.511617875656952, "learning_rate": 3.007171400771752e-06, "loss": 0.21571044921875, "step": 115475 }, { "epoch": 0.9985214135632203, "grad_norm": 0.2765543659241302, "learning_rate": 3.0069676681669686e-06, "loss": 0.05783233642578125, "step": 115480 }, { "epoch": 0.9985646470847637, "grad_norm": 23.691223671163463, "learning_rate": 3.0067639355300502e-06, "loss": 0.08287105560302735, "step": 115485 }, { "epoch": 0.9986078806063069, "grad_norm": 1.8933403605103372, "learning_rate": 3.0065602028619378e-06, "loss": 0.026493072509765625, "step": 115490 }, { "epoch": 0.9986511141278501, "grad_norm": 16.204622391528687, "learning_rate": 3.00635647016357e-06, "loss": 0.25109024047851564, "step": 115495 }, { "epoch": 0.9986943476493935, "grad_norm": 14.717026795140523, "learning_rate": 3.006152737435886e-06, "loss": 0.15135345458984376, "step": 115500 }, { "epoch": 0.9987375811709367, "grad_norm": 6.10623928945674, "learning_rate": 3.0059490046798276e-06, "loss": 0.029439544677734374, "step": 115505 }, { "epoch": 0.9987808146924799, "grad_norm": 22.48505698947318, "learning_rate": 3.0057452718963327e-06, "loss": 0.13926239013671876, "step": 115510 }, { "epoch": 0.9988240482140233, "grad_norm": 37.928910159259914, "learning_rate": 3.00554153908634e-06, "loss": 0.38678817749023436, "step": 115515 }, { "epoch": 0.9988672817355665, "grad_norm": 2.728396257319506, "learning_rate": 3.0053378062507916e-06, "loss": 0.23865585327148436, "step": 115520 }, { "epoch": 0.9989105152571097, "grad_norm": 18.49224917385552, "learning_rate": 3.0051340733906246e-06, "loss": 0.2690155029296875, "step": 115525 }, { "epoch": 0.9989537487786531, "grad_norm": 0.0434171742627677, "learning_rate": 3.004930340506779e-06, "loss": 0.027436065673828124, "step": 115530 }, { "epoch": 0.9989969823001963, "grad_norm": 13.126459912939348, "learning_rate": 3.004726607600197e-06, "loss": 0.353009033203125, "step": 115535 }, { "epoch": 0.9990402158217395, "grad_norm": 4.0437709498213605, "learning_rate": 3.0045228746718157e-06, "loss": 0.1353546142578125, "step": 115540 }, { "epoch": 0.9990834493432829, "grad_norm": 4.300028269021025, "learning_rate": 3.0043191417225746e-06, "loss": 0.13692855834960938, "step": 115545 }, { "epoch": 0.9991266828648261, "grad_norm": 18.465061453438135, "learning_rate": 3.004115408753415e-06, "loss": 0.15235729217529298, "step": 115550 }, { "epoch": 0.9991699163863693, "grad_norm": 29.624194442995368, "learning_rate": 3.003911675765275e-06, "loss": 0.16390609741210938, "step": 115555 }, { "epoch": 0.9992131499079125, "grad_norm": 1.1229748842698024, "learning_rate": 3.0037079427590948e-06, "loss": 0.1584381103515625, "step": 115560 }, { "epoch": 0.9992563834294559, "grad_norm": 1.6099473037110925, "learning_rate": 3.0035042097358138e-06, "loss": 0.16191864013671875, "step": 115565 }, { "epoch": 0.9992996169509991, "grad_norm": 1.1297598954366623, "learning_rate": 3.0033004766963713e-06, "loss": 0.17295875549316406, "step": 115570 }, { "epoch": 0.9993428504725423, "grad_norm": 0.28987300633850566, "learning_rate": 3.003096743641708e-06, "loss": 0.12346038818359376, "step": 115575 }, { "epoch": 0.9993860839940857, "grad_norm": 19.271483695340113, "learning_rate": 3.002893010572762e-06, "loss": 0.23826560974121094, "step": 115580 }, { "epoch": 0.9994293175156289, "grad_norm": 0.49095172749942345, "learning_rate": 3.002689277490474e-06, "loss": 0.06209144592285156, "step": 115585 }, { "epoch": 0.9994725510371721, "grad_norm": 34.169843383197176, "learning_rate": 3.0024855443957843e-06, "loss": 0.30727996826171877, "step": 115590 }, { "epoch": 0.9995157845587155, "grad_norm": 1.206738321311962, "learning_rate": 3.00228181128963e-06, "loss": 0.016227149963378908, "step": 115595 }, { "epoch": 0.9995590180802587, "grad_norm": 9.05434553301696, "learning_rate": 3.002078078172953e-06, "loss": 0.21625747680664062, "step": 115600 }, { "epoch": 0.9996022516018019, "grad_norm": 1.22118213079689, "learning_rate": 3.0018743450466923e-06, "loss": 0.018622970581054686, "step": 115605 }, { "epoch": 0.9996454851233453, "grad_norm": 11.127369628515783, "learning_rate": 3.0016706119117863e-06, "loss": 0.03258209228515625, "step": 115610 }, { "epoch": 0.9996887186448885, "grad_norm": 11.604177852714287, "learning_rate": 3.001466878769176e-06, "loss": 0.23523712158203125, "step": 115615 }, { "epoch": 0.9997319521664317, "grad_norm": 9.142571824756553, "learning_rate": 3.001263145619801e-06, "loss": 0.27051239013671874, "step": 115620 }, { "epoch": 0.9997751856879751, "grad_norm": 5.538248706996961, "learning_rate": 3.0010594124646e-06, "loss": 0.2069469451904297, "step": 115625 }, { "epoch": 0.9998184192095183, "grad_norm": 39.33295621071322, "learning_rate": 3.0008556793045136e-06, "loss": 0.10973968505859374, "step": 115630 }, { "epoch": 0.9998616527310615, "grad_norm": 45.94528480389972, "learning_rate": 3.000651946140481e-06, "loss": 0.3123149871826172, "step": 115635 }, { "epoch": 0.9999048862526049, "grad_norm": 0.398422959465206, "learning_rate": 3.0004482129734404e-06, "loss": 0.23987045288085937, "step": 115640 }, { "epoch": 0.9999481197741481, "grad_norm": 5.633166470126096, "learning_rate": 3.0002444798043338e-06, "loss": 0.27230300903320315, "step": 115645 }, { "epoch": 0.9999913532956913, "grad_norm": 0.10134641944444635, "learning_rate": 3.0000407466340997e-06, "loss": 0.1014404296875, "step": 115650 }, { "epoch": 1.0000345868172347, "grad_norm": 0.021524155788453306, "learning_rate": 2.9998370134636772e-06, "loss": 0.043461036682128903, "step": 115655 }, { "epoch": 1.000077820338778, "grad_norm": 19.461866990790924, "learning_rate": 2.999633280294007e-06, "loss": 0.268853759765625, "step": 115660 }, { "epoch": 1.0001210538603211, "grad_norm": 1.0466903794245, "learning_rate": 2.9994295471260272e-06, "loss": 0.09023933410644532, "step": 115665 }, { "epoch": 1.0001642873818644, "grad_norm": 1.1920490815731268, "learning_rate": 2.9992258139606793e-06, "loss": 0.02504901885986328, "step": 115670 }, { "epoch": 1.0002075209034076, "grad_norm": 2.7386179627882825, "learning_rate": 2.9990220807989016e-06, "loss": 0.13503570556640626, "step": 115675 }, { "epoch": 1.000250754424951, "grad_norm": 10.53592788008918, "learning_rate": 2.9988183476416327e-06, "loss": 0.1442626953125, "step": 115680 }, { "epoch": 1.0002939879464943, "grad_norm": 0.8819408167150365, "learning_rate": 2.998614614489815e-06, "loss": 0.018903350830078124, "step": 115685 }, { "epoch": 1.0003372214680375, "grad_norm": 1.6848969739172155, "learning_rate": 2.998410881344386e-06, "loss": 0.08342409133911133, "step": 115690 }, { "epoch": 1.0003804549895807, "grad_norm": 14.102287534861398, "learning_rate": 2.998207148206286e-06, "loss": 0.07015457153320312, "step": 115695 }, { "epoch": 1.000423688511124, "grad_norm": 1.560332599777647, "learning_rate": 2.9980034150764546e-06, "loss": 0.12565879821777343, "step": 115700 }, { "epoch": 1.0004669220326672, "grad_norm": 0.989098692874329, "learning_rate": 2.9977996819558313e-06, "loss": 0.1108001708984375, "step": 115705 }, { "epoch": 1.0005101555542104, "grad_norm": 0.1662546602360029, "learning_rate": 2.997595948845356e-06, "loss": 0.08920974731445312, "step": 115710 }, { "epoch": 1.0005533890757539, "grad_norm": 12.090530476292354, "learning_rate": 2.9973922157459664e-06, "loss": 0.04853744506835937, "step": 115715 }, { "epoch": 1.000596622597297, "grad_norm": 0.7988177118288827, "learning_rate": 2.997188482658605e-06, "loss": 0.0112945556640625, "step": 115720 }, { "epoch": 1.0006398561188403, "grad_norm": 2.943675303101962, "learning_rate": 2.9969847495842095e-06, "loss": 0.02920989990234375, "step": 115725 }, { "epoch": 1.0006830896403835, "grad_norm": 0.9633112594563755, "learning_rate": 2.9967810165237205e-06, "loss": 0.0325653076171875, "step": 115730 }, { "epoch": 1.0007263231619268, "grad_norm": 7.468308762041162, "learning_rate": 2.9965772834780778e-06, "loss": 0.08679580688476562, "step": 115735 }, { "epoch": 1.00076955668347, "grad_norm": 3.743546994520049, "learning_rate": 2.9963735504482196e-06, "loss": 0.08910484313964843, "step": 115740 }, { "epoch": 1.0008127902050135, "grad_norm": 2.309865475392174, "learning_rate": 2.996169817435085e-06, "loss": 0.088079833984375, "step": 115745 }, { "epoch": 1.0008560237265567, "grad_norm": 1.2666002145583835, "learning_rate": 2.995966084439617e-06, "loss": 0.18673095703125, "step": 115750 }, { "epoch": 1.0008992572481, "grad_norm": 2.8502381860671577, "learning_rate": 2.9957623514627516e-06, "loss": 0.138470458984375, "step": 115755 }, { "epoch": 1.0009424907696431, "grad_norm": 2.704583350294412, "learning_rate": 2.9955586185054307e-06, "loss": 0.15582199096679689, "step": 115760 }, { "epoch": 1.0009857242911864, "grad_norm": 10.301644504342665, "learning_rate": 2.9953548855685928e-06, "loss": 0.0985260009765625, "step": 115765 }, { "epoch": 1.0010289578127296, "grad_norm": 1.592610572709028, "learning_rate": 2.995151152653178e-06, "loss": 0.037662506103515625, "step": 115770 }, { "epoch": 1.0010721913342728, "grad_norm": 1.0876503604547412, "learning_rate": 2.9949474197601257e-06, "loss": 0.0784912109375, "step": 115775 }, { "epoch": 1.0011154248558163, "grad_norm": 0.04973111132650111, "learning_rate": 2.9947436868903745e-06, "loss": 0.009651947021484374, "step": 115780 }, { "epoch": 1.0011586583773595, "grad_norm": 5.83373274768717, "learning_rate": 2.9945399540448654e-06, "loss": 0.06014308929443359, "step": 115785 }, { "epoch": 1.0012018918989027, "grad_norm": 6.518493415082956, "learning_rate": 2.994336221224538e-06, "loss": 0.06249370574951172, "step": 115790 }, { "epoch": 1.001245125420446, "grad_norm": 2.8706234858478403, "learning_rate": 2.9941324884303312e-06, "loss": 0.03395576477050781, "step": 115795 }, { "epoch": 1.0012883589419892, "grad_norm": 1.5437855851450282, "learning_rate": 2.9939287556631854e-06, "loss": 0.044690704345703124, "step": 115800 }, { "epoch": 1.0013315924635324, "grad_norm": 3.344426834943957, "learning_rate": 2.993725022924039e-06, "loss": 0.07141036987304687, "step": 115805 }, { "epoch": 1.0013748259850759, "grad_norm": 0.30109987807596733, "learning_rate": 2.9935212902138315e-06, "loss": 0.0099700927734375, "step": 115810 }, { "epoch": 1.001418059506619, "grad_norm": 0.7303964488797458, "learning_rate": 2.9933175575335044e-06, "loss": 0.05109901428222656, "step": 115815 }, { "epoch": 1.0014612930281623, "grad_norm": 20.49814814583312, "learning_rate": 2.9931138248839957e-06, "loss": 0.09351654052734375, "step": 115820 }, { "epoch": 1.0015045265497056, "grad_norm": 3.039714695888118, "learning_rate": 2.9929100922662457e-06, "loss": 0.0774648666381836, "step": 115825 }, { "epoch": 1.0015477600712488, "grad_norm": 8.649337652852251, "learning_rate": 2.992706359681194e-06, "loss": 0.11161956787109376, "step": 115830 }, { "epoch": 1.001590993592792, "grad_norm": 2.5480572642951445, "learning_rate": 2.9925026271297795e-06, "loss": 0.2650726318359375, "step": 115835 }, { "epoch": 1.0016342271143355, "grad_norm": 1.5549807116953716, "learning_rate": 2.992298894612941e-06, "loss": 0.023621177673339842, "step": 115840 }, { "epoch": 1.0016774606358787, "grad_norm": 72.0507471987859, "learning_rate": 2.9920951621316206e-06, "loss": 0.16804580688476561, "step": 115845 }, { "epoch": 1.001720694157422, "grad_norm": 0.8171247434876536, "learning_rate": 2.991891429686757e-06, "loss": 0.018959426879882814, "step": 115850 }, { "epoch": 1.0017639276789652, "grad_norm": 1.4664549204028945, "learning_rate": 2.9916876972792895e-06, "loss": 0.0894500732421875, "step": 115855 }, { "epoch": 1.0018071612005084, "grad_norm": 21.965026640645846, "learning_rate": 2.9914839649101576e-06, "loss": 0.13409652709960937, "step": 115860 }, { "epoch": 1.0018503947220516, "grad_norm": 1.3726702920905811, "learning_rate": 2.9912802325802997e-06, "loss": 0.066070556640625, "step": 115865 }, { "epoch": 1.0018936282435948, "grad_norm": 0.7624708268170617, "learning_rate": 2.9910765002906576e-06, "loss": 0.1666168212890625, "step": 115870 }, { "epoch": 1.0019368617651383, "grad_norm": 0.08732174982438382, "learning_rate": 2.9908727680421686e-06, "loss": 0.11715011596679688, "step": 115875 }, { "epoch": 1.0019800952866815, "grad_norm": 64.602051061337, "learning_rate": 2.9906690358357746e-06, "loss": 0.40835113525390626, "step": 115880 }, { "epoch": 1.0020233288082248, "grad_norm": 2.266324449306802, "learning_rate": 2.9904653036724146e-06, "loss": 0.02350311279296875, "step": 115885 }, { "epoch": 1.002066562329768, "grad_norm": 4.72509240671648, "learning_rate": 2.990261571553027e-06, "loss": 0.05749244689941406, "step": 115890 }, { "epoch": 1.0021097958513112, "grad_norm": 0.0250199223035486, "learning_rate": 2.990057839478553e-06, "loss": 0.06188783645629883, "step": 115895 }, { "epoch": 1.0021530293728544, "grad_norm": 1.5045140451644892, "learning_rate": 2.9898541074499312e-06, "loss": 0.04204864501953125, "step": 115900 }, { "epoch": 1.002196262894398, "grad_norm": 17.22276862734249, "learning_rate": 2.9896503754680997e-06, "loss": 0.11080741882324219, "step": 115905 }, { "epoch": 1.0022394964159411, "grad_norm": 33.7422114234346, "learning_rate": 2.9894466435340014e-06, "loss": 0.13696632385253907, "step": 115910 }, { "epoch": 1.0022827299374844, "grad_norm": 3.5945564257672333, "learning_rate": 2.989242911648574e-06, "loss": 0.03310089111328125, "step": 115915 }, { "epoch": 1.0023259634590276, "grad_norm": 0.312247265652155, "learning_rate": 2.989039179812757e-06, "loss": 0.028549957275390624, "step": 115920 }, { "epoch": 1.0023691969805708, "grad_norm": 0.4071812132112675, "learning_rate": 2.988835448027491e-06, "loss": 0.021770668029785157, "step": 115925 }, { "epoch": 1.002412430502114, "grad_norm": 25.78781730241644, "learning_rate": 2.988631716293715e-06, "loss": 0.13955192565917968, "step": 115930 }, { "epoch": 1.0024556640236575, "grad_norm": 0.9039492283592654, "learning_rate": 2.9884279846123666e-06, "loss": 0.10972251892089843, "step": 115935 }, { "epoch": 1.0024988975452007, "grad_norm": 0.5891684627458703, "learning_rate": 2.988224252984389e-06, "loss": 0.2364959716796875, "step": 115940 }, { "epoch": 1.002542131066744, "grad_norm": 0.2129049045340981, "learning_rate": 2.9880205214107206e-06, "loss": 0.08276195526123047, "step": 115945 }, { "epoch": 1.0025853645882872, "grad_norm": 0.6991941231412249, "learning_rate": 2.987816789892299e-06, "loss": 0.04011993408203125, "step": 115950 }, { "epoch": 1.0026285981098304, "grad_norm": 15.135063300947232, "learning_rate": 2.9876130584300667e-06, "loss": 0.1211334228515625, "step": 115955 }, { "epoch": 1.0026718316313736, "grad_norm": 0.34796799123516764, "learning_rate": 2.9874093270249617e-06, "loss": 0.013494873046875, "step": 115960 }, { "epoch": 1.0027150651529169, "grad_norm": 2.7594876691441494, "learning_rate": 2.9872055956779237e-06, "loss": 0.24697265625, "step": 115965 }, { "epoch": 1.0027582986744603, "grad_norm": 2.5656738443713825, "learning_rate": 2.9870018643898905e-06, "loss": 0.04183349609375, "step": 115970 }, { "epoch": 1.0028015321960035, "grad_norm": 10.533645155854346, "learning_rate": 2.9867981331618056e-06, "loss": 0.07321319580078126, "step": 115975 }, { "epoch": 1.0028447657175468, "grad_norm": 3.4995097870535896, "learning_rate": 2.9865944019946054e-06, "loss": 0.058935546875, "step": 115980 }, { "epoch": 1.00288799923909, "grad_norm": 0.5218259491211613, "learning_rate": 2.9863906708892314e-06, "loss": 0.05956344604492188, "step": 115985 }, { "epoch": 1.0029312327606332, "grad_norm": 14.38348946635457, "learning_rate": 2.9861869398466226e-06, "loss": 0.13805389404296875, "step": 115990 }, { "epoch": 1.0029744662821765, "grad_norm": 2.577834724615932, "learning_rate": 2.9859832088677183e-06, "loss": 0.06510353088378906, "step": 115995 }, { "epoch": 1.00301769980372, "grad_norm": 2.837278602858148, "learning_rate": 2.985779477953457e-06, "loss": 0.014610671997070312, "step": 116000 }, { "epoch": 1.0030609333252631, "grad_norm": 7.622040727307272, "learning_rate": 2.9855757471047806e-06, "loss": 0.03181476593017578, "step": 116005 }, { "epoch": 1.0031041668468064, "grad_norm": 0.3819570750638485, "learning_rate": 2.985372016322627e-06, "loss": 0.13770675659179688, "step": 116010 }, { "epoch": 1.0031474003683496, "grad_norm": 3.579225991075599, "learning_rate": 2.985168285607937e-06, "loss": 0.05167884826660156, "step": 116015 }, { "epoch": 1.0031906338898928, "grad_norm": 0.5189720302013707, "learning_rate": 2.98496455496165e-06, "loss": 0.11056747436523437, "step": 116020 }, { "epoch": 1.003233867411436, "grad_norm": 0.07648404959843258, "learning_rate": 2.9847608243847045e-06, "loss": 0.10942459106445312, "step": 116025 }, { "epoch": 1.0032771009329795, "grad_norm": 4.4977169051668024, "learning_rate": 2.9845570938780397e-06, "loss": 0.04085884094238281, "step": 116030 }, { "epoch": 1.0033203344545227, "grad_norm": 0.36180015191606607, "learning_rate": 2.9843533634425977e-06, "loss": 0.0274810791015625, "step": 116035 }, { "epoch": 1.003363567976066, "grad_norm": 3.151024349946543, "learning_rate": 2.9841496330793156e-06, "loss": 0.09783477783203125, "step": 116040 }, { "epoch": 1.0034068014976092, "grad_norm": 4.463483989731798, "learning_rate": 2.9839459027891346e-06, "loss": 0.2234527587890625, "step": 116045 }, { "epoch": 1.0034500350191524, "grad_norm": 34.88186213130705, "learning_rate": 2.983742172572994e-06, "loss": 0.2536022186279297, "step": 116050 }, { "epoch": 1.0034932685406956, "grad_norm": 1.2367178001578667, "learning_rate": 2.983538442431833e-06, "loss": 0.11705188751220703, "step": 116055 }, { "epoch": 1.0035365020622389, "grad_norm": 12.56899177292219, "learning_rate": 2.9833347123665904e-06, "loss": 0.04457550048828125, "step": 116060 }, { "epoch": 1.0035797355837823, "grad_norm": 2.9202324949743415, "learning_rate": 2.9831309823782064e-06, "loss": 0.08828601837158204, "step": 116065 }, { "epoch": 1.0036229691053256, "grad_norm": 8.26015151334338, "learning_rate": 2.9829272524676215e-06, "loss": 0.0694549560546875, "step": 116070 }, { "epoch": 1.0036662026268688, "grad_norm": 2.427418903213543, "learning_rate": 2.982723522635775e-06, "loss": 0.0412872314453125, "step": 116075 }, { "epoch": 1.003709436148412, "grad_norm": 0.3362649211189061, "learning_rate": 2.982519792883606e-06, "loss": 0.045916748046875, "step": 116080 }, { "epoch": 1.0037526696699552, "grad_norm": 1.3354928450163444, "learning_rate": 2.9823160632120544e-06, "loss": 0.03929443359375, "step": 116085 }, { "epoch": 1.0037959031914985, "grad_norm": 0.13251640440442097, "learning_rate": 2.982112333622059e-06, "loss": 0.1654052734375, "step": 116090 }, { "epoch": 1.003839136713042, "grad_norm": 0.5231808313028795, "learning_rate": 2.9819086041145587e-06, "loss": 0.2069091796875, "step": 116095 }, { "epoch": 1.0038823702345852, "grad_norm": 2.653592060354033, "learning_rate": 2.981704874690496e-06, "loss": 0.057086181640625, "step": 116100 }, { "epoch": 1.0039256037561284, "grad_norm": 17.327117814891007, "learning_rate": 2.981501145350809e-06, "loss": 0.21059722900390626, "step": 116105 }, { "epoch": 1.0039688372776716, "grad_norm": 1.8100118041192816, "learning_rate": 2.9812974160964365e-06, "loss": 0.055254364013671876, "step": 116110 }, { "epoch": 1.0040120707992148, "grad_norm": 0.21898108218328702, "learning_rate": 2.9810936869283184e-06, "loss": 0.0807647705078125, "step": 116115 }, { "epoch": 1.004055304320758, "grad_norm": 6.37923177931613, "learning_rate": 2.980889957847395e-06, "loss": 0.0358856201171875, "step": 116120 }, { "epoch": 1.0040985378423013, "grad_norm": 16.484550936643853, "learning_rate": 2.9806862288546055e-06, "loss": 0.0779205322265625, "step": 116125 }, { "epoch": 1.0041417713638447, "grad_norm": 4.838053138332329, "learning_rate": 2.980482499950888e-06, "loss": 0.0882904052734375, "step": 116130 }, { "epoch": 1.004185004885388, "grad_norm": 3.1097573558736618, "learning_rate": 2.980278771137185e-06, "loss": 0.04494190216064453, "step": 116135 }, { "epoch": 1.0042282384069312, "grad_norm": 17.72779269238778, "learning_rate": 2.9800750424144345e-06, "loss": 0.09487838745117187, "step": 116140 }, { "epoch": 1.0042714719284744, "grad_norm": 1.5335035756905497, "learning_rate": 2.979871313783576e-06, "loss": 0.1076446533203125, "step": 116145 }, { "epoch": 1.0043147054500177, "grad_norm": 16.094857589455266, "learning_rate": 2.979667585245549e-06, "loss": 0.08187484741210938, "step": 116150 }, { "epoch": 1.004357938971561, "grad_norm": 1.4333383471104721, "learning_rate": 2.9794638568012938e-06, "loss": 0.0591278076171875, "step": 116155 }, { "epoch": 1.0044011724931043, "grad_norm": 22.722324631862772, "learning_rate": 2.979260128451748e-06, "loss": 0.14056320190429689, "step": 116160 }, { "epoch": 1.0044444060146476, "grad_norm": 10.11355904648821, "learning_rate": 2.9790564001978536e-06, "loss": 0.030629348754882813, "step": 116165 }, { "epoch": 1.0044876395361908, "grad_norm": 20.142190664735097, "learning_rate": 2.97885267204055e-06, "loss": 0.05420951843261719, "step": 116170 }, { "epoch": 1.004530873057734, "grad_norm": 52.78657061017837, "learning_rate": 2.9786489439807747e-06, "loss": 0.18044204711914064, "step": 116175 }, { "epoch": 1.0045741065792773, "grad_norm": 0.557734666808223, "learning_rate": 2.9784452160194697e-06, "loss": 0.18812103271484376, "step": 116180 }, { "epoch": 1.0046173401008205, "grad_norm": 0.2795321826793959, "learning_rate": 2.9782414881575734e-06, "loss": 0.17666168212890626, "step": 116185 }, { "epoch": 1.004660573622364, "grad_norm": 10.377491696661126, "learning_rate": 2.9780377603960238e-06, "loss": 0.10301055908203124, "step": 116190 }, { "epoch": 1.0047038071439072, "grad_norm": 0.744951898090005, "learning_rate": 2.9778340327357636e-06, "loss": 0.0565887451171875, "step": 116195 }, { "epoch": 1.0047470406654504, "grad_norm": 28.90190726159555, "learning_rate": 2.977630305177731e-06, "loss": 0.27093658447265623, "step": 116200 }, { "epoch": 1.0047902741869936, "grad_norm": 0.7663351698976256, "learning_rate": 2.977426577722865e-06, "loss": 0.040643310546875, "step": 116205 }, { "epoch": 1.0048335077085369, "grad_norm": 2.0018025638469252, "learning_rate": 2.9772228503721055e-06, "loss": 0.07096900939941406, "step": 116210 }, { "epoch": 1.00487674123008, "grad_norm": 0.05935245286531086, "learning_rate": 2.977019123126393e-06, "loss": 0.04876289367675781, "step": 116215 }, { "epoch": 1.0049199747516233, "grad_norm": 22.710880003740144, "learning_rate": 2.976815395986666e-06, "loss": 0.0907440185546875, "step": 116220 }, { "epoch": 1.0049632082731668, "grad_norm": 2.589753620776918, "learning_rate": 2.976611668953863e-06, "loss": 0.08659820556640625, "step": 116225 }, { "epoch": 1.00500644179471, "grad_norm": 0.23839913566559126, "learning_rate": 2.9764079420289264e-06, "loss": 0.19863510131835938, "step": 116230 }, { "epoch": 1.0050496753162532, "grad_norm": 1.8030167472083707, "learning_rate": 2.976204215212793e-06, "loss": 0.06304035186767579, "step": 116235 }, { "epoch": 1.0050929088377965, "grad_norm": 12.696465427678298, "learning_rate": 2.9760004885064046e-06, "loss": 0.05482749938964844, "step": 116240 }, { "epoch": 1.0051361423593397, "grad_norm": 7.332374274613357, "learning_rate": 2.9757967619107006e-06, "loss": 0.036590194702148436, "step": 116245 }, { "epoch": 1.005179375880883, "grad_norm": 1.5565570396917001, "learning_rate": 2.975593035426619e-06, "loss": 0.009068679809570313, "step": 116250 }, { "epoch": 1.0052226094024264, "grad_norm": 0.041881416408826845, "learning_rate": 2.9753893090550986e-06, "loss": 0.08192672729492187, "step": 116255 }, { "epoch": 1.0052658429239696, "grad_norm": 0.05251134764635327, "learning_rate": 2.9751855827970825e-06, "loss": 0.449506950378418, "step": 116260 }, { "epoch": 1.0053090764455128, "grad_norm": 4.866138401982241, "learning_rate": 2.9749818566535077e-06, "loss": 0.024011611938476562, "step": 116265 }, { "epoch": 1.005352309967056, "grad_norm": 5.700103317969009, "learning_rate": 2.974778130625315e-06, "loss": 0.050807952880859375, "step": 116270 }, { "epoch": 1.0053955434885993, "grad_norm": 0.07698260356174515, "learning_rate": 2.9745744047134425e-06, "loss": 0.013832855224609374, "step": 116275 }, { "epoch": 1.0054387770101425, "grad_norm": 0.9182876660209952, "learning_rate": 2.9743706789188313e-06, "loss": 0.010070037841796876, "step": 116280 }, { "epoch": 1.005482010531686, "grad_norm": 0.6193266740393127, "learning_rate": 2.974166953242419e-06, "loss": 0.05880584716796875, "step": 116285 }, { "epoch": 1.0055252440532292, "grad_norm": 3.5765931042949024, "learning_rate": 2.9739632276851467e-06, "loss": 0.058786773681640626, "step": 116290 }, { "epoch": 1.0055684775747724, "grad_norm": 1.8354333804625949, "learning_rate": 2.973759502247955e-06, "loss": 0.07520027160644531, "step": 116295 }, { "epoch": 1.0056117110963156, "grad_norm": 2.63750749959228, "learning_rate": 2.9735557769317814e-06, "loss": 0.04063262939453125, "step": 116300 }, { "epoch": 1.0056549446178589, "grad_norm": 9.433644589089093, "learning_rate": 2.9733520517375667e-06, "loss": 0.0309356689453125, "step": 116305 }, { "epoch": 1.005698178139402, "grad_norm": 9.046656300680894, "learning_rate": 2.973148326666249e-06, "loss": 0.11426162719726562, "step": 116310 }, { "epoch": 1.0057414116609453, "grad_norm": 10.628303336948147, "learning_rate": 2.9729446017187696e-06, "loss": 0.12649192810058593, "step": 116315 }, { "epoch": 1.0057846451824888, "grad_norm": 0.021563611890095393, "learning_rate": 2.972740876896066e-06, "loss": 0.010289573669433593, "step": 116320 }, { "epoch": 1.005827878704032, "grad_norm": 1.6291767439443978, "learning_rate": 2.9725371521990807e-06, "loss": 0.0126678466796875, "step": 116325 }, { "epoch": 1.0058711122255752, "grad_norm": 1.2989787331684222, "learning_rate": 2.972333427628751e-06, "loss": 0.035892486572265625, "step": 116330 }, { "epoch": 1.0059143457471185, "grad_norm": 47.877476618653255, "learning_rate": 2.9721297031860175e-06, "loss": 0.5021856307983399, "step": 116335 }, { "epoch": 1.0059575792686617, "grad_norm": 18.065116330274446, "learning_rate": 2.971925978871819e-06, "loss": 0.0685638427734375, "step": 116340 }, { "epoch": 1.006000812790205, "grad_norm": 1.5686074041075702, "learning_rate": 2.9717222546870957e-06, "loss": 0.02311553955078125, "step": 116345 }, { "epoch": 1.0060440463117484, "grad_norm": 13.797673957204942, "learning_rate": 2.9715185306327856e-06, "loss": 0.12580490112304688, "step": 116350 }, { "epoch": 1.0060872798332916, "grad_norm": 0.9662930063030888, "learning_rate": 2.971314806709831e-06, "loss": 0.02650623321533203, "step": 116355 }, { "epoch": 1.0061305133548348, "grad_norm": 0.32989550053354216, "learning_rate": 2.9711110829191692e-06, "loss": 0.06669960021972657, "step": 116360 }, { "epoch": 1.006173746876378, "grad_norm": 1.1179906430801099, "learning_rate": 2.970907359261741e-06, "loss": 0.03580551147460938, "step": 116365 }, { "epoch": 1.0062169803979213, "grad_norm": 0.37949321833453753, "learning_rate": 2.970703635738485e-06, "loss": 0.05305747985839844, "step": 116370 }, { "epoch": 1.0062602139194645, "grad_norm": 40.24656485386251, "learning_rate": 2.970499912350342e-06, "loss": 0.08321580886840821, "step": 116375 }, { "epoch": 1.006303447441008, "grad_norm": 44.391886469377056, "learning_rate": 2.9702961890982497e-06, "loss": 0.09463424682617187, "step": 116380 }, { "epoch": 1.0063466809625512, "grad_norm": 4.028853862329872, "learning_rate": 2.9700924659831493e-06, "loss": 0.21804122924804686, "step": 116385 }, { "epoch": 1.0063899144840944, "grad_norm": 21.175091340734138, "learning_rate": 2.9698887430059805e-06, "loss": 0.17335739135742187, "step": 116390 }, { "epoch": 1.0064331480056377, "grad_norm": 0.6615107780749208, "learning_rate": 2.969685020167682e-06, "loss": 0.033087158203125, "step": 116395 }, { "epoch": 1.0064763815271809, "grad_norm": 7.53159035645949, "learning_rate": 2.9694812974691922e-06, "loss": 0.0549072265625, "step": 116400 }, { "epoch": 1.0065196150487241, "grad_norm": 1.0881594232378347, "learning_rate": 2.9692775749114535e-06, "loss": 0.04758148193359375, "step": 116405 }, { "epoch": 1.0065628485702673, "grad_norm": 3.563715966979372, "learning_rate": 2.9690738524954034e-06, "loss": 0.068603515625, "step": 116410 }, { "epoch": 1.0066060820918108, "grad_norm": 5.34033837797447, "learning_rate": 2.968870130221981e-06, "loss": 0.041876220703125, "step": 116415 }, { "epoch": 1.006649315613354, "grad_norm": 3.991914355276025, "learning_rate": 2.9686664080921277e-06, "loss": 0.251080322265625, "step": 116420 }, { "epoch": 1.0066925491348973, "grad_norm": 2.4141974105450923, "learning_rate": 2.9684626861067825e-06, "loss": 0.07589874267578126, "step": 116425 }, { "epoch": 1.0067357826564405, "grad_norm": 13.654643098380333, "learning_rate": 2.968258964266884e-06, "loss": 0.06368904113769532, "step": 116430 }, { "epoch": 1.0067790161779837, "grad_norm": 1.6983342075488628, "learning_rate": 2.9680552425733723e-06, "loss": 0.07442054748535157, "step": 116435 }, { "epoch": 1.006822249699527, "grad_norm": 16.169478361056235, "learning_rate": 2.9678515210271874e-06, "loss": 0.171148681640625, "step": 116440 }, { "epoch": 1.0068654832210704, "grad_norm": 6.706389373309928, "learning_rate": 2.967647799629267e-06, "loss": 0.05168418884277344, "step": 116445 }, { "epoch": 1.0069087167426136, "grad_norm": 0.055005908870877, "learning_rate": 2.9674440783805543e-06, "loss": 0.07310981750488281, "step": 116450 }, { "epoch": 1.0069519502641568, "grad_norm": 2.2405850260927807, "learning_rate": 2.9672403572819856e-06, "loss": 0.05557727813720703, "step": 116455 }, { "epoch": 1.0069951837857, "grad_norm": 0.18755745602577367, "learning_rate": 2.967036636334501e-06, "loss": 0.04015274047851562, "step": 116460 }, { "epoch": 1.0070384173072433, "grad_norm": 11.470105549065789, "learning_rate": 2.966832915539042e-06, "loss": 0.09633255004882812, "step": 116465 }, { "epoch": 1.0070816508287865, "grad_norm": 4.54739766701475, "learning_rate": 2.9666291948965458e-06, "loss": 0.1150115966796875, "step": 116470 }, { "epoch": 1.0071248843503298, "grad_norm": 1.1670297291575782, "learning_rate": 2.966425474407953e-06, "loss": 0.018301010131835938, "step": 116475 }, { "epoch": 1.0071681178718732, "grad_norm": 1.5061299658738907, "learning_rate": 2.9662217540742014e-06, "loss": 0.051019287109375, "step": 116480 }, { "epoch": 1.0072113513934164, "grad_norm": 1.8655731312368709, "learning_rate": 2.966018033896234e-06, "loss": 0.052965736389160155, "step": 116485 }, { "epoch": 1.0072545849149597, "grad_norm": 0.3124183543675354, "learning_rate": 2.9658143138749873e-06, "loss": 0.04898757934570312, "step": 116490 }, { "epoch": 1.007297818436503, "grad_norm": 1.3234212244543517, "learning_rate": 2.9656105940114025e-06, "loss": 0.04154281616210938, "step": 116495 }, { "epoch": 1.0073410519580461, "grad_norm": 4.686273105980424, "learning_rate": 2.9654068743064194e-06, "loss": 0.08613815307617187, "step": 116500 }, { "epoch": 1.0073842854795894, "grad_norm": 13.505086602471996, "learning_rate": 2.965203154760976e-06, "loss": 0.24654083251953124, "step": 116505 }, { "epoch": 1.0074275190011328, "grad_norm": 0.5873963692511353, "learning_rate": 2.9649994353760112e-06, "loss": 0.11980743408203125, "step": 116510 }, { "epoch": 1.007470752522676, "grad_norm": 9.81292522333559, "learning_rate": 2.964795716152467e-06, "loss": 0.05661468505859375, "step": 116515 }, { "epoch": 1.0075139860442193, "grad_norm": 42.574720542220064, "learning_rate": 2.9645919970912822e-06, "loss": 0.2002655029296875, "step": 116520 }, { "epoch": 1.0075572195657625, "grad_norm": 35.567047755038764, "learning_rate": 2.9643882781933962e-06, "loss": 0.1076568603515625, "step": 116525 }, { "epoch": 1.0076004530873057, "grad_norm": 0.7150870987214312, "learning_rate": 2.9641845594597482e-06, "loss": 0.05126953125, "step": 116530 }, { "epoch": 1.007643686608849, "grad_norm": 4.674135499501888, "learning_rate": 2.9639808408912776e-06, "loss": 0.021785545349121093, "step": 116535 }, { "epoch": 1.0076869201303924, "grad_norm": 0.35651644108440067, "learning_rate": 2.9637771224889236e-06, "loss": 0.040309906005859375, "step": 116540 }, { "epoch": 1.0077301536519356, "grad_norm": 1.7145574065546345, "learning_rate": 2.9635734042536266e-06, "loss": 0.04893341064453125, "step": 116545 }, { "epoch": 1.0077733871734789, "grad_norm": 15.354955738215619, "learning_rate": 2.9633696861863263e-06, "loss": 0.08307533264160157, "step": 116550 }, { "epoch": 1.007816620695022, "grad_norm": 0.7406790809981499, "learning_rate": 2.9631659682879616e-06, "loss": 0.018896484375, "step": 116555 }, { "epoch": 1.0078598542165653, "grad_norm": 3.4037876904238047, "learning_rate": 2.962962250559473e-06, "loss": 0.2476776123046875, "step": 116560 }, { "epoch": 1.0079030877381085, "grad_norm": 1.5699663732521676, "learning_rate": 2.9627585330017975e-06, "loss": 0.050641632080078124, "step": 116565 }, { "epoch": 1.0079463212596518, "grad_norm": 2.970276233359923, "learning_rate": 2.962554815615878e-06, "loss": 0.24008331298828126, "step": 116570 }, { "epoch": 1.0079895547811952, "grad_norm": 0.36694087138163306, "learning_rate": 2.9623510984026503e-06, "loss": 0.058746719360351564, "step": 116575 }, { "epoch": 1.0080327883027385, "grad_norm": 0.8641757962323804, "learning_rate": 2.962147381363058e-06, "loss": 0.049483871459960936, "step": 116580 }, { "epoch": 1.0080760218242817, "grad_norm": 0.8733309365392177, "learning_rate": 2.9619436644980384e-06, "loss": 0.00997467041015625, "step": 116585 }, { "epoch": 1.008119255345825, "grad_norm": 29.372239429716917, "learning_rate": 2.9617399478085308e-06, "loss": 0.177972412109375, "step": 116590 }, { "epoch": 1.0081624888673681, "grad_norm": 10.161065576487324, "learning_rate": 2.961536231295475e-06, "loss": 0.12945976257324218, "step": 116595 }, { "epoch": 1.0082057223889114, "grad_norm": 0.8737051130809057, "learning_rate": 2.961332514959811e-06, "loss": 0.0114013671875, "step": 116600 }, { "epoch": 1.0082489559104548, "grad_norm": 11.073203915940233, "learning_rate": 2.9611287988024773e-06, "loss": 0.08313674926757812, "step": 116605 }, { "epoch": 1.008292189431998, "grad_norm": 7.716982930551853, "learning_rate": 2.960925082824415e-06, "loss": 0.15647239685058595, "step": 116610 }, { "epoch": 1.0083354229535413, "grad_norm": 5.202992826004712, "learning_rate": 2.960721367026563e-06, "loss": 0.11178016662597656, "step": 116615 }, { "epoch": 1.0083786564750845, "grad_norm": 2.4288288769323403, "learning_rate": 2.9605176514098602e-06, "loss": 0.03397293090820312, "step": 116620 }, { "epoch": 1.0084218899966277, "grad_norm": 36.71794047917899, "learning_rate": 2.960313935975246e-06, "loss": 0.10783157348632813, "step": 116625 }, { "epoch": 1.008465123518171, "grad_norm": 0.10892641292607774, "learning_rate": 2.9601102207236614e-06, "loss": 0.0228912353515625, "step": 116630 }, { "epoch": 1.0085083570397144, "grad_norm": 57.158582805032324, "learning_rate": 2.959906505656043e-06, "loss": 0.4934326171875, "step": 116635 }, { "epoch": 1.0085515905612576, "grad_norm": 0.1248471639507894, "learning_rate": 2.9597027907733343e-06, "loss": 0.12647857666015624, "step": 116640 }, { "epoch": 1.0085948240828009, "grad_norm": 0.23201212221230852, "learning_rate": 2.959499076076472e-06, "loss": 0.036480712890625, "step": 116645 }, { "epoch": 1.008638057604344, "grad_norm": 11.776091940100663, "learning_rate": 2.959295361566397e-06, "loss": 0.08408012390136718, "step": 116650 }, { "epoch": 1.0086812911258873, "grad_norm": 21.85966441828884, "learning_rate": 2.959091647244047e-06, "loss": 0.2761016845703125, "step": 116655 }, { "epoch": 1.0087245246474306, "grad_norm": 0.7570169162098419, "learning_rate": 2.958887933110364e-06, "loss": 0.057489013671875, "step": 116660 }, { "epoch": 1.0087677581689738, "grad_norm": 1.6644575659571617, "learning_rate": 2.9586842191662863e-06, "loss": 0.009735870361328124, "step": 116665 }, { "epoch": 1.0088109916905172, "grad_norm": 57.899149841044085, "learning_rate": 2.9584805054127516e-06, "loss": 0.220501708984375, "step": 116670 }, { "epoch": 1.0088542252120605, "grad_norm": 0.6771884315426554, "learning_rate": 2.958276791850702e-06, "loss": 0.0167694091796875, "step": 116675 }, { "epoch": 1.0088974587336037, "grad_norm": 14.07345416312808, "learning_rate": 2.958073078481077e-06, "loss": 0.05524749755859375, "step": 116680 }, { "epoch": 1.008940692255147, "grad_norm": 3.648545094570554, "learning_rate": 2.957869365304814e-06, "loss": 0.03166275024414063, "step": 116685 }, { "epoch": 1.0089839257766902, "grad_norm": 24.358072344808683, "learning_rate": 2.957665652322855e-06, "loss": 0.05702667236328125, "step": 116690 }, { "epoch": 1.0090271592982334, "grad_norm": 0.12272836890238868, "learning_rate": 2.9574619395361377e-06, "loss": 0.07873611450195313, "step": 116695 }, { "epoch": 1.0090703928197768, "grad_norm": 27.18197705673981, "learning_rate": 2.9572582269456012e-06, "loss": 0.07347602844238281, "step": 116700 }, { "epoch": 1.00911362634132, "grad_norm": 0.7159468103682354, "learning_rate": 2.957054514552187e-06, "loss": 0.04883918762207031, "step": 116705 }, { "epoch": 1.0091568598628633, "grad_norm": 1.668003779065155, "learning_rate": 2.956850802356833e-06, "loss": 0.058442878723144534, "step": 116710 }, { "epoch": 1.0092000933844065, "grad_norm": 2.805114519244401, "learning_rate": 2.95664709036048e-06, "loss": 0.05497875213623047, "step": 116715 }, { "epoch": 1.0092433269059498, "grad_norm": 15.226094173633491, "learning_rate": 2.9564433785640674e-06, "loss": 0.1788330078125, "step": 116720 }, { "epoch": 1.009286560427493, "grad_norm": 3.659103041881882, "learning_rate": 2.9562396669685333e-06, "loss": 0.14372711181640624, "step": 116725 }, { "epoch": 1.0093297939490364, "grad_norm": 3.0440927403505036, "learning_rate": 2.956035955574817e-06, "loss": 0.0168060302734375, "step": 116730 }, { "epoch": 1.0093730274705797, "grad_norm": 2.207826393336075, "learning_rate": 2.9558322443838603e-06, "loss": 0.0381744384765625, "step": 116735 }, { "epoch": 1.009416260992123, "grad_norm": 0.9614131189412718, "learning_rate": 2.9556285333966005e-06, "loss": 0.02564849853515625, "step": 116740 }, { "epoch": 1.0094594945136661, "grad_norm": 1.1727606358889915, "learning_rate": 2.9554248226139788e-06, "loss": 0.024547576904296875, "step": 116745 }, { "epoch": 1.0095027280352094, "grad_norm": 10.450089467839904, "learning_rate": 2.955221112036934e-06, "loss": 0.096929931640625, "step": 116750 }, { "epoch": 1.0095459615567526, "grad_norm": 0.30371686480138627, "learning_rate": 2.9550174016664053e-06, "loss": 0.1375812530517578, "step": 116755 }, { "epoch": 1.0095891950782958, "grad_norm": 5.150224523105562, "learning_rate": 2.954813691503332e-06, "loss": 0.03782958984375, "step": 116760 }, { "epoch": 1.0096324285998393, "grad_norm": 1.7283270753003763, "learning_rate": 2.9546099815486535e-06, "loss": 0.08493118286132813, "step": 116765 }, { "epoch": 1.0096756621213825, "grad_norm": 25.32067313205988, "learning_rate": 2.95440627180331e-06, "loss": 0.07294387817382812, "step": 116770 }, { "epoch": 1.0097188956429257, "grad_norm": 41.075687516638666, "learning_rate": 2.9542025622682416e-06, "loss": 0.1873138427734375, "step": 116775 }, { "epoch": 1.009762129164469, "grad_norm": 14.318183005583878, "learning_rate": 2.9539988529443863e-06, "loss": 0.091668701171875, "step": 116780 }, { "epoch": 1.0098053626860122, "grad_norm": 0.06482727544447933, "learning_rate": 2.953795143832685e-06, "loss": 0.06528282165527344, "step": 116785 }, { "epoch": 1.0098485962075554, "grad_norm": 3.2444564646492093, "learning_rate": 2.953591434934075e-06, "loss": 0.1132232666015625, "step": 116790 }, { "epoch": 1.0098918297290989, "grad_norm": 6.497141327082717, "learning_rate": 2.9533877262494975e-06, "loss": 0.1073638916015625, "step": 116795 }, { "epoch": 1.009935063250642, "grad_norm": 4.898238092939053, "learning_rate": 2.953184017779892e-06, "loss": 0.142425537109375, "step": 116800 }, { "epoch": 1.0099782967721853, "grad_norm": 9.852822919842648, "learning_rate": 2.952980309526198e-06, "loss": 0.2302276611328125, "step": 116805 }, { "epoch": 1.0100215302937285, "grad_norm": 6.554684111422783, "learning_rate": 2.9527766014893547e-06, "loss": 0.11498565673828125, "step": 116810 }, { "epoch": 1.0100647638152718, "grad_norm": 1.4396298876410347, "learning_rate": 2.9525728936703018e-06, "loss": 0.039743804931640626, "step": 116815 }, { "epoch": 1.010107997336815, "grad_norm": 7.727744728173892, "learning_rate": 2.9523691860699778e-06, "loss": 0.076953125, "step": 116820 }, { "epoch": 1.0101512308583582, "grad_norm": 7.493652058964627, "learning_rate": 2.9521654786893233e-06, "loss": 0.0579437255859375, "step": 116825 }, { "epoch": 1.0101944643799017, "grad_norm": 1.9628666812832773, "learning_rate": 2.9519617715292764e-06, "loss": 0.08707275390625, "step": 116830 }, { "epoch": 1.010237697901445, "grad_norm": 0.7157064545745913, "learning_rate": 2.9517580645907786e-06, "loss": 0.03520774841308594, "step": 116835 }, { "epoch": 1.0102809314229881, "grad_norm": 1.4355851046217944, "learning_rate": 2.9515543578747684e-06, "loss": 0.12050018310546876, "step": 116840 }, { "epoch": 1.0103241649445314, "grad_norm": 19.34227780778662, "learning_rate": 2.951350651382185e-06, "loss": 0.22248382568359376, "step": 116845 }, { "epoch": 1.0103673984660746, "grad_norm": 1.9387398809140077, "learning_rate": 2.951146945113968e-06, "loss": 0.18592529296875, "step": 116850 }, { "epoch": 1.0104106319876178, "grad_norm": 0.2444086166664178, "learning_rate": 2.9509432390710574e-06, "loss": 0.10992622375488281, "step": 116855 }, { "epoch": 1.0104538655091613, "grad_norm": 2.066469447509932, "learning_rate": 2.950739533254391e-06, "loss": 0.03960113525390625, "step": 116860 }, { "epoch": 1.0104970990307045, "grad_norm": 5.424806970772371, "learning_rate": 2.9505358276649103e-06, "loss": 0.17045822143554687, "step": 116865 }, { "epoch": 1.0105403325522477, "grad_norm": 1.481317902148848, "learning_rate": 2.950332122303555e-06, "loss": 0.013251495361328126, "step": 116870 }, { "epoch": 1.010583566073791, "grad_norm": 38.52569825942822, "learning_rate": 2.9501284171712624e-06, "loss": 0.29287109375, "step": 116875 }, { "epoch": 1.0106267995953342, "grad_norm": 9.307125943406676, "learning_rate": 2.949924712268973e-06, "loss": 0.1706695556640625, "step": 116880 }, { "epoch": 1.0106700331168774, "grad_norm": 12.010189900788523, "learning_rate": 2.949721007597627e-06, "loss": 0.043126678466796874, "step": 116885 }, { "epoch": 1.0107132666384209, "grad_norm": 3.3763697634491825, "learning_rate": 2.9495173031581623e-06, "loss": 0.03952484130859375, "step": 116890 }, { "epoch": 1.010756500159964, "grad_norm": 1.9572522200734923, "learning_rate": 2.9493135989515202e-06, "loss": 0.0144622802734375, "step": 116895 }, { "epoch": 1.0107997336815073, "grad_norm": 0.3784810427718519, "learning_rate": 2.9491098949786394e-06, "loss": 0.06790618896484375, "step": 116900 }, { "epoch": 1.0108429672030506, "grad_norm": 3.134114704728762, "learning_rate": 2.9489061912404595e-06, "loss": 0.117926025390625, "step": 116905 }, { "epoch": 1.0108862007245938, "grad_norm": 1.9511410124626827, "learning_rate": 2.948702487737919e-06, "loss": 0.08135299682617188, "step": 116910 }, { "epoch": 1.010929434246137, "grad_norm": 0.8994493191162415, "learning_rate": 2.9484987844719588e-06, "loss": 0.043573760986328126, "step": 116915 }, { "epoch": 1.0109726677676802, "grad_norm": 2.2203745217678548, "learning_rate": 2.9482950814435175e-06, "loss": 0.017817306518554687, "step": 116920 }, { "epoch": 1.0110159012892237, "grad_norm": 2.1853083743778052, "learning_rate": 2.9480913786535334e-06, "loss": 0.03726119995117187, "step": 116925 }, { "epoch": 1.011059134810767, "grad_norm": 7.552555505680898, "learning_rate": 2.9478876761029488e-06, "loss": 0.04262542724609375, "step": 116930 }, { "epoch": 1.0111023683323102, "grad_norm": 57.50095747231273, "learning_rate": 2.947683973792701e-06, "loss": 0.16315555572509766, "step": 116935 }, { "epoch": 1.0111456018538534, "grad_norm": 1.7443929214567742, "learning_rate": 2.94748027172373e-06, "loss": 0.03074951171875, "step": 116940 }, { "epoch": 1.0111888353753966, "grad_norm": 0.48009124574350304, "learning_rate": 2.9472765698969763e-06, "loss": 0.05545883178710938, "step": 116945 }, { "epoch": 1.0112320688969398, "grad_norm": 15.503237509861703, "learning_rate": 2.947072868313378e-06, "loss": 0.06712074279785156, "step": 116950 }, { "epoch": 1.0112753024184833, "grad_norm": 0.07967919106135282, "learning_rate": 2.9468691669738734e-06, "loss": 0.0197052001953125, "step": 116955 }, { "epoch": 1.0113185359400265, "grad_norm": 5.582011815908133, "learning_rate": 2.946665465879405e-06, "loss": 0.3736598968505859, "step": 116960 }, { "epoch": 1.0113617694615697, "grad_norm": 2.105869592477887, "learning_rate": 2.94646176503091e-06, "loss": 0.06526145935058594, "step": 116965 }, { "epoch": 1.011405002983113, "grad_norm": 11.81349796884726, "learning_rate": 2.9462580644293296e-06, "loss": 0.02543182373046875, "step": 116970 }, { "epoch": 1.0114482365046562, "grad_norm": 2.6014098175495395, "learning_rate": 2.9460543640756024e-06, "loss": 0.03479843139648438, "step": 116975 }, { "epoch": 1.0114914700261994, "grad_norm": 18.50695315196252, "learning_rate": 2.945850663970667e-06, "loss": 0.06836509704589844, "step": 116980 }, { "epoch": 1.0115347035477429, "grad_norm": 13.815864686104195, "learning_rate": 2.9456469641154628e-06, "loss": 0.09502182006835938, "step": 116985 }, { "epoch": 1.0115779370692861, "grad_norm": 2.35153621107573, "learning_rate": 2.945443264510931e-06, "loss": 0.021221923828125, "step": 116990 }, { "epoch": 1.0116211705908293, "grad_norm": 1.0779343442713656, "learning_rate": 2.94523956515801e-06, "loss": 0.07369384765625, "step": 116995 }, { "epoch": 1.0116644041123726, "grad_norm": 1.4001585337771751, "learning_rate": 2.9450358660576394e-06, "loss": 0.0472442626953125, "step": 117000 }, { "epoch": 1.0117076376339158, "grad_norm": 0.068374751062234, "learning_rate": 2.9448321672107584e-06, "loss": 0.04162788391113281, "step": 117005 }, { "epoch": 1.011750871155459, "grad_norm": 26.015496052310468, "learning_rate": 2.944628468618307e-06, "loss": 0.1500629425048828, "step": 117010 }, { "epoch": 1.0117941046770023, "grad_norm": 23.201306514971847, "learning_rate": 2.944424770281223e-06, "loss": 0.27908859252929685, "step": 117015 }, { "epoch": 1.0118373381985457, "grad_norm": 1.3824058577733978, "learning_rate": 2.9442210722004473e-06, "loss": 0.153277587890625, "step": 117020 }, { "epoch": 1.011880571720089, "grad_norm": 0.9409709395152027, "learning_rate": 2.9440173743769193e-06, "loss": 0.0295806884765625, "step": 117025 }, { "epoch": 1.0119238052416322, "grad_norm": 14.248320407817648, "learning_rate": 2.943813676811579e-06, "loss": 0.05604686737060547, "step": 117030 }, { "epoch": 1.0119670387631754, "grad_norm": 0.7807585256687817, "learning_rate": 2.943609979505365e-06, "loss": 0.10803604125976562, "step": 117035 }, { "epoch": 1.0120102722847186, "grad_norm": 1.3550817398421302, "learning_rate": 2.943406282459216e-06, "loss": 0.0801666259765625, "step": 117040 }, { "epoch": 1.0120535058062619, "grad_norm": 4.69359699449695, "learning_rate": 2.9432025856740726e-06, "loss": 0.04352645874023438, "step": 117045 }, { "epoch": 1.0120967393278053, "grad_norm": 53.91201546657365, "learning_rate": 2.9429988891508732e-06, "loss": 0.0841094970703125, "step": 117050 }, { "epoch": 1.0121399728493485, "grad_norm": 14.653745903476409, "learning_rate": 2.942795192890558e-06, "loss": 0.02848987579345703, "step": 117055 }, { "epoch": 1.0121832063708918, "grad_norm": 0.6796511621761424, "learning_rate": 2.9425914968940675e-06, "loss": 0.028271484375, "step": 117060 }, { "epoch": 1.012226439892435, "grad_norm": 5.985568975900421, "learning_rate": 2.942387801162339e-06, "loss": 0.11530075073242188, "step": 117065 }, { "epoch": 1.0122696734139782, "grad_norm": 0.2273678965519945, "learning_rate": 2.9421841056963134e-06, "loss": 0.055255126953125, "step": 117070 }, { "epoch": 1.0123129069355215, "grad_norm": 11.746316552079758, "learning_rate": 2.941980410496929e-06, "loss": 0.07594642639160157, "step": 117075 }, { "epoch": 1.0123561404570647, "grad_norm": 3.6713575138728554, "learning_rate": 2.9417767155651267e-06, "loss": 0.07500572204589843, "step": 117080 }, { "epoch": 1.0123993739786081, "grad_norm": 0.3205983877144843, "learning_rate": 2.9415730209018435e-06, "loss": 0.00888671875, "step": 117085 }, { "epoch": 1.0124426075001514, "grad_norm": 10.864556798789353, "learning_rate": 2.9413693265080214e-06, "loss": 0.06828460693359376, "step": 117090 }, { "epoch": 1.0124858410216946, "grad_norm": 25.206770533517947, "learning_rate": 2.941165632384599e-06, "loss": 0.16336669921875, "step": 117095 }, { "epoch": 1.0125290745432378, "grad_norm": 4.741787418790096, "learning_rate": 2.9409619385325153e-06, "loss": 0.04256591796875, "step": 117100 }, { "epoch": 1.012572308064781, "grad_norm": 49.159984019968995, "learning_rate": 2.9407582449527096e-06, "loss": 0.205438232421875, "step": 117105 }, { "epoch": 1.0126155415863243, "grad_norm": 5.633818885600818, "learning_rate": 2.9405545516461225e-06, "loss": 0.04683990478515625, "step": 117110 }, { "epoch": 1.0126587751078677, "grad_norm": 3.381617009550765, "learning_rate": 2.9403508586136907e-06, "loss": 0.02870330810546875, "step": 117115 }, { "epoch": 1.012702008629411, "grad_norm": 4.041810837172605, "learning_rate": 2.940147165856357e-06, "loss": 0.06615200042724609, "step": 117120 }, { "epoch": 1.0127452421509542, "grad_norm": 6.09216414263765, "learning_rate": 2.939943473375059e-06, "loss": 0.021963882446289062, "step": 117125 }, { "epoch": 1.0127884756724974, "grad_norm": 1.1905154814633996, "learning_rate": 2.939739781170737e-06, "loss": 0.042280960083007815, "step": 117130 }, { "epoch": 1.0128317091940406, "grad_norm": 0.9176233658420674, "learning_rate": 2.9395360892443287e-06, "loss": 0.089666748046875, "step": 117135 }, { "epoch": 1.0128749427155839, "grad_norm": 8.306448715351165, "learning_rate": 2.9393323975967754e-06, "loss": 0.18817977905273436, "step": 117140 }, { "epoch": 1.0129181762371273, "grad_norm": 5.942586405425876, "learning_rate": 2.939128706229014e-06, "loss": 0.09654426574707031, "step": 117145 }, { "epoch": 1.0129614097586706, "grad_norm": 3.2624346899055023, "learning_rate": 2.9389250151419874e-06, "loss": 0.03614349365234375, "step": 117150 }, { "epoch": 1.0130046432802138, "grad_norm": 0.8042800742514968, "learning_rate": 2.9387213243366335e-06, "loss": 0.09604339599609375, "step": 117155 }, { "epoch": 1.013047876801757, "grad_norm": 7.5010173520831716, "learning_rate": 2.9385176338138905e-06, "loss": 0.0560821533203125, "step": 117160 }, { "epoch": 1.0130911103233002, "grad_norm": 1.3206615462044804, "learning_rate": 2.9383139435746993e-06, "loss": 0.033831024169921876, "step": 117165 }, { "epoch": 1.0131343438448435, "grad_norm": 21.131336115083524, "learning_rate": 2.938110253619999e-06, "loss": 0.09880714416503907, "step": 117170 }, { "epoch": 1.0131775773663867, "grad_norm": 25.3894169883442, "learning_rate": 2.9379065639507285e-06, "loss": 0.176568603515625, "step": 117175 }, { "epoch": 1.0132208108879301, "grad_norm": 7.372742281421916, "learning_rate": 2.9377028745678262e-06, "loss": 0.04917449951171875, "step": 117180 }, { "epoch": 1.0132640444094734, "grad_norm": 8.302978830262353, "learning_rate": 2.9374991854722337e-06, "loss": 0.05699462890625, "step": 117185 }, { "epoch": 1.0133072779310166, "grad_norm": 1.201909588105385, "learning_rate": 2.937295496664889e-06, "loss": 0.0350738525390625, "step": 117190 }, { "epoch": 1.0133505114525598, "grad_norm": 1.3416618386441204, "learning_rate": 2.937091808146733e-06, "loss": 0.03017005920410156, "step": 117195 }, { "epoch": 1.013393744974103, "grad_norm": 9.359299229126162, "learning_rate": 2.9368881199187038e-06, "loss": 0.08176116943359375, "step": 117200 }, { "epoch": 1.0134369784956463, "grad_norm": 8.891061709777045, "learning_rate": 2.936684431981741e-06, "loss": 0.057520294189453126, "step": 117205 }, { "epoch": 1.0134802120171897, "grad_norm": 1.2164186845816751, "learning_rate": 2.9364807443367826e-06, "loss": 0.048282623291015625, "step": 117210 }, { "epoch": 1.013523445538733, "grad_norm": 4.209026714530648, "learning_rate": 2.9362770569847706e-06, "loss": 0.17558059692382813, "step": 117215 }, { "epoch": 1.0135666790602762, "grad_norm": 0.6406712090786546, "learning_rate": 2.936073369926642e-06, "loss": 0.06455078125, "step": 117220 }, { "epoch": 1.0136099125818194, "grad_norm": 15.825731147910945, "learning_rate": 2.9358696831633392e-06, "loss": 0.0315643310546875, "step": 117225 }, { "epoch": 1.0136531461033627, "grad_norm": 6.481927264280169, "learning_rate": 2.935665996695799e-06, "loss": 0.04553680419921875, "step": 117230 }, { "epoch": 1.0136963796249059, "grad_norm": 9.199125870942787, "learning_rate": 2.9354623105249618e-06, "loss": 0.037241363525390626, "step": 117235 }, { "epoch": 1.0137396131464493, "grad_norm": 1.1663647533134398, "learning_rate": 2.9352586246517655e-06, "loss": 0.05847320556640625, "step": 117240 }, { "epoch": 1.0137828466679926, "grad_norm": 1.449464141248972, "learning_rate": 2.9350549390771518e-06, "loss": 0.01313323974609375, "step": 117245 }, { "epoch": 1.0138260801895358, "grad_norm": 10.38907725874295, "learning_rate": 2.9348512538020586e-06, "loss": 0.039966392517089847, "step": 117250 }, { "epoch": 1.013869313711079, "grad_norm": 4.972187762270251, "learning_rate": 2.934647568827426e-06, "loss": 0.19647483825683593, "step": 117255 }, { "epoch": 1.0139125472326223, "grad_norm": 5.397998003180304, "learning_rate": 2.934443884154193e-06, "loss": 0.041500091552734375, "step": 117260 }, { "epoch": 1.0139557807541655, "grad_norm": 1.1463515146096281, "learning_rate": 2.9342401997832997e-06, "loss": 0.09981918334960938, "step": 117265 }, { "epoch": 1.0139990142757087, "grad_norm": 0.4829597604632896, "learning_rate": 2.9340365157156833e-06, "loss": 0.02554779052734375, "step": 117270 }, { "epoch": 1.0140422477972522, "grad_norm": 13.100187020175317, "learning_rate": 2.933832831952285e-06, "loss": 0.16149139404296875, "step": 117275 }, { "epoch": 1.0140854813187954, "grad_norm": 4.735204687177825, "learning_rate": 2.933629148494044e-06, "loss": 0.05134315490722656, "step": 117280 }, { "epoch": 1.0141287148403386, "grad_norm": 1.2876729400236138, "learning_rate": 2.9334254653418994e-06, "loss": 0.16944217681884766, "step": 117285 }, { "epoch": 1.0141719483618818, "grad_norm": 1.391148196840027, "learning_rate": 2.9332217824967913e-06, "loss": 0.043304443359375, "step": 117290 }, { "epoch": 1.014215181883425, "grad_norm": 7.064102992661983, "learning_rate": 2.9330180999596585e-06, "loss": 0.04003143310546875, "step": 117295 }, { "epoch": 1.0142584154049683, "grad_norm": 3.0927570566654756, "learning_rate": 2.9328144177314394e-06, "loss": 0.0220428466796875, "step": 117300 }, { "epoch": 1.0143016489265118, "grad_norm": 2.7030319139740766, "learning_rate": 2.9326107358130747e-06, "loss": 0.012656784057617188, "step": 117305 }, { "epoch": 1.014344882448055, "grad_norm": 0.19509907850770283, "learning_rate": 2.932407054205503e-06, "loss": 0.043661880493164065, "step": 117310 }, { "epoch": 1.0143881159695982, "grad_norm": 22.866740396823396, "learning_rate": 2.9322033729096643e-06, "loss": 0.14637451171875, "step": 117315 }, { "epoch": 1.0144313494911414, "grad_norm": 5.670650204579513, "learning_rate": 2.931999691926498e-06, "loss": 0.220989990234375, "step": 117320 }, { "epoch": 1.0144745830126847, "grad_norm": 31.10067750005961, "learning_rate": 2.9317960112569433e-06, "loss": 0.14428977966308593, "step": 117325 }, { "epoch": 1.014517816534228, "grad_norm": 10.320673084107593, "learning_rate": 2.931592330901939e-06, "loss": 0.049468994140625, "step": 117330 }, { "epoch": 1.0145610500557714, "grad_norm": 5.395286232558037, "learning_rate": 2.9313886508624232e-06, "loss": 0.13231048583984376, "step": 117335 }, { "epoch": 1.0146042835773146, "grad_norm": 31.321894304470693, "learning_rate": 2.931184971139339e-06, "loss": 0.51077880859375, "step": 117340 }, { "epoch": 1.0146475170988578, "grad_norm": 7.45864635749628, "learning_rate": 2.9309812917336233e-06, "loss": 0.09716796875, "step": 117345 }, { "epoch": 1.014690750620401, "grad_norm": 31.722973490670846, "learning_rate": 2.9307776126462155e-06, "loss": 0.09980239868164062, "step": 117350 }, { "epoch": 1.0147339841419443, "grad_norm": 0.9778740705608813, "learning_rate": 2.9305739338780557e-06, "loss": 0.049176025390625, "step": 117355 }, { "epoch": 1.0147772176634875, "grad_norm": 0.17543897192602642, "learning_rate": 2.930370255430082e-06, "loss": 0.19262924194335937, "step": 117360 }, { "epoch": 1.0148204511850307, "grad_norm": 2.432222848937098, "learning_rate": 2.9301665773032354e-06, "loss": 0.11133575439453125, "step": 117365 }, { "epoch": 1.0148636847065742, "grad_norm": 1.6398622842260724, "learning_rate": 2.9299628994984527e-06, "loss": 0.06904678344726563, "step": 117370 }, { "epoch": 1.0149069182281174, "grad_norm": 1.586862411551288, "learning_rate": 2.9297592220166764e-06, "loss": 0.07065505981445312, "step": 117375 }, { "epoch": 1.0149501517496606, "grad_norm": 9.265680569765445, "learning_rate": 2.9295555448588444e-06, "loss": 0.14852981567382811, "step": 117380 }, { "epoch": 1.0149933852712039, "grad_norm": 45.14712801889202, "learning_rate": 2.9293518680258952e-06, "loss": 0.17751312255859375, "step": 117385 }, { "epoch": 1.015036618792747, "grad_norm": 30.90143632221495, "learning_rate": 2.9291481915187696e-06, "loss": 0.2030271530151367, "step": 117390 }, { "epoch": 1.0150798523142903, "grad_norm": 4.059584129512504, "learning_rate": 2.9289445153384064e-06, "loss": 0.023672103881835938, "step": 117395 }, { "epoch": 1.0151230858358338, "grad_norm": 0.8957987895590261, "learning_rate": 2.9287408394857434e-06, "loss": 0.126153564453125, "step": 117400 }, { "epoch": 1.015166319357377, "grad_norm": 0.4225660344493804, "learning_rate": 2.9285371639617227e-06, "loss": 0.17802734375, "step": 117405 }, { "epoch": 1.0152095528789202, "grad_norm": 0.28266104665436453, "learning_rate": 2.928333488767282e-06, "loss": 0.23063583374023439, "step": 117410 }, { "epoch": 1.0152527864004635, "grad_norm": 0.8433699600268125, "learning_rate": 2.9281298139033607e-06, "loss": 0.13344879150390626, "step": 117415 }, { "epoch": 1.0152960199220067, "grad_norm": 15.007160900181326, "learning_rate": 2.9279261393708992e-06, "loss": 0.10827713012695313, "step": 117420 }, { "epoch": 1.01533925344355, "grad_norm": 1.2943376889698146, "learning_rate": 2.927722465170835e-06, "loss": 0.07393360137939453, "step": 117425 }, { "epoch": 1.0153824869650931, "grad_norm": 1.6251954169797844, "learning_rate": 2.9275187913041094e-06, "loss": 0.07552947998046874, "step": 117430 }, { "epoch": 1.0154257204866366, "grad_norm": 10.275561288896728, "learning_rate": 2.9273151177716593e-06, "loss": 0.0530487060546875, "step": 117435 }, { "epoch": 1.0154689540081798, "grad_norm": 0.657243852495276, "learning_rate": 2.9271114445744263e-06, "loss": 0.05505428314208984, "step": 117440 }, { "epoch": 1.015512187529723, "grad_norm": 1.7815188912289401, "learning_rate": 2.9269077717133485e-06, "loss": 0.13605308532714844, "step": 117445 }, { "epoch": 1.0155554210512663, "grad_norm": 0.8998896748853573, "learning_rate": 2.926704099189366e-06, "loss": 0.01068286895751953, "step": 117450 }, { "epoch": 1.0155986545728095, "grad_norm": 4.159241792709889, "learning_rate": 2.926500427003418e-06, "loss": 0.018198013305664062, "step": 117455 }, { "epoch": 1.0156418880943527, "grad_norm": 3.6302196140971965, "learning_rate": 2.9262967551564436e-06, "loss": 0.12531585693359376, "step": 117460 }, { "epoch": 1.0156851216158962, "grad_norm": 0.15900587444337203, "learning_rate": 2.9260930836493806e-06, "loss": 0.09980049133300781, "step": 117465 }, { "epoch": 1.0157283551374394, "grad_norm": 12.565394346225707, "learning_rate": 2.925889412483171e-06, "loss": 0.1224151611328125, "step": 117470 }, { "epoch": 1.0157715886589826, "grad_norm": 0.9314274549149436, "learning_rate": 2.9256857416587524e-06, "loss": 0.0151123046875, "step": 117475 }, { "epoch": 1.0158148221805259, "grad_norm": 3.471183834380915, "learning_rate": 2.9254820711770655e-06, "loss": 0.15643348693847656, "step": 117480 }, { "epoch": 1.015858055702069, "grad_norm": 5.884857288497223, "learning_rate": 2.9252784010390483e-06, "loss": 0.12173633575439453, "step": 117485 }, { "epoch": 1.0159012892236123, "grad_norm": 2.5026938428985397, "learning_rate": 2.9250747312456405e-06, "loss": 0.04225234985351563, "step": 117490 }, { "epoch": 1.0159445227451558, "grad_norm": 11.492502999322593, "learning_rate": 2.9248710617977798e-06, "loss": 0.09744224548339844, "step": 117495 }, { "epoch": 1.015987756266699, "grad_norm": 0.4005520790267352, "learning_rate": 2.9246673926964093e-06, "loss": 0.0918182373046875, "step": 117500 }, { "epoch": 1.0160309897882422, "grad_norm": 10.641389554553092, "learning_rate": 2.9244637239424647e-06, "loss": 0.04349822998046875, "step": 117505 }, { "epoch": 1.0160742233097855, "grad_norm": 3.4707336460722553, "learning_rate": 2.9242600555368878e-06, "loss": 0.09453125, "step": 117510 }, { "epoch": 1.0161174568313287, "grad_norm": 1.455467432064112, "learning_rate": 2.9240563874806166e-06, "loss": 0.0409576416015625, "step": 117515 }, { "epoch": 1.016160690352872, "grad_norm": 12.908896965082041, "learning_rate": 2.9238527197745906e-06, "loss": 0.036163711547851564, "step": 117520 }, { "epoch": 1.0162039238744152, "grad_norm": 1.0975768351094408, "learning_rate": 2.9236490524197483e-06, "loss": 0.019582366943359374, "step": 117525 }, { "epoch": 1.0162471573959586, "grad_norm": 6.228619643070607, "learning_rate": 2.9234453854170303e-06, "loss": 0.03578338623046875, "step": 117530 }, { "epoch": 1.0162903909175018, "grad_norm": 5.81387734070316, "learning_rate": 2.9232417187673756e-06, "loss": 0.21192703247070313, "step": 117535 }, { "epoch": 1.016333624439045, "grad_norm": 2.5958791487688146, "learning_rate": 2.9230380524717234e-06, "loss": 0.062123870849609374, "step": 117540 }, { "epoch": 1.0163768579605883, "grad_norm": 3.3730353755801823, "learning_rate": 2.9228343865310132e-06, "loss": 0.0797119140625, "step": 117545 }, { "epoch": 1.0164200914821315, "grad_norm": 0.233976045125356, "learning_rate": 2.922630720946184e-06, "loss": 0.02472076416015625, "step": 117550 }, { "epoch": 1.0164633250036748, "grad_norm": 0.17253057890576126, "learning_rate": 2.922427055718174e-06, "loss": 0.042285919189453125, "step": 117555 }, { "epoch": 1.0165065585252182, "grad_norm": 0.3705962920157486, "learning_rate": 2.9222233908479237e-06, "loss": 0.0095428466796875, "step": 117560 }, { "epoch": 1.0165497920467614, "grad_norm": 1.8534097462370904, "learning_rate": 2.9220197263363732e-06, "loss": 0.0683523178100586, "step": 117565 }, { "epoch": 1.0165930255683047, "grad_norm": 4.798573409005018, "learning_rate": 2.9218160621844606e-06, "loss": 0.13751449584960937, "step": 117570 }, { "epoch": 1.016636259089848, "grad_norm": 4.926360777587923, "learning_rate": 2.9216123983931253e-06, "loss": 0.1166748046875, "step": 117575 }, { "epoch": 1.0166794926113911, "grad_norm": 5.2183583917652525, "learning_rate": 2.9214087349633062e-06, "loss": 0.16982040405273438, "step": 117580 }, { "epoch": 1.0167227261329344, "grad_norm": 3.323504645610243, "learning_rate": 2.921205071895944e-06, "loss": 0.0314453125, "step": 117585 }, { "epoch": 1.0167659596544778, "grad_norm": 0.03660560010776717, "learning_rate": 2.9210014091919754e-06, "loss": 0.02288055419921875, "step": 117590 }, { "epoch": 1.016809193176021, "grad_norm": 24.32610747235957, "learning_rate": 2.920797746852343e-06, "loss": 0.0942047119140625, "step": 117595 }, { "epoch": 1.0168524266975643, "grad_norm": 13.842278284625586, "learning_rate": 2.920594084877984e-06, "loss": 0.0753509521484375, "step": 117600 }, { "epoch": 1.0168956602191075, "grad_norm": 0.15022866587952569, "learning_rate": 2.920390423269838e-06, "loss": 0.018710803985595704, "step": 117605 }, { "epoch": 1.0169388937406507, "grad_norm": 0.020579087099957705, "learning_rate": 2.920186762028844e-06, "loss": 0.026161861419677735, "step": 117610 }, { "epoch": 1.016982127262194, "grad_norm": 7.020226387981915, "learning_rate": 2.9199831011559426e-06, "loss": 0.15358467102050782, "step": 117615 }, { "epoch": 1.0170253607837372, "grad_norm": 0.5324882070276759, "learning_rate": 2.9197794406520713e-06, "loss": 0.037229156494140624, "step": 117620 }, { "epoch": 1.0170685943052806, "grad_norm": 3.4208595229082386, "learning_rate": 2.9195757805181692e-06, "loss": 0.047800445556640626, "step": 117625 }, { "epoch": 1.0171118278268239, "grad_norm": 0.8306815209310743, "learning_rate": 2.9193721207551778e-06, "loss": 0.025225448608398437, "step": 117630 }, { "epoch": 1.017155061348367, "grad_norm": 1.144316643706696, "learning_rate": 2.919168461364035e-06, "loss": 0.027169036865234374, "step": 117635 }, { "epoch": 1.0171982948699103, "grad_norm": 1.318601387331567, "learning_rate": 2.9189648023456796e-06, "loss": 0.16134490966796874, "step": 117640 }, { "epoch": 1.0172415283914535, "grad_norm": 42.914219781135515, "learning_rate": 2.9187611437010516e-06, "loss": 0.1712982177734375, "step": 117645 }, { "epoch": 1.0172847619129968, "grad_norm": 8.345926570271994, "learning_rate": 2.9185574854310904e-06, "loss": 0.02987060546875, "step": 117650 }, { "epoch": 1.0173279954345402, "grad_norm": 2.118206382855611, "learning_rate": 2.9183538275367336e-06, "loss": 0.11483173370361328, "step": 117655 }, { "epoch": 1.0173712289560835, "grad_norm": 0.42258891539346966, "learning_rate": 2.918150170018923e-06, "loss": 0.1159027099609375, "step": 117660 }, { "epoch": 1.0174144624776267, "grad_norm": 11.075336297735525, "learning_rate": 2.9179465128785966e-06, "loss": 0.03592529296875, "step": 117665 }, { "epoch": 1.01745769599917, "grad_norm": 0.5633858620275904, "learning_rate": 2.9177428561166924e-06, "loss": 0.0317474365234375, "step": 117670 }, { "epoch": 1.0175009295207131, "grad_norm": 1.596155096383217, "learning_rate": 2.9175391997341527e-06, "loss": 0.09450321197509766, "step": 117675 }, { "epoch": 1.0175441630422564, "grad_norm": 3.1827107347901538, "learning_rate": 2.917335543731914e-06, "loss": 0.05376129150390625, "step": 117680 }, { "epoch": 1.0175873965637998, "grad_norm": 14.158179026004998, "learning_rate": 2.917131888110915e-06, "loss": 0.098614501953125, "step": 117685 }, { "epoch": 1.017630630085343, "grad_norm": 0.6946217537114565, "learning_rate": 2.9169282328720982e-06, "loss": 0.0256195068359375, "step": 117690 }, { "epoch": 1.0176738636068863, "grad_norm": 2.206198063114488, "learning_rate": 2.9167245780164015e-06, "loss": 0.130322265625, "step": 117695 }, { "epoch": 1.0177170971284295, "grad_norm": 1.8614321238220841, "learning_rate": 2.9165209235447626e-06, "loss": 0.011487960815429688, "step": 117700 }, { "epoch": 1.0177603306499727, "grad_norm": 0.22601868052796703, "learning_rate": 2.9163172694581222e-06, "loss": 0.084698486328125, "step": 117705 }, { "epoch": 1.017803564171516, "grad_norm": 2.4398947467694714, "learning_rate": 2.9161136157574193e-06, "loss": 0.113232421875, "step": 117710 }, { "epoch": 1.0178467976930592, "grad_norm": 3.3189885017380787, "learning_rate": 2.915909962443593e-06, "loss": 0.07395248413085938, "step": 117715 }, { "epoch": 1.0178900312146026, "grad_norm": 34.2466960123655, "learning_rate": 2.915706309517581e-06, "loss": 0.15290184020996095, "step": 117720 }, { "epoch": 1.0179332647361459, "grad_norm": 9.667855661932236, "learning_rate": 2.9155026569803256e-06, "loss": 0.1432464599609375, "step": 117725 }, { "epoch": 1.017976498257689, "grad_norm": 1.234346237167428, "learning_rate": 2.9152990048327643e-06, "loss": 0.05232658386230469, "step": 117730 }, { "epoch": 1.0180197317792323, "grad_norm": 0.6480848614636127, "learning_rate": 2.915095353075837e-06, "loss": 0.04434814453125, "step": 117735 }, { "epoch": 1.0180629653007756, "grad_norm": 5.35337176271989, "learning_rate": 2.9148917017104816e-06, "loss": 0.0258941650390625, "step": 117740 }, { "epoch": 1.0181061988223188, "grad_norm": 14.994432982552228, "learning_rate": 2.914688050737639e-06, "loss": 0.21996231079101564, "step": 117745 }, { "epoch": 1.0181494323438622, "grad_norm": 20.56969916396546, "learning_rate": 2.9144844001582456e-06, "loss": 0.10575599670410156, "step": 117750 }, { "epoch": 1.0181926658654055, "grad_norm": 2.5043649070261296, "learning_rate": 2.9142807499732444e-06, "loss": 0.18071632385253905, "step": 117755 }, { "epoch": 1.0182358993869487, "grad_norm": 1.099499821961463, "learning_rate": 2.9140771001835716e-06, "loss": 0.04076461791992188, "step": 117760 }, { "epoch": 1.018279132908492, "grad_norm": 1.376201604101016, "learning_rate": 2.913873450790169e-06, "loss": 0.080706787109375, "step": 117765 }, { "epoch": 1.0183223664300352, "grad_norm": 6.6588577642932, "learning_rate": 2.913669801793974e-06, "loss": 0.05032196044921875, "step": 117770 }, { "epoch": 1.0183655999515784, "grad_norm": 8.838396820967555, "learning_rate": 2.9134661531959264e-06, "loss": 0.04436836242675781, "step": 117775 }, { "epoch": 1.0184088334731216, "grad_norm": 0.2466728977886456, "learning_rate": 2.9132625049969646e-06, "loss": 0.16189956665039062, "step": 117780 }, { "epoch": 1.018452066994665, "grad_norm": 1.6178634073902403, "learning_rate": 2.9130588571980277e-06, "loss": 0.03035430908203125, "step": 117785 }, { "epoch": 1.0184953005162083, "grad_norm": 1.238847824725897, "learning_rate": 2.912855209800057e-06, "loss": 0.17347183227539062, "step": 117790 }, { "epoch": 1.0185385340377515, "grad_norm": 4.9606749650208855, "learning_rate": 2.9126515628039905e-06, "loss": 0.08657608032226563, "step": 117795 }, { "epoch": 1.0185817675592947, "grad_norm": 4.417937076545983, "learning_rate": 2.9124479162107672e-06, "loss": 0.07184829711914062, "step": 117800 }, { "epoch": 1.018625001080838, "grad_norm": 1.4338596606266285, "learning_rate": 2.912244270021326e-06, "loss": 0.06177978515625, "step": 117805 }, { "epoch": 1.0186682346023812, "grad_norm": 0.039985796072188065, "learning_rate": 2.9120406242366066e-06, "loss": 0.046286392211914065, "step": 117810 }, { "epoch": 1.0187114681239247, "grad_norm": 2.2913921651740203, "learning_rate": 2.9118369788575466e-06, "loss": 0.11238555908203125, "step": 117815 }, { "epoch": 1.0187547016454679, "grad_norm": 0.47084907361820666, "learning_rate": 2.9116333338850888e-06, "loss": 0.018607330322265626, "step": 117820 }, { "epoch": 1.0187979351670111, "grad_norm": 1.0008307645184256, "learning_rate": 2.9114296893201698e-06, "loss": 0.01417388916015625, "step": 117825 }, { "epoch": 1.0188411686885543, "grad_norm": 0.6867240959275326, "learning_rate": 2.9112260451637295e-06, "loss": 0.03268890380859375, "step": 117830 }, { "epoch": 1.0188844022100976, "grad_norm": 1.9798093497277978, "learning_rate": 2.911022401416706e-06, "loss": 0.0648345947265625, "step": 117835 }, { "epoch": 1.0189276357316408, "grad_norm": 0.4309702025220633, "learning_rate": 2.9108187580800393e-06, "loss": 0.16945152282714843, "step": 117840 }, { "epoch": 1.0189708692531843, "grad_norm": 4.117830096143878, "learning_rate": 2.9106151151546682e-06, "loss": 0.021507644653320314, "step": 117845 }, { "epoch": 1.0190141027747275, "grad_norm": 0.15293581345435767, "learning_rate": 2.9104114726415332e-06, "loss": 0.24282989501953126, "step": 117850 }, { "epoch": 1.0190573362962707, "grad_norm": 1.3154019258207237, "learning_rate": 2.910207830541573e-06, "loss": 0.042414093017578126, "step": 117855 }, { "epoch": 1.019100569817814, "grad_norm": 0.31024406718083836, "learning_rate": 2.910004188855726e-06, "loss": 0.018463516235351564, "step": 117860 }, { "epoch": 1.0191438033393572, "grad_norm": 4.302993955468386, "learning_rate": 2.909800547584931e-06, "loss": 0.04596786499023438, "step": 117865 }, { "epoch": 1.0191870368609004, "grad_norm": 0.48814705023921995, "learning_rate": 2.909596906730129e-06, "loss": 0.1184234619140625, "step": 117870 }, { "epoch": 1.0192302703824436, "grad_norm": 4.644899627505498, "learning_rate": 2.9093932662922577e-06, "loss": 0.0337188720703125, "step": 117875 }, { "epoch": 1.019273503903987, "grad_norm": 2.3808217089985733, "learning_rate": 2.9091896262722553e-06, "loss": 0.033720779418945315, "step": 117880 }, { "epoch": 1.0193167374255303, "grad_norm": 1.6297678709638581, "learning_rate": 2.9089859866710633e-06, "loss": 0.011034774780273437, "step": 117885 }, { "epoch": 1.0193599709470735, "grad_norm": 2.0528534925442483, "learning_rate": 2.9087823474896206e-06, "loss": 0.1894744873046875, "step": 117890 }, { "epoch": 1.0194032044686168, "grad_norm": 1.6067104779376642, "learning_rate": 2.9085787087288645e-06, "loss": 0.032386398315429686, "step": 117895 }, { "epoch": 1.01944643799016, "grad_norm": 12.223389699157917, "learning_rate": 2.9083750703897363e-06, "loss": 0.08810195922851563, "step": 117900 }, { "epoch": 1.0194896715117032, "grad_norm": 0.5298127638183324, "learning_rate": 2.9081714324731743e-06, "loss": 0.012253570556640624, "step": 117905 }, { "epoch": 1.0195329050332467, "grad_norm": 0.10151481666190877, "learning_rate": 2.907967794980116e-06, "loss": 0.053174591064453124, "step": 117910 }, { "epoch": 1.01957613855479, "grad_norm": 5.692460452256522, "learning_rate": 2.9077641579115034e-06, "loss": 0.07441902160644531, "step": 117915 }, { "epoch": 1.0196193720763331, "grad_norm": 1.0567885927764804, "learning_rate": 2.907560521268274e-06, "loss": 0.007845687866210937, "step": 117920 }, { "epoch": 1.0196626055978764, "grad_norm": 0.2883919443177799, "learning_rate": 2.907356885051367e-06, "loss": 0.05661163330078125, "step": 117925 }, { "epoch": 1.0197058391194196, "grad_norm": 21.40075358802187, "learning_rate": 2.907153249261723e-06, "loss": 0.09761199951171876, "step": 117930 }, { "epoch": 1.0197490726409628, "grad_norm": 0.8360813657432029, "learning_rate": 2.9069496139002795e-06, "loss": 0.025748443603515626, "step": 117935 }, { "epoch": 1.0197923061625063, "grad_norm": 0.5190871224602954, "learning_rate": 2.9067459789679746e-06, "loss": 0.04426994323730469, "step": 117940 }, { "epoch": 1.0198355396840495, "grad_norm": 47.48108181904906, "learning_rate": 2.9065423444657507e-06, "loss": 0.22343940734863282, "step": 117945 }, { "epoch": 1.0198787732055927, "grad_norm": 0.5074863847268387, "learning_rate": 2.906338710394545e-06, "loss": 0.01412506103515625, "step": 117950 }, { "epoch": 1.019922006727136, "grad_norm": 19.658462958149375, "learning_rate": 2.906135076755297e-06, "loss": 0.09089851379394531, "step": 117955 }, { "epoch": 1.0199652402486792, "grad_norm": 16.30770861393759, "learning_rate": 2.9059314435489453e-06, "loss": 0.180853271484375, "step": 117960 }, { "epoch": 1.0200084737702224, "grad_norm": 0.6850748197649362, "learning_rate": 2.90572781077643e-06, "loss": 0.02017059326171875, "step": 117965 }, { "epoch": 1.0200517072917656, "grad_norm": 12.67891851657025, "learning_rate": 2.9055241784386896e-06, "loss": 0.11047210693359374, "step": 117970 }, { "epoch": 1.020094940813309, "grad_norm": 2.350991718850555, "learning_rate": 2.9053205465366624e-06, "loss": 0.019297027587890626, "step": 117975 }, { "epoch": 1.0201381743348523, "grad_norm": 37.034920348608445, "learning_rate": 2.9051169150712884e-06, "loss": 0.10691909790039063, "step": 117980 }, { "epoch": 1.0201814078563956, "grad_norm": 3.2236020691158336, "learning_rate": 2.904913284043508e-06, "loss": 0.0471527099609375, "step": 117985 }, { "epoch": 1.0202246413779388, "grad_norm": 0.21917237675973586, "learning_rate": 2.904709653454259e-06, "loss": 0.0745086669921875, "step": 117990 }, { "epoch": 1.020267874899482, "grad_norm": 0.4414293121591306, "learning_rate": 2.9045060233044806e-06, "loss": 0.05181694030761719, "step": 117995 }, { "epoch": 1.0203111084210252, "grad_norm": 0.6034399081963215, "learning_rate": 2.904302393595112e-06, "loss": 0.02685203552246094, "step": 118000 }, { "epoch": 1.0203543419425687, "grad_norm": 5.98862387287559, "learning_rate": 2.904098764327091e-06, "loss": 0.030597496032714843, "step": 118005 }, { "epoch": 1.020397575464112, "grad_norm": 9.455004183171564, "learning_rate": 2.903895135501359e-06, "loss": 0.07909622192382812, "step": 118010 }, { "epoch": 1.0204408089856551, "grad_norm": 0.24367406078262907, "learning_rate": 2.9036915071188544e-06, "loss": 0.14105377197265626, "step": 118015 }, { "epoch": 1.0204840425071984, "grad_norm": 0.4721205694228394, "learning_rate": 2.9034878791805163e-06, "loss": 0.01915740966796875, "step": 118020 }, { "epoch": 1.0205272760287416, "grad_norm": 2.108844523669083, "learning_rate": 2.9032842516872833e-06, "loss": 0.07241554260253906, "step": 118025 }, { "epoch": 1.0205705095502848, "grad_norm": 0.3711258576747477, "learning_rate": 2.903080624640094e-06, "loss": 0.017380523681640624, "step": 118030 }, { "epoch": 1.020613743071828, "grad_norm": 0.6151819264748981, "learning_rate": 2.902876998039889e-06, "loss": 0.08110847473144531, "step": 118035 }, { "epoch": 1.0206569765933715, "grad_norm": 2.29989772104614, "learning_rate": 2.902673371887606e-06, "loss": 0.0744145393371582, "step": 118040 }, { "epoch": 1.0207002101149147, "grad_norm": 4.60183482481541, "learning_rate": 2.902469746184186e-06, "loss": 0.020235633850097655, "step": 118045 }, { "epoch": 1.020743443636458, "grad_norm": 1.7097653252973741, "learning_rate": 2.9022661209305666e-06, "loss": 0.14069061279296874, "step": 118050 }, { "epoch": 1.0207866771580012, "grad_norm": 17.744199399846483, "learning_rate": 2.9020624961276873e-06, "loss": 0.042580795288085935, "step": 118055 }, { "epoch": 1.0208299106795444, "grad_norm": 0.17988222898349351, "learning_rate": 2.9018588717764862e-06, "loss": 0.011717987060546876, "step": 118060 }, { "epoch": 1.0208731442010877, "grad_norm": 1.7759120786895026, "learning_rate": 2.9016552478779044e-06, "loss": 0.015641021728515624, "step": 118065 }, { "epoch": 1.020916377722631, "grad_norm": 2.655814154926081, "learning_rate": 2.9014516244328787e-06, "loss": 0.06906242370605468, "step": 118070 }, { "epoch": 1.0209596112441743, "grad_norm": 0.7146700131398449, "learning_rate": 2.90124800144235e-06, "loss": 0.04450798034667969, "step": 118075 }, { "epoch": 1.0210028447657176, "grad_norm": 0.1774948564014243, "learning_rate": 2.9010443789072575e-06, "loss": 0.08213157653808593, "step": 118080 }, { "epoch": 1.0210460782872608, "grad_norm": 9.77451512501906, "learning_rate": 2.900840756828539e-06, "loss": 0.08940162658691406, "step": 118085 }, { "epoch": 1.021089311808804, "grad_norm": 19.731360680623073, "learning_rate": 2.900637135207134e-06, "loss": 0.06483688354492187, "step": 118090 }, { "epoch": 1.0211325453303473, "grad_norm": 58.98565498463383, "learning_rate": 2.9004335140439822e-06, "loss": 0.10820693969726562, "step": 118095 }, { "epoch": 1.0211757788518907, "grad_norm": 2.2749565681143817, "learning_rate": 2.9002298933400207e-06, "loss": 0.10159492492675781, "step": 118100 }, { "epoch": 1.021219012373434, "grad_norm": 6.01478378083886, "learning_rate": 2.9000262730961917e-06, "loss": 0.0779296875, "step": 118105 }, { "epoch": 1.0212622458949772, "grad_norm": 4.024552635214321, "learning_rate": 2.8998226533134323e-06, "loss": 0.14236068725585938, "step": 118110 }, { "epoch": 1.0213054794165204, "grad_norm": 12.82254005073662, "learning_rate": 2.8996190339926824e-06, "loss": 0.0999237060546875, "step": 118115 }, { "epoch": 1.0213487129380636, "grad_norm": 1.2102626205952982, "learning_rate": 2.89941541513488e-06, "loss": 0.07875518798828125, "step": 118120 }, { "epoch": 1.0213919464596068, "grad_norm": 0.2423048768869311, "learning_rate": 2.8992117967409655e-06, "loss": 0.013674545288085937, "step": 118125 }, { "epoch": 1.02143517998115, "grad_norm": 0.18508527054984694, "learning_rate": 2.899008178811877e-06, "loss": 0.02939453125, "step": 118130 }, { "epoch": 1.0214784135026935, "grad_norm": 0.6344433047408362, "learning_rate": 2.898804561348553e-06, "loss": 0.09530982971191407, "step": 118135 }, { "epoch": 1.0215216470242368, "grad_norm": 2.355753543693819, "learning_rate": 2.8986009443519343e-06, "loss": 0.09966850280761719, "step": 118140 }, { "epoch": 1.02156488054578, "grad_norm": 4.239111303460469, "learning_rate": 2.8983973278229592e-06, "loss": 0.46545753479003904, "step": 118145 }, { "epoch": 1.0216081140673232, "grad_norm": 44.824133340985505, "learning_rate": 2.8981937117625658e-06, "loss": 0.19422607421875, "step": 118150 }, { "epoch": 1.0216513475888664, "grad_norm": 3.947074649655793, "learning_rate": 2.8979900961716954e-06, "loss": 0.09903411865234375, "step": 118155 }, { "epoch": 1.0216945811104097, "grad_norm": 1.5085270313908632, "learning_rate": 2.897786481051285e-06, "loss": 0.025005340576171875, "step": 118160 }, { "epoch": 1.0217378146319531, "grad_norm": 2.6488499145914677, "learning_rate": 2.897582866402273e-06, "loss": 0.04154014587402344, "step": 118165 }, { "epoch": 1.0217810481534964, "grad_norm": 16.634866207185837, "learning_rate": 2.897379252225601e-06, "loss": 0.142169189453125, "step": 118170 }, { "epoch": 1.0218242816750396, "grad_norm": 0.4049143704652665, "learning_rate": 2.897175638522207e-06, "loss": 0.03597412109375, "step": 118175 }, { "epoch": 1.0218675151965828, "grad_norm": 19.755761328936885, "learning_rate": 2.8969720252930294e-06, "loss": 0.08374290466308594, "step": 118180 }, { "epoch": 1.021910748718126, "grad_norm": 0.12343199841800302, "learning_rate": 2.8967684125390083e-06, "loss": 0.03341827392578125, "step": 118185 }, { "epoch": 1.0219539822396693, "grad_norm": 1.8388957245418334, "learning_rate": 2.896564800261082e-06, "loss": 0.01886749267578125, "step": 118190 }, { "epoch": 1.0219972157612127, "grad_norm": 9.13339023324196, "learning_rate": 2.8963611884601885e-06, "loss": 0.11385078430175781, "step": 118195 }, { "epoch": 1.022040449282756, "grad_norm": 0.3398033150228563, "learning_rate": 2.8961575771372694e-06, "loss": 0.27869415283203125, "step": 118200 }, { "epoch": 1.0220836828042992, "grad_norm": 0.9091735777095383, "learning_rate": 2.895953966293262e-06, "loss": 0.03797073364257812, "step": 118205 }, { "epoch": 1.0221269163258424, "grad_norm": 2.2746239313149474, "learning_rate": 2.8957503559291057e-06, "loss": 0.04001922607421875, "step": 118210 }, { "epoch": 1.0221701498473856, "grad_norm": 0.22275738005022147, "learning_rate": 2.89554674604574e-06, "loss": 0.0557769775390625, "step": 118215 }, { "epoch": 1.0222133833689289, "grad_norm": 4.767443841294456, "learning_rate": 2.895343136644103e-06, "loss": 0.017737960815429686, "step": 118220 }, { "epoch": 1.022256616890472, "grad_norm": 23.186656183094197, "learning_rate": 2.8951395277251347e-06, "loss": 0.08695297241210938, "step": 118225 }, { "epoch": 1.0222998504120155, "grad_norm": 5.313085544322233, "learning_rate": 2.894935919289772e-06, "loss": 0.04078521728515625, "step": 118230 }, { "epoch": 1.0223430839335588, "grad_norm": 5.314895911974183, "learning_rate": 2.8947323113389568e-06, "loss": 0.0849599838256836, "step": 118235 }, { "epoch": 1.022386317455102, "grad_norm": 0.7337017423858295, "learning_rate": 2.894528703873627e-06, "loss": 0.019725799560546875, "step": 118240 }, { "epoch": 1.0224295509766452, "grad_norm": 3.2530501019187525, "learning_rate": 2.8943250968947213e-06, "loss": 0.08958816528320312, "step": 118245 }, { "epoch": 1.0224727844981885, "grad_norm": 10.348772309028725, "learning_rate": 2.8941214904031794e-06, "loss": 0.0510223388671875, "step": 118250 }, { "epoch": 1.0225160180197317, "grad_norm": 4.0306764212428305, "learning_rate": 2.893917884399939e-06, "loss": 0.12448883056640625, "step": 118255 }, { "epoch": 1.0225592515412751, "grad_norm": 1.526514109617251, "learning_rate": 2.89371427888594e-06, "loss": 0.121136474609375, "step": 118260 }, { "epoch": 1.0226024850628184, "grad_norm": 2.338858296637515, "learning_rate": 2.893510673862121e-06, "loss": 0.07419052124023437, "step": 118265 }, { "epoch": 1.0226457185843616, "grad_norm": 18.578926830735, "learning_rate": 2.8933070693294226e-06, "loss": 0.08587188720703125, "step": 118270 }, { "epoch": 1.0226889521059048, "grad_norm": 0.5549482602720781, "learning_rate": 2.8931034652887824e-06, "loss": 0.05772247314453125, "step": 118275 }, { "epoch": 1.022732185627448, "grad_norm": 0.22205134547941596, "learning_rate": 2.8928998617411395e-06, "loss": 0.13330230712890626, "step": 118280 }, { "epoch": 1.0227754191489913, "grad_norm": 1.295086359628341, "learning_rate": 2.892696258687432e-06, "loss": 0.04810905456542969, "step": 118285 }, { "epoch": 1.0228186526705347, "grad_norm": 2.2316589579619457, "learning_rate": 2.8924926561286007e-06, "loss": 0.14313583374023436, "step": 118290 }, { "epoch": 1.022861886192078, "grad_norm": 78.08296155223657, "learning_rate": 2.892289054065583e-06, "loss": 0.4064910888671875, "step": 118295 }, { "epoch": 1.0229051197136212, "grad_norm": 18.80729483289491, "learning_rate": 2.8920854524993195e-06, "loss": 0.09138679504394531, "step": 118300 }, { "epoch": 1.0229483532351644, "grad_norm": 17.607750171512077, "learning_rate": 2.8918818514307486e-06, "loss": 0.13957061767578124, "step": 118305 }, { "epoch": 1.0229915867567076, "grad_norm": 0.9191791573981667, "learning_rate": 2.8916782508608085e-06, "loss": 0.2535442352294922, "step": 118310 }, { "epoch": 1.0230348202782509, "grad_norm": 2.027575111968772, "learning_rate": 2.8914746507904388e-06, "loss": 0.07071914672851562, "step": 118315 }, { "epoch": 1.023078053799794, "grad_norm": 4.103685623606341, "learning_rate": 2.8912710512205783e-06, "loss": 0.036070632934570315, "step": 118320 }, { "epoch": 1.0231212873213376, "grad_norm": 5.503021178093942, "learning_rate": 2.8910674521521655e-06, "loss": 0.030493927001953126, "step": 118325 }, { "epoch": 1.0231645208428808, "grad_norm": 16.72596307491948, "learning_rate": 2.890863853586141e-06, "loss": 0.025823211669921874, "step": 118330 }, { "epoch": 1.023207754364424, "grad_norm": 0.7945472337244944, "learning_rate": 2.890660255523443e-06, "loss": 0.04716796875, "step": 118335 }, { "epoch": 1.0232509878859672, "grad_norm": 5.931228400452664, "learning_rate": 2.8904566579650097e-06, "loss": 0.11248359680175782, "step": 118340 }, { "epoch": 1.0232942214075105, "grad_norm": 0.8699347881186297, "learning_rate": 2.8902530609117802e-06, "loss": 0.1290203094482422, "step": 118345 }, { "epoch": 1.0233374549290537, "grad_norm": 4.222167701657491, "learning_rate": 2.890049464364695e-06, "loss": 0.030031967163085937, "step": 118350 }, { "epoch": 1.0233806884505972, "grad_norm": 0.6960002877811399, "learning_rate": 2.88984586832469e-06, "loss": 0.04913616180419922, "step": 118355 }, { "epoch": 1.0234239219721404, "grad_norm": 1.7495646388188881, "learning_rate": 2.8896422727927075e-06, "loss": 0.12948217391967773, "step": 118360 }, { "epoch": 1.0234671554936836, "grad_norm": 10.357432435475037, "learning_rate": 2.8894386777696855e-06, "loss": 0.14026565551757814, "step": 118365 }, { "epoch": 1.0235103890152268, "grad_norm": 3.1937581795468937, "learning_rate": 2.889235083256562e-06, "loss": 0.06802253723144532, "step": 118370 }, { "epoch": 1.02355362253677, "grad_norm": 0.41438731747166224, "learning_rate": 2.8890314892542763e-06, "loss": 0.04621429443359375, "step": 118375 }, { "epoch": 1.0235968560583133, "grad_norm": 0.12184724693492806, "learning_rate": 2.888827895763768e-06, "loss": 0.01084747314453125, "step": 118380 }, { "epoch": 1.0236400895798567, "grad_norm": 0.18327296659670228, "learning_rate": 2.888624302785976e-06, "loss": 0.055635833740234376, "step": 118385 }, { "epoch": 1.0236833231014, "grad_norm": 26.831295470134133, "learning_rate": 2.888420710321837e-06, "loss": 0.11569328308105468, "step": 118390 }, { "epoch": 1.0237265566229432, "grad_norm": 2.9979231483885487, "learning_rate": 2.8882171183722936e-06, "loss": 0.02012004852294922, "step": 118395 }, { "epoch": 1.0237697901444864, "grad_norm": 0.46154545964346044, "learning_rate": 2.888013526938283e-06, "loss": 0.252435302734375, "step": 118400 }, { "epoch": 1.0238130236660297, "grad_norm": 0.2882047030103955, "learning_rate": 2.887809936020743e-06, "loss": 0.028698253631591796, "step": 118405 }, { "epoch": 1.023856257187573, "grad_norm": 0.9169277613045156, "learning_rate": 2.887606345620615e-06, "loss": 0.037064743041992185, "step": 118410 }, { "epoch": 1.0238994907091161, "grad_norm": 9.163151338413222, "learning_rate": 2.887402755738836e-06, "loss": 0.3288606643676758, "step": 118415 }, { "epoch": 1.0239427242306596, "grad_norm": 1.1383611451156028, "learning_rate": 2.8871991663763446e-06, "loss": 0.049737548828125, "step": 118420 }, { "epoch": 1.0239859577522028, "grad_norm": 0.7567039129329886, "learning_rate": 2.8869955775340814e-06, "loss": 0.02797088623046875, "step": 118425 }, { "epoch": 1.024029191273746, "grad_norm": 0.9364826090502525, "learning_rate": 2.8867919892129844e-06, "loss": 0.1171661376953125, "step": 118430 }, { "epoch": 1.0240724247952893, "grad_norm": 1.8649170024328854, "learning_rate": 2.8865884014139935e-06, "loss": 0.086199951171875, "step": 118435 }, { "epoch": 1.0241156583168325, "grad_norm": 18.567352449338784, "learning_rate": 2.8863848141380467e-06, "loss": 0.09097976684570312, "step": 118440 }, { "epoch": 1.0241588918383757, "grad_norm": 0.3149033600892351, "learning_rate": 2.886181227386083e-06, "loss": 0.04916839599609375, "step": 118445 }, { "epoch": 1.0242021253599192, "grad_norm": 1.159291987700837, "learning_rate": 2.8859776411590403e-06, "loss": 0.02050437927246094, "step": 118450 }, { "epoch": 1.0242453588814624, "grad_norm": 0.6510479932210906, "learning_rate": 2.88577405545786e-06, "loss": 0.08084945678710938, "step": 118455 }, { "epoch": 1.0242885924030056, "grad_norm": 0.7348990901895509, "learning_rate": 2.885570470283479e-06, "loss": 0.029439544677734374, "step": 118460 }, { "epoch": 1.0243318259245489, "grad_norm": 1.518275780180984, "learning_rate": 2.885366885636837e-06, "loss": 0.02183837890625, "step": 118465 }, { "epoch": 1.024375059446092, "grad_norm": 2.0215365290520824, "learning_rate": 2.8851633015188734e-06, "loss": 0.06649932861328126, "step": 118470 }, { "epoch": 1.0244182929676353, "grad_norm": 0.35777835951852854, "learning_rate": 2.8849597179305265e-06, "loss": 0.022010231018066408, "step": 118475 }, { "epoch": 1.0244615264891785, "grad_norm": 44.845772826124104, "learning_rate": 2.8847561348727343e-06, "loss": 0.08117828369140626, "step": 118480 }, { "epoch": 1.024504760010722, "grad_norm": 2.8726693591610393, "learning_rate": 2.8845525523464366e-06, "loss": 0.020784759521484376, "step": 118485 }, { "epoch": 1.0245479935322652, "grad_norm": 24.384508285160443, "learning_rate": 2.8843489703525724e-06, "loss": 0.05374164581298828, "step": 118490 }, { "epoch": 1.0245912270538085, "grad_norm": 1.1065817497133175, "learning_rate": 2.884145388892081e-06, "loss": 0.011380767822265625, "step": 118495 }, { "epoch": 1.0246344605753517, "grad_norm": 1.7971509853629246, "learning_rate": 2.883941807965901e-06, "loss": 0.074481201171875, "step": 118500 }, { "epoch": 1.024677694096895, "grad_norm": 6.485751100763561, "learning_rate": 2.8837382275749714e-06, "loss": 0.09288272857666016, "step": 118505 }, { "epoch": 1.0247209276184381, "grad_norm": 16.564654673307423, "learning_rate": 2.8835346477202296e-06, "loss": 0.04707527160644531, "step": 118510 }, { "epoch": 1.0247641611399816, "grad_norm": 0.6404840539309775, "learning_rate": 2.8833310684026164e-06, "loss": 0.007149887084960937, "step": 118515 }, { "epoch": 1.0248073946615248, "grad_norm": 0.6266969661881904, "learning_rate": 2.8831274896230694e-06, "loss": 0.029188919067382812, "step": 118520 }, { "epoch": 1.024850628183068, "grad_norm": 0.8180514804797628, "learning_rate": 2.8829239113825294e-06, "loss": 0.0678253173828125, "step": 118525 }, { "epoch": 1.0248938617046113, "grad_norm": 1.4906024128795425, "learning_rate": 2.8827203336819334e-06, "loss": 0.040306472778320314, "step": 118530 }, { "epoch": 1.0249370952261545, "grad_norm": 13.51926731852998, "learning_rate": 2.882516756522221e-06, "loss": 0.39319610595703125, "step": 118535 }, { "epoch": 1.0249803287476977, "grad_norm": 0.10743188785174722, "learning_rate": 2.8823131799043305e-06, "loss": 0.19924049377441405, "step": 118540 }, { "epoch": 1.0250235622692412, "grad_norm": 0.6278384866175383, "learning_rate": 2.882109603829201e-06, "loss": 0.21573276519775392, "step": 118545 }, { "epoch": 1.0250667957907844, "grad_norm": 8.644488430534468, "learning_rate": 2.881906028297771e-06, "loss": 0.20342655181884767, "step": 118550 }, { "epoch": 1.0251100293123276, "grad_norm": 283.4166738092973, "learning_rate": 2.881702453310982e-06, "loss": 0.15704345703125, "step": 118555 }, { "epoch": 1.0251532628338709, "grad_norm": 0.4308771039432176, "learning_rate": 2.8814988788697697e-06, "loss": 0.026395225524902345, "step": 118560 }, { "epoch": 1.025196496355414, "grad_norm": 0.8405008261352337, "learning_rate": 2.881295304975075e-06, "loss": 0.0864959716796875, "step": 118565 }, { "epoch": 1.0252397298769573, "grad_norm": 51.20055365297629, "learning_rate": 2.8810917316278344e-06, "loss": 0.19922943115234376, "step": 118570 }, { "epoch": 1.0252829633985006, "grad_norm": 0.08742332257950539, "learning_rate": 2.8808881588289893e-06, "loss": 0.187725830078125, "step": 118575 }, { "epoch": 1.025326196920044, "grad_norm": 11.0457129036427, "learning_rate": 2.880684586579476e-06, "loss": 0.05589561462402344, "step": 118580 }, { "epoch": 1.0253694304415872, "grad_norm": 1.4321256969521658, "learning_rate": 2.880481014880236e-06, "loss": 0.1340972900390625, "step": 118585 }, { "epoch": 1.0254126639631305, "grad_norm": 3.730501489841813, "learning_rate": 2.8802774437322073e-06, "loss": 0.2018524169921875, "step": 118590 }, { "epoch": 1.0254558974846737, "grad_norm": 0.8201740080688038, "learning_rate": 2.8800738731363284e-06, "loss": 0.15805740356445314, "step": 118595 }, { "epoch": 1.025499131006217, "grad_norm": 0.4261661094473598, "learning_rate": 2.8798703030935375e-06, "loss": 0.064532470703125, "step": 118600 }, { "epoch": 1.0255423645277602, "grad_norm": 10.341774001975121, "learning_rate": 2.879666733604775e-06, "loss": 0.0926666259765625, "step": 118605 }, { "epoch": 1.0255855980493036, "grad_norm": 0.2484742849327139, "learning_rate": 2.879463164670977e-06, "loss": 0.2316295623779297, "step": 118610 }, { "epoch": 1.0256288315708468, "grad_norm": 4.234486625394792, "learning_rate": 2.8792595962930858e-06, "loss": 0.017763900756835937, "step": 118615 }, { "epoch": 1.02567206509239, "grad_norm": 3.349013043519319, "learning_rate": 2.879056028472039e-06, "loss": 0.10980377197265626, "step": 118620 }, { "epoch": 1.0257152986139333, "grad_norm": 12.169611941255459, "learning_rate": 2.878852461208774e-06, "loss": 0.0618072509765625, "step": 118625 }, { "epoch": 1.0257585321354765, "grad_norm": 2.396267381885031, "learning_rate": 2.878648894504232e-06, "loss": 0.02004547119140625, "step": 118630 }, { "epoch": 1.0258017656570197, "grad_norm": 30.39492362464878, "learning_rate": 2.8784453283593503e-06, "loss": 0.10733642578125, "step": 118635 }, { "epoch": 1.0258449991785632, "grad_norm": 25.242296954886122, "learning_rate": 2.8782417627750664e-06, "loss": 0.159930419921875, "step": 118640 }, { "epoch": 1.0258882327001064, "grad_norm": 3.5939682246912343, "learning_rate": 2.878038197752322e-06, "loss": 0.03683929443359375, "step": 118645 }, { "epoch": 1.0259314662216497, "grad_norm": 1.8366940230042972, "learning_rate": 2.8778346332920552e-06, "loss": 0.08180561065673828, "step": 118650 }, { "epoch": 1.0259746997431929, "grad_norm": 4.989220503178507, "learning_rate": 2.8776310693952033e-06, "loss": 0.23936767578125, "step": 118655 }, { "epoch": 1.0260179332647361, "grad_norm": 0.10459987927097841, "learning_rate": 2.877427506062707e-06, "loss": 0.010961151123046875, "step": 118660 }, { "epoch": 1.0260611667862793, "grad_norm": 4.56053686332246, "learning_rate": 2.877223943295504e-06, "loss": 0.12559967041015624, "step": 118665 }, { "epoch": 1.0261044003078226, "grad_norm": 0.31543698796384373, "learning_rate": 2.877020381094533e-06, "loss": 0.04300537109375, "step": 118670 }, { "epoch": 1.026147633829366, "grad_norm": 10.503562291533665, "learning_rate": 2.876816819460732e-06, "loss": 0.081964111328125, "step": 118675 }, { "epoch": 1.0261908673509093, "grad_norm": 0.9037444124786468, "learning_rate": 2.8766132583950423e-06, "loss": 0.014642333984375, "step": 118680 }, { "epoch": 1.0262341008724525, "grad_norm": 0.3551076169546684, "learning_rate": 2.876409697898401e-06, "loss": 0.05400772094726562, "step": 118685 }, { "epoch": 1.0262773343939957, "grad_norm": 2.306434534824252, "learning_rate": 2.8762061379717478e-06, "loss": 0.04825897216796875, "step": 118690 }, { "epoch": 1.026320567915539, "grad_norm": 2.150183058140882, "learning_rate": 2.8760025786160207e-06, "loss": 0.26274261474609373, "step": 118695 }, { "epoch": 1.0263638014370822, "grad_norm": 0.1951903078801619, "learning_rate": 2.875799019832159e-06, "loss": 0.04435806274414063, "step": 118700 }, { "epoch": 1.0264070349586256, "grad_norm": 3.4721863796951333, "learning_rate": 2.8755954616211e-06, "loss": 0.35904617309570314, "step": 118705 }, { "epoch": 1.0264502684801688, "grad_norm": 1.0396023765201723, "learning_rate": 2.875391903983785e-06, "loss": 0.009356689453125, "step": 118710 }, { "epoch": 1.026493502001712, "grad_norm": 2.248816163453983, "learning_rate": 2.8751883469211508e-06, "loss": 0.02265625, "step": 118715 }, { "epoch": 1.0265367355232553, "grad_norm": 10.796700075673334, "learning_rate": 2.8749847904341375e-06, "loss": 0.13453292846679688, "step": 118720 }, { "epoch": 1.0265799690447985, "grad_norm": 1.3738949327476495, "learning_rate": 2.8747812345236835e-06, "loss": 0.0236083984375, "step": 118725 }, { "epoch": 1.0266232025663418, "grad_norm": 1.3083374598871123, "learning_rate": 2.874577679190727e-06, "loss": 0.04244537353515625, "step": 118730 }, { "epoch": 1.026666436087885, "grad_norm": 2.3216200758656873, "learning_rate": 2.8743741244362064e-06, "loss": 0.04149360656738281, "step": 118735 }, { "epoch": 1.0267096696094284, "grad_norm": 16.737034859027496, "learning_rate": 2.8741705702610616e-06, "loss": 0.24290390014648439, "step": 118740 }, { "epoch": 1.0267529031309717, "grad_norm": 19.7770957696238, "learning_rate": 2.873967016666231e-06, "loss": 0.0478302001953125, "step": 118745 }, { "epoch": 1.026796136652515, "grad_norm": 7.408693338752638, "learning_rate": 2.873763463652654e-06, "loss": 0.07339630126953126, "step": 118750 }, { "epoch": 1.0268393701740581, "grad_norm": 0.21752066759440636, "learning_rate": 2.873559911221269e-06, "loss": 0.0130340576171875, "step": 118755 }, { "epoch": 1.0268826036956014, "grad_norm": 2.1442784216436426, "learning_rate": 2.8733563593730143e-06, "loss": 0.2686370849609375, "step": 118760 }, { "epoch": 1.0269258372171446, "grad_norm": 0.8760511701572828, "learning_rate": 2.8731528081088283e-06, "loss": 0.015016937255859375, "step": 118765 }, { "epoch": 1.026969070738688, "grad_norm": 3.5569004828030137, "learning_rate": 2.87294925742965e-06, "loss": 0.024869155883789063, "step": 118770 }, { "epoch": 1.0270123042602313, "grad_norm": 1.2938679057292581, "learning_rate": 2.872745707336419e-06, "loss": 0.056987762451171875, "step": 118775 }, { "epoch": 1.0270555377817745, "grad_norm": 51.1786273446478, "learning_rate": 2.8725421578300736e-06, "loss": 0.1475006103515625, "step": 118780 }, { "epoch": 1.0270987713033177, "grad_norm": 1.5515669980917728, "learning_rate": 2.872338608911553e-06, "loss": 0.0904876708984375, "step": 118785 }, { "epoch": 1.027142004824861, "grad_norm": 0.6898842086183081, "learning_rate": 2.8721350605817953e-06, "loss": 0.031858062744140624, "step": 118790 }, { "epoch": 1.0271852383464042, "grad_norm": 0.1033451997116634, "learning_rate": 2.871931512841739e-06, "loss": 0.06553573608398437, "step": 118795 }, { "epoch": 1.0272284718679476, "grad_norm": 1.5384942547692306, "learning_rate": 2.871727965692323e-06, "loss": 0.04366302490234375, "step": 118800 }, { "epoch": 1.0272717053894909, "grad_norm": 0.5222366593702747, "learning_rate": 2.871524419134486e-06, "loss": 0.18972206115722656, "step": 118805 }, { "epoch": 1.027314938911034, "grad_norm": 0.15302405570070415, "learning_rate": 2.871320873169168e-06, "loss": 0.06417388916015625, "step": 118810 }, { "epoch": 1.0273581724325773, "grad_norm": 30.092435821719107, "learning_rate": 2.871117327797307e-06, "loss": 0.04268951416015625, "step": 118815 }, { "epoch": 1.0274014059541206, "grad_norm": 0.882285995960861, "learning_rate": 2.870913783019841e-06, "loss": 0.011556243896484375, "step": 118820 }, { "epoch": 1.0274446394756638, "grad_norm": 1.1598587546643462, "learning_rate": 2.870710238837709e-06, "loss": 0.05168609619140625, "step": 118825 }, { "epoch": 1.027487872997207, "grad_norm": 2.0089275518708187, "learning_rate": 2.87050669525185e-06, "loss": 0.07132339477539062, "step": 118830 }, { "epoch": 1.0275311065187505, "grad_norm": 3.0848637871083535, "learning_rate": 2.8703031522632022e-06, "loss": 0.09642333984375, "step": 118835 }, { "epoch": 1.0275743400402937, "grad_norm": 1.9634401305074147, "learning_rate": 2.8700996098727057e-06, "loss": 0.01524810791015625, "step": 118840 }, { "epoch": 1.027617573561837, "grad_norm": 1.8353653380908315, "learning_rate": 2.869896068081298e-06, "loss": 0.02647705078125, "step": 118845 }, { "epoch": 1.0276608070833801, "grad_norm": 7.020728048042744, "learning_rate": 2.869692526889918e-06, "loss": 0.022329044342041016, "step": 118850 }, { "epoch": 1.0277040406049234, "grad_norm": 0.43852356828345584, "learning_rate": 2.869488986299505e-06, "loss": 0.021438217163085936, "step": 118855 }, { "epoch": 1.0277472741264666, "grad_norm": 21.697044912199075, "learning_rate": 2.869285446310997e-06, "loss": 0.04671821594238281, "step": 118860 }, { "epoch": 1.02779050764801, "grad_norm": 1.6291573812480806, "learning_rate": 2.8690819069253322e-06, "loss": 0.057933807373046875, "step": 118865 }, { "epoch": 1.0278337411695533, "grad_norm": 1.0829447420554077, "learning_rate": 2.868878368143451e-06, "loss": 0.024326324462890625, "step": 118870 }, { "epoch": 1.0278769746910965, "grad_norm": 20.360598441264784, "learning_rate": 2.868674829966291e-06, "loss": 0.191973876953125, "step": 118875 }, { "epoch": 1.0279202082126397, "grad_norm": 0.1949237812208489, "learning_rate": 2.8684712923947912e-06, "loss": 0.17477951049804688, "step": 118880 }, { "epoch": 1.027963441734183, "grad_norm": 5.332233675942719, "learning_rate": 2.86826775542989e-06, "loss": 0.04615936279296875, "step": 118885 }, { "epoch": 1.0280066752557262, "grad_norm": 1.5599240452240104, "learning_rate": 2.8680642190725272e-06, "loss": 0.0456268310546875, "step": 118890 }, { "epoch": 1.0280499087772696, "grad_norm": 12.379983347994829, "learning_rate": 2.8678606833236385e-06, "loss": 0.07251472473144531, "step": 118895 }, { "epoch": 1.0280931422988129, "grad_norm": 0.21287508506687172, "learning_rate": 2.867657148184166e-06, "loss": 0.12752532958984375, "step": 118900 }, { "epoch": 1.028136375820356, "grad_norm": 0.38269678022377146, "learning_rate": 2.8674536136550473e-06, "loss": 0.09058990478515624, "step": 118905 }, { "epoch": 1.0281796093418993, "grad_norm": 35.513493459146105, "learning_rate": 2.8672500797372197e-06, "loss": 0.296014404296875, "step": 118910 }, { "epoch": 1.0282228428634426, "grad_norm": 1.0120043031923822, "learning_rate": 2.8670465464316246e-06, "loss": 0.041240692138671875, "step": 118915 }, { "epoch": 1.0282660763849858, "grad_norm": 4.693305201725161, "learning_rate": 2.8668430137391983e-06, "loss": 0.02293243408203125, "step": 118920 }, { "epoch": 1.028309309906529, "grad_norm": 19.902412622976886, "learning_rate": 2.8666394816608803e-06, "loss": 0.1098541259765625, "step": 118925 }, { "epoch": 1.0283525434280725, "grad_norm": 19.56057743722512, "learning_rate": 2.866435950197608e-06, "loss": 0.2589263916015625, "step": 118930 }, { "epoch": 1.0283957769496157, "grad_norm": 12.890772925002752, "learning_rate": 2.866232419350323e-06, "loss": 0.09311027526855468, "step": 118935 }, { "epoch": 1.028439010471159, "grad_norm": 1.2856066046087267, "learning_rate": 2.866028889119961e-06, "loss": 0.027777099609375, "step": 118940 }, { "epoch": 1.0284822439927022, "grad_norm": 18.86061193355405, "learning_rate": 2.865825359507463e-06, "loss": 0.043218231201171874, "step": 118945 }, { "epoch": 1.0285254775142454, "grad_norm": 0.20617089925359192, "learning_rate": 2.865621830513766e-06, "loss": 0.09528923034667969, "step": 118950 }, { "epoch": 1.0285687110357886, "grad_norm": 6.270641940007336, "learning_rate": 2.86541830213981e-06, "loss": 0.05498695373535156, "step": 118955 }, { "epoch": 1.028611944557332, "grad_norm": 0.6561324110328595, "learning_rate": 2.865214774386531e-06, "loss": 0.018817138671875, "step": 118960 }, { "epoch": 1.0286551780788753, "grad_norm": 12.55717104633765, "learning_rate": 2.8650112472548713e-06, "loss": 0.01985769271850586, "step": 118965 }, { "epoch": 1.0286984116004185, "grad_norm": 1.0026969161879544, "learning_rate": 2.864807720745767e-06, "loss": 0.03362541198730469, "step": 118970 }, { "epoch": 1.0287416451219618, "grad_norm": 6.276338652616599, "learning_rate": 2.8646041948601584e-06, "loss": 0.23741092681884765, "step": 118975 }, { "epoch": 1.028784878643505, "grad_norm": 0.15286431844088216, "learning_rate": 2.864400669598983e-06, "loss": 0.07157573699951172, "step": 118980 }, { "epoch": 1.0288281121650482, "grad_norm": 7.15538724684152, "learning_rate": 2.8641971449631796e-06, "loss": 0.4642311096191406, "step": 118985 }, { "epoch": 1.0288713456865914, "grad_norm": 8.783398349507216, "learning_rate": 2.863993620953686e-06, "loss": 0.0890655517578125, "step": 118990 }, { "epoch": 1.028914579208135, "grad_norm": 0.21795743959155203, "learning_rate": 2.863790097571443e-06, "loss": 0.04901466369628906, "step": 118995 }, { "epoch": 1.0289578127296781, "grad_norm": 14.404285272486709, "learning_rate": 2.863586574817387e-06, "loss": 0.0515625, "step": 119000 }, { "epoch": 1.0290010462512214, "grad_norm": 0.8107231060390545, "learning_rate": 2.8633830526924587e-06, "loss": 0.02555694580078125, "step": 119005 }, { "epoch": 1.0290442797727646, "grad_norm": 1.2366304047743353, "learning_rate": 2.8631795311975958e-06, "loss": 0.085565185546875, "step": 119010 }, { "epoch": 1.0290875132943078, "grad_norm": 0.2644608145647145, "learning_rate": 2.862976010333737e-06, "loss": 0.17909011840820313, "step": 119015 }, { "epoch": 1.029130746815851, "grad_norm": 1.751071430303892, "learning_rate": 2.8627724901018196e-06, "loss": 0.01273040771484375, "step": 119020 }, { "epoch": 1.0291739803373945, "grad_norm": 6.788508895293408, "learning_rate": 2.862568970502783e-06, "loss": 0.02238006591796875, "step": 119025 }, { "epoch": 1.0292172138589377, "grad_norm": 11.681894282023489, "learning_rate": 2.862365451537567e-06, "loss": 0.16726608276367189, "step": 119030 }, { "epoch": 1.029260447380481, "grad_norm": 5.922128052660597, "learning_rate": 2.86216193320711e-06, "loss": 0.07249221801757813, "step": 119035 }, { "epoch": 1.0293036809020242, "grad_norm": 5.931722285746054, "learning_rate": 2.8619584155123494e-06, "loss": 0.036930084228515625, "step": 119040 }, { "epoch": 1.0293469144235674, "grad_norm": 8.18224472950247, "learning_rate": 2.8617548984542247e-06, "loss": 0.07419033050537109, "step": 119045 }, { "epoch": 1.0293901479451106, "grad_norm": 7.691100556983292, "learning_rate": 2.8615513820336737e-06, "loss": 0.027386474609375, "step": 119050 }, { "epoch": 1.029433381466654, "grad_norm": 1.0762435222488607, "learning_rate": 2.8613478662516342e-06, "loss": 0.0718597412109375, "step": 119055 }, { "epoch": 1.0294766149881973, "grad_norm": 3.7894314727901364, "learning_rate": 2.861144351109048e-06, "loss": 0.1178060531616211, "step": 119060 }, { "epoch": 1.0295198485097405, "grad_norm": 3.744052995653834, "learning_rate": 2.860940836606852e-06, "loss": 0.2808204650878906, "step": 119065 }, { "epoch": 1.0295630820312838, "grad_norm": 0.31006205377980073, "learning_rate": 2.8607373227459837e-06, "loss": 0.024840545654296876, "step": 119070 }, { "epoch": 1.029606315552827, "grad_norm": 0.7764349938760562, "learning_rate": 2.8605338095273827e-06, "loss": 0.0944366455078125, "step": 119075 }, { "epoch": 1.0296495490743702, "grad_norm": 6.861111421018443, "learning_rate": 2.8603302969519877e-06, "loss": 0.04891624450683594, "step": 119080 }, { "epoch": 1.0296927825959135, "grad_norm": 1.758048628344776, "learning_rate": 2.860126785020737e-06, "loss": 0.07290477752685547, "step": 119085 }, { "epoch": 1.029736016117457, "grad_norm": 5.8155503347261, "learning_rate": 2.859923273734568e-06, "loss": 0.23464508056640626, "step": 119090 }, { "epoch": 1.0297792496390001, "grad_norm": 8.650096401588097, "learning_rate": 2.859719763094422e-06, "loss": 0.10625, "step": 119095 }, { "epoch": 1.0298224831605434, "grad_norm": 1.3804633927934373, "learning_rate": 2.8595162531012356e-06, "loss": 0.14155426025390624, "step": 119100 }, { "epoch": 1.0298657166820866, "grad_norm": 0.6817101558614064, "learning_rate": 2.8593127437559477e-06, "loss": 0.04538421630859375, "step": 119105 }, { "epoch": 1.0299089502036298, "grad_norm": 0.5209962049067447, "learning_rate": 2.8591092350594973e-06, "loss": 0.06905364990234375, "step": 119110 }, { "epoch": 1.029952183725173, "grad_norm": 0.848842430558386, "learning_rate": 2.8589057270128228e-06, "loss": 0.05180816650390625, "step": 119115 }, { "epoch": 1.0299954172467165, "grad_norm": 3.8109140032696485, "learning_rate": 2.8587022196168614e-06, "loss": 0.01893768310546875, "step": 119120 }, { "epoch": 1.0300386507682597, "grad_norm": 1.9713545458793265, "learning_rate": 2.858498712872554e-06, "loss": 0.1171478271484375, "step": 119125 }, { "epoch": 1.030081884289803, "grad_norm": 0.162927976479731, "learning_rate": 2.858295206780838e-06, "loss": 0.07679290771484375, "step": 119130 }, { "epoch": 1.0301251178113462, "grad_norm": 0.976274709149819, "learning_rate": 2.8580917013426516e-06, "loss": 0.0988494873046875, "step": 119135 }, { "epoch": 1.0301683513328894, "grad_norm": 8.081667322044728, "learning_rate": 2.8578881965589343e-06, "loss": 0.06128387451171875, "step": 119140 }, { "epoch": 1.0302115848544326, "grad_norm": 1.600454357483341, "learning_rate": 2.857684692430624e-06, "loss": 0.02454833984375, "step": 119145 }, { "epoch": 1.030254818375976, "grad_norm": 1.074067686782253, "learning_rate": 2.857481188958658e-06, "loss": 0.17228641510009765, "step": 119150 }, { "epoch": 1.0302980518975193, "grad_norm": 0.30212969561703806, "learning_rate": 2.8572776861439773e-06, "loss": 0.014742660522460937, "step": 119155 }, { "epoch": 1.0303412854190626, "grad_norm": 3.425475553886548, "learning_rate": 2.8570741839875195e-06, "loss": 0.08054580688476562, "step": 119160 }, { "epoch": 1.0303845189406058, "grad_norm": 0.778218723561663, "learning_rate": 2.8568706824902223e-06, "loss": 0.06915626525878907, "step": 119165 }, { "epoch": 1.030427752462149, "grad_norm": 0.37194016109075173, "learning_rate": 2.8566671816530255e-06, "loss": 0.08839168548583984, "step": 119170 }, { "epoch": 1.0304709859836922, "grad_norm": 14.786577596897798, "learning_rate": 2.856463681476867e-06, "loss": 0.1020538330078125, "step": 119175 }, { "epoch": 1.0305142195052355, "grad_norm": 0.34371357616131787, "learning_rate": 2.856260181962685e-06, "loss": 0.030590057373046875, "step": 119180 }, { "epoch": 1.030557453026779, "grad_norm": 0.2055545309830728, "learning_rate": 2.8560566831114176e-06, "loss": 0.03123779296875, "step": 119185 }, { "epoch": 1.0306006865483222, "grad_norm": 1.8567419802908252, "learning_rate": 2.855853184924005e-06, "loss": 0.303924560546875, "step": 119190 }, { "epoch": 1.0306439200698654, "grad_norm": 4.027179694273992, "learning_rate": 2.8556496874013845e-06, "loss": 0.017855453491210937, "step": 119195 }, { "epoch": 1.0306871535914086, "grad_norm": 26.99796648380989, "learning_rate": 2.855446190544495e-06, "loss": 0.15106430053710937, "step": 119200 }, { "epoch": 1.0307303871129518, "grad_norm": 0.5177151594375787, "learning_rate": 2.855242694354275e-06, "loss": 0.08251266479492188, "step": 119205 }, { "epoch": 1.030773620634495, "grad_norm": 0.22198758238350566, "learning_rate": 2.8550391988316634e-06, "loss": 0.037616729736328125, "step": 119210 }, { "epoch": 1.0308168541560385, "grad_norm": 4.276802417984546, "learning_rate": 2.8548357039775965e-06, "loss": 0.13341064453125, "step": 119215 }, { "epoch": 1.0308600876775817, "grad_norm": 17.41113265001022, "learning_rate": 2.854632209793016e-06, "loss": 0.05182342529296875, "step": 119220 }, { "epoch": 1.030903321199125, "grad_norm": 0.8092755039908869, "learning_rate": 2.8544287162788578e-06, "loss": 0.016747283935546874, "step": 119225 }, { "epoch": 1.0309465547206682, "grad_norm": 3.51335086614111, "learning_rate": 2.8542252234360626e-06, "loss": 0.06305122375488281, "step": 119230 }, { "epoch": 1.0309897882422114, "grad_norm": 17.48137670779924, "learning_rate": 2.8540217312655676e-06, "loss": 0.17762908935546876, "step": 119235 }, { "epoch": 1.0310330217637547, "grad_norm": 33.04310664792844, "learning_rate": 2.8538182397683117e-06, "loss": 0.1668771743774414, "step": 119240 }, { "epoch": 1.0310762552852981, "grad_norm": 1.3343412061194164, "learning_rate": 2.8536147489452314e-06, "loss": 0.06096038818359375, "step": 119245 }, { "epoch": 1.0311194888068413, "grad_norm": 25.458201032121735, "learning_rate": 2.853411258797268e-06, "loss": 0.106890869140625, "step": 119250 }, { "epoch": 1.0311627223283846, "grad_norm": 15.805199014301188, "learning_rate": 2.8532077693253595e-06, "loss": 0.37169570922851564, "step": 119255 }, { "epoch": 1.0312059558499278, "grad_norm": 0.6112457632024576, "learning_rate": 2.8530042805304437e-06, "loss": 0.04325790405273437, "step": 119260 }, { "epoch": 1.031249189371471, "grad_norm": 22.402752122205193, "learning_rate": 2.852800792413459e-06, "loss": 0.17934541702270507, "step": 119265 }, { "epoch": 1.0312924228930143, "grad_norm": 8.980361367275083, "learning_rate": 2.8525973049753444e-06, "loss": 0.18927001953125, "step": 119270 }, { "epoch": 1.0313356564145575, "grad_norm": 6.018967731451287, "learning_rate": 2.852393818217037e-06, "loss": 0.020330047607421874, "step": 119275 }, { "epoch": 1.031378889936101, "grad_norm": 0.14685685359750933, "learning_rate": 2.8521903321394763e-06, "loss": 0.05870819091796875, "step": 119280 }, { "epoch": 1.0314221234576442, "grad_norm": 13.810445630471449, "learning_rate": 2.8519868467436012e-06, "loss": 0.08213882446289063, "step": 119285 }, { "epoch": 1.0314653569791874, "grad_norm": 0.4518902610515359, "learning_rate": 2.85178336203035e-06, "loss": 0.16375656127929689, "step": 119290 }, { "epoch": 1.0315085905007306, "grad_norm": 1.994168032107817, "learning_rate": 2.851579878000661e-06, "loss": 0.0485651969909668, "step": 119295 }, { "epoch": 1.0315518240222739, "grad_norm": 27.379142629825026, "learning_rate": 2.851376394655472e-06, "loss": 0.09967918395996093, "step": 119300 }, { "epoch": 1.031595057543817, "grad_norm": 5.38103676973704, "learning_rate": 2.851172911995722e-06, "loss": 0.120172119140625, "step": 119305 }, { "epoch": 1.0316382910653605, "grad_norm": 11.646792628294438, "learning_rate": 2.8509694300223487e-06, "loss": 0.05761871337890625, "step": 119310 }, { "epoch": 1.0316815245869038, "grad_norm": 0.2611548248651016, "learning_rate": 2.850765948736292e-06, "loss": 0.046749114990234375, "step": 119315 }, { "epoch": 1.031724758108447, "grad_norm": 30.87405303979513, "learning_rate": 2.85056246813849e-06, "loss": 0.04118156433105469, "step": 119320 }, { "epoch": 1.0317679916299902, "grad_norm": 16.996880105525637, "learning_rate": 2.8503589882298802e-06, "loss": 0.17641687393188477, "step": 119325 }, { "epoch": 1.0318112251515335, "grad_norm": 4.987022925577934, "learning_rate": 2.850155509011401e-06, "loss": 0.11578121185302734, "step": 119330 }, { "epoch": 1.0318544586730767, "grad_norm": 0.5167151498831505, "learning_rate": 2.849952030483992e-06, "loss": 0.1004460334777832, "step": 119335 }, { "epoch": 1.0318976921946201, "grad_norm": 5.491297449938482, "learning_rate": 2.84974855264859e-06, "loss": 0.08605575561523438, "step": 119340 }, { "epoch": 1.0319409257161634, "grad_norm": 7.295534698853531, "learning_rate": 2.8495450755061358e-06, "loss": 0.0805419921875, "step": 119345 }, { "epoch": 1.0319841592377066, "grad_norm": 6.412643125697239, "learning_rate": 2.849341599057566e-06, "loss": 0.17242698669433593, "step": 119350 }, { "epoch": 1.0320273927592498, "grad_norm": 39.477690887652216, "learning_rate": 2.8491381233038196e-06, "loss": 0.14710960388183594, "step": 119355 }, { "epoch": 1.032070626280793, "grad_norm": 1.4263575054811184, "learning_rate": 2.848934648245834e-06, "loss": 0.06143951416015625, "step": 119360 }, { "epoch": 1.0321138598023363, "grad_norm": 1.904086479429563, "learning_rate": 2.848731173884549e-06, "loss": 0.11890830993652343, "step": 119365 }, { "epoch": 1.0321570933238795, "grad_norm": 3.5911632818890626, "learning_rate": 2.848527700220903e-06, "loss": 0.04347305297851563, "step": 119370 }, { "epoch": 1.032200326845423, "grad_norm": 12.525251355166898, "learning_rate": 2.848324227255832e-06, "loss": 0.037908935546875, "step": 119375 }, { "epoch": 1.0322435603669662, "grad_norm": 2.660374614308046, "learning_rate": 2.848120754990278e-06, "loss": 0.02239990234375, "step": 119380 }, { "epoch": 1.0322867938885094, "grad_norm": 21.336927404358637, "learning_rate": 2.8479172834251775e-06, "loss": 0.0843536376953125, "step": 119385 }, { "epoch": 1.0323300274100526, "grad_norm": 18.03143138901201, "learning_rate": 2.8477138125614685e-06, "loss": 0.07656097412109375, "step": 119390 }, { "epoch": 1.0323732609315959, "grad_norm": 1.1483535237321714, "learning_rate": 2.8475103424000904e-06, "loss": 0.01583099365234375, "step": 119395 }, { "epoch": 1.032416494453139, "grad_norm": 0.5166944563358518, "learning_rate": 2.8473068729419813e-06, "loss": 0.17331161499023437, "step": 119400 }, { "epoch": 1.0324597279746826, "grad_norm": 0.1175392618399237, "learning_rate": 2.847103404188078e-06, "loss": 0.05411605834960938, "step": 119405 }, { "epoch": 1.0325029614962258, "grad_norm": 4.957907855581335, "learning_rate": 2.8468999361393217e-06, "loss": 0.05000152587890625, "step": 119410 }, { "epoch": 1.032546195017769, "grad_norm": 34.28682628614384, "learning_rate": 2.8466964687966494e-06, "loss": 0.22723007202148438, "step": 119415 }, { "epoch": 1.0325894285393122, "grad_norm": 0.6110770588354804, "learning_rate": 2.8464930021609985e-06, "loss": 0.051563262939453125, "step": 119420 }, { "epoch": 1.0326326620608555, "grad_norm": 14.094632133355638, "learning_rate": 2.846289536233309e-06, "loss": 0.02727088928222656, "step": 119425 }, { "epoch": 1.0326758955823987, "grad_norm": 66.00080712739909, "learning_rate": 2.8460860710145187e-06, "loss": 0.16310653686523438, "step": 119430 }, { "epoch": 1.032719129103942, "grad_norm": 7.48969099035314, "learning_rate": 2.845882606505566e-06, "loss": 0.077685546875, "step": 119435 }, { "epoch": 1.0327623626254854, "grad_norm": 0.3184500491369431, "learning_rate": 2.8456791427073873e-06, "loss": 0.1517925262451172, "step": 119440 }, { "epoch": 1.0328055961470286, "grad_norm": 1.4806333604464115, "learning_rate": 2.845475679620925e-06, "loss": 0.0323516845703125, "step": 119445 }, { "epoch": 1.0328488296685718, "grad_norm": 7.396185241667817, "learning_rate": 2.8452722172471134e-06, "loss": 0.020670700073242187, "step": 119450 }, { "epoch": 1.032892063190115, "grad_norm": 7.447080815697372, "learning_rate": 2.8450687555868942e-06, "loss": 0.03499736785888672, "step": 119455 }, { "epoch": 1.0329352967116583, "grad_norm": 1.317164010994759, "learning_rate": 2.8448652946412037e-06, "loss": 0.008171653747558594, "step": 119460 }, { "epoch": 1.0329785302332015, "grad_norm": 1.6402077007739777, "learning_rate": 2.844661834410981e-06, "loss": 0.014856338500976562, "step": 119465 }, { "epoch": 1.033021763754745, "grad_norm": 14.710347994606398, "learning_rate": 2.844458374897163e-06, "loss": 0.0647369384765625, "step": 119470 }, { "epoch": 1.0330649972762882, "grad_norm": 9.259958505051438, "learning_rate": 2.8442549161006893e-06, "loss": 0.03760223388671875, "step": 119475 }, { "epoch": 1.0331082307978314, "grad_norm": 0.4240482971303485, "learning_rate": 2.8440514580224996e-06, "loss": 0.11399612426757813, "step": 119480 }, { "epoch": 1.0331514643193747, "grad_norm": 17.6042756970362, "learning_rate": 2.8438480006635303e-06, "loss": 0.19606266021728516, "step": 119485 }, { "epoch": 1.0331946978409179, "grad_norm": 2.1648118916554693, "learning_rate": 2.8436445440247202e-06, "loss": 0.0190277099609375, "step": 119490 }, { "epoch": 1.0332379313624611, "grad_norm": 0.3143373981287058, "learning_rate": 2.8434410881070075e-06, "loss": 0.09723129272460937, "step": 119495 }, { "epoch": 1.0332811648840046, "grad_norm": 0.5798662070308163, "learning_rate": 2.8432376329113302e-06, "loss": 0.32178955078125, "step": 119500 }, { "epoch": 1.0333243984055478, "grad_norm": 1.835154360441221, "learning_rate": 2.843034178438627e-06, "loss": 0.2151094436645508, "step": 119505 }, { "epoch": 1.033367631927091, "grad_norm": 6.9089999450213195, "learning_rate": 2.8428307246898374e-06, "loss": 0.0295013427734375, "step": 119510 }, { "epoch": 1.0334108654486343, "grad_norm": 0.1659368851873912, "learning_rate": 2.8426272716658984e-06, "loss": 0.02087249755859375, "step": 119515 }, { "epoch": 1.0334540989701775, "grad_norm": 21.75641571221585, "learning_rate": 2.8424238193677485e-06, "loss": 0.2013641357421875, "step": 119520 }, { "epoch": 1.0334973324917207, "grad_norm": 0.6175000552984874, "learning_rate": 2.8422203677963253e-06, "loss": 0.07024993896484374, "step": 119525 }, { "epoch": 1.033540566013264, "grad_norm": 0.647575188160506, "learning_rate": 2.842016916952568e-06, "loss": 0.23873214721679686, "step": 119530 }, { "epoch": 1.0335837995348074, "grad_norm": 3.9140504596009253, "learning_rate": 2.8418134668374144e-06, "loss": 0.14077835083007811, "step": 119535 }, { "epoch": 1.0336270330563506, "grad_norm": 0.15497655992117865, "learning_rate": 2.841610017451804e-06, "loss": 0.06441192626953125, "step": 119540 }, { "epoch": 1.0336702665778938, "grad_norm": 26.084915147128857, "learning_rate": 2.841406568796674e-06, "loss": 0.04519157409667969, "step": 119545 }, { "epoch": 1.033713500099437, "grad_norm": 8.275228769481298, "learning_rate": 2.841203120872963e-06, "loss": 0.03850059509277344, "step": 119550 }, { "epoch": 1.0337567336209803, "grad_norm": 3.4377478738361784, "learning_rate": 2.8409996736816087e-06, "loss": 0.028343963623046874, "step": 119555 }, { "epoch": 1.0337999671425235, "grad_norm": 2.569666749350704, "learning_rate": 2.8407962272235506e-06, "loss": 0.0372406005859375, "step": 119560 }, { "epoch": 1.033843200664067, "grad_norm": 0.4463286804865636, "learning_rate": 2.8405927814997244e-06, "loss": 0.015648269653320314, "step": 119565 }, { "epoch": 1.0338864341856102, "grad_norm": 1.5558826893256366, "learning_rate": 2.840389336511072e-06, "loss": 0.054825973510742185, "step": 119570 }, { "epoch": 1.0339296677071534, "grad_norm": 41.36748709346167, "learning_rate": 2.8401858922585297e-06, "loss": 0.31460723876953123, "step": 119575 }, { "epoch": 1.0339729012286967, "grad_norm": 0.1991718443149384, "learning_rate": 2.839982448743036e-06, "loss": 0.08665084838867188, "step": 119580 }, { "epoch": 1.03401613475024, "grad_norm": 12.35554300940368, "learning_rate": 2.839779005965528e-06, "loss": 0.05077056884765625, "step": 119585 }, { "epoch": 1.0340593682717831, "grad_norm": 2.1413255728231815, "learning_rate": 2.8395755639269466e-06, "loss": 0.0542572021484375, "step": 119590 }, { "epoch": 1.0341026017933266, "grad_norm": 0.4402558666389517, "learning_rate": 2.8393721226282263e-06, "loss": 0.01697845458984375, "step": 119595 }, { "epoch": 1.0341458353148698, "grad_norm": 1.2931121942313317, "learning_rate": 2.8391686820703096e-06, "loss": 0.0113128662109375, "step": 119600 }, { "epoch": 1.034189068836413, "grad_norm": 11.349635858212496, "learning_rate": 2.8389652422541325e-06, "loss": 0.15849151611328124, "step": 119605 }, { "epoch": 1.0342323023579563, "grad_norm": 27.951566488165216, "learning_rate": 2.838761803180633e-06, "loss": 0.907720947265625, "step": 119610 }, { "epoch": 1.0342755358794995, "grad_norm": 3.31584289135432, "learning_rate": 2.8385583648507497e-06, "loss": 0.05674057006835938, "step": 119615 }, { "epoch": 1.0343187694010427, "grad_norm": 10.259129171704052, "learning_rate": 2.8383549272654215e-06, "loss": 0.127227783203125, "step": 119620 }, { "epoch": 1.034362002922586, "grad_norm": 71.60658993509328, "learning_rate": 2.8381514904255864e-06, "loss": 0.22966995239257812, "step": 119625 }, { "epoch": 1.0344052364441294, "grad_norm": 5.98420796778199, "learning_rate": 2.8379480543321805e-06, "loss": 0.05713081359863281, "step": 119630 }, { "epoch": 1.0344484699656726, "grad_norm": 5.6992524955805655, "learning_rate": 2.8377446189861452e-06, "loss": 0.16788825988769532, "step": 119635 }, { "epoch": 1.0344917034872159, "grad_norm": 7.463332422925007, "learning_rate": 2.8375411843884174e-06, "loss": 0.11971397399902343, "step": 119640 }, { "epoch": 1.034534937008759, "grad_norm": 0.26959654789865445, "learning_rate": 2.837337750539935e-06, "loss": 0.1742401123046875, "step": 119645 }, { "epoch": 1.0345781705303023, "grad_norm": 2.8176000934004164, "learning_rate": 2.8371343174416372e-06, "loss": 0.0637237548828125, "step": 119650 }, { "epoch": 1.0346214040518456, "grad_norm": 76.46029404078175, "learning_rate": 2.836930885094461e-06, "loss": 0.2378384590148926, "step": 119655 }, { "epoch": 1.034664637573389, "grad_norm": 7.162038450570753, "learning_rate": 2.8367274534993444e-06, "loss": 0.02151031494140625, "step": 119660 }, { "epoch": 1.0347078710949322, "grad_norm": 1.4831739097689827, "learning_rate": 2.8365240226572272e-06, "loss": 0.06006011962890625, "step": 119665 }, { "epoch": 1.0347511046164755, "grad_norm": 14.390321911035713, "learning_rate": 2.836320592569047e-06, "loss": 0.14542884826660157, "step": 119670 }, { "epoch": 1.0347943381380187, "grad_norm": 1.371195184364631, "learning_rate": 2.836117163235741e-06, "loss": 0.12047805786132812, "step": 119675 }, { "epoch": 1.034837571659562, "grad_norm": 1.3403656902553056, "learning_rate": 2.835913734658249e-06, "loss": 0.07450084686279297, "step": 119680 }, { "epoch": 1.0348808051811051, "grad_norm": 4.556140883309691, "learning_rate": 2.835710306837508e-06, "loss": 0.10288848876953124, "step": 119685 }, { "epoch": 1.0349240387026484, "grad_norm": 7.350670132487691, "learning_rate": 2.835506879774457e-06, "loss": 0.02337226867675781, "step": 119690 }, { "epoch": 1.0349672722241918, "grad_norm": 0.881497113961311, "learning_rate": 2.835303453470032e-06, "loss": 0.11637229919433593, "step": 119695 }, { "epoch": 1.035010505745735, "grad_norm": 0.9923456496845375, "learning_rate": 2.8351000279251742e-06, "loss": 0.07671585083007812, "step": 119700 }, { "epoch": 1.0350537392672783, "grad_norm": 0.5679470217997827, "learning_rate": 2.8348966031408207e-06, "loss": 0.253936767578125, "step": 119705 }, { "epoch": 1.0350969727888215, "grad_norm": 14.365196468259695, "learning_rate": 2.8346931791179096e-06, "loss": 0.08885650634765625, "step": 119710 }, { "epoch": 1.0351402063103647, "grad_norm": 5.194564153566207, "learning_rate": 2.834489755857379e-06, "loss": 0.16737060546875, "step": 119715 }, { "epoch": 1.035183439831908, "grad_norm": 35.572155946157565, "learning_rate": 2.834286333360166e-06, "loss": 0.1863189697265625, "step": 119720 }, { "epoch": 1.0352266733534514, "grad_norm": 10.452984086354567, "learning_rate": 2.8340829116272097e-06, "loss": 0.04895095825195313, "step": 119725 }, { "epoch": 1.0352699068749946, "grad_norm": 33.4520429004592, "learning_rate": 2.833879490659449e-06, "loss": 0.13789710998535157, "step": 119730 }, { "epoch": 1.0353131403965379, "grad_norm": 1.2477579809219592, "learning_rate": 2.8336760704578216e-06, "loss": 0.0116119384765625, "step": 119735 }, { "epoch": 1.035356373918081, "grad_norm": 0.18881401809109646, "learning_rate": 2.833472651023266e-06, "loss": 0.06068572998046875, "step": 119740 }, { "epoch": 1.0353996074396243, "grad_norm": 0.14203057738568875, "learning_rate": 2.8332692323567193e-06, "loss": 0.13760147094726563, "step": 119745 }, { "epoch": 1.0354428409611676, "grad_norm": 2.5103483592994764, "learning_rate": 2.8330658144591197e-06, "loss": 0.013512420654296874, "step": 119750 }, { "epoch": 1.035486074482711, "grad_norm": 0.1974131318749026, "learning_rate": 2.8328623973314058e-06, "loss": 0.013460540771484375, "step": 119755 }, { "epoch": 1.0355293080042542, "grad_norm": 37.74567111960162, "learning_rate": 2.8326589809745155e-06, "loss": 0.06392669677734375, "step": 119760 }, { "epoch": 1.0355725415257975, "grad_norm": 1.7231422375456042, "learning_rate": 2.8324555653893883e-06, "loss": 0.030286407470703124, "step": 119765 }, { "epoch": 1.0356157750473407, "grad_norm": 30.13400322571714, "learning_rate": 2.8322521505769606e-06, "loss": 0.38556709289550783, "step": 119770 }, { "epoch": 1.035659008568884, "grad_norm": 6.317234955566911, "learning_rate": 2.8320487365381716e-06, "loss": 0.04625396728515625, "step": 119775 }, { "epoch": 1.0357022420904272, "grad_norm": 0.5541468708268735, "learning_rate": 2.8318453232739583e-06, "loss": 0.008762359619140625, "step": 119780 }, { "epoch": 1.0357454756119706, "grad_norm": 0.9672081046678923, "learning_rate": 2.8316419107852604e-06, "loss": 0.09492645263671876, "step": 119785 }, { "epoch": 1.0357887091335138, "grad_norm": 1.005807922491998, "learning_rate": 2.8314384990730135e-06, "loss": 0.19024810791015626, "step": 119790 }, { "epoch": 1.035831942655057, "grad_norm": 4.459613411392652, "learning_rate": 2.8312350881381586e-06, "loss": 0.04963226318359375, "step": 119795 }, { "epoch": 1.0358751761766003, "grad_norm": 17.918939355062328, "learning_rate": 2.831031677981633e-06, "loss": 0.05235157012939453, "step": 119800 }, { "epoch": 1.0359184096981435, "grad_norm": 7.717488438746545, "learning_rate": 2.8308282686043742e-06, "loss": 0.018918228149414063, "step": 119805 }, { "epoch": 1.0359616432196868, "grad_norm": 2.1423613717644785, "learning_rate": 2.8306248600073197e-06, "loss": 0.121795654296875, "step": 119810 }, { "epoch": 1.03600487674123, "grad_norm": 21.683739322779278, "learning_rate": 2.8304214521914086e-06, "loss": 0.14729537963867187, "step": 119815 }, { "epoch": 1.0360481102627734, "grad_norm": 3.247651786352078, "learning_rate": 2.830218045157578e-06, "loss": 0.1324676513671875, "step": 119820 }, { "epoch": 1.0360913437843167, "grad_norm": 1.0356702584534268, "learning_rate": 2.8300146389067678e-06, "loss": 0.122332763671875, "step": 119825 }, { "epoch": 1.03613457730586, "grad_norm": 7.995031548870503, "learning_rate": 2.8298112334399154e-06, "loss": 0.26513671875, "step": 119830 }, { "epoch": 1.0361778108274031, "grad_norm": 6.597150670999425, "learning_rate": 2.8296078287579585e-06, "loss": 0.084283447265625, "step": 119835 }, { "epoch": 1.0362210443489464, "grad_norm": 8.16150537501664, "learning_rate": 2.8294044248618343e-06, "loss": 0.031789398193359374, "step": 119840 }, { "epoch": 1.0362642778704896, "grad_norm": 1.507359154066073, "learning_rate": 2.829201021752483e-06, "loss": 0.03361663818359375, "step": 119845 }, { "epoch": 1.036307511392033, "grad_norm": 7.1946771611965055, "learning_rate": 2.8289976194308394e-06, "loss": 0.0366485595703125, "step": 119850 }, { "epoch": 1.0363507449135763, "grad_norm": 0.5861078228075655, "learning_rate": 2.8287942178978456e-06, "loss": 0.08806686401367188, "step": 119855 }, { "epoch": 1.0363939784351195, "grad_norm": 4.250799142401954, "learning_rate": 2.8285908171544378e-06, "loss": 0.08134002685546875, "step": 119860 }, { "epoch": 1.0364372119566627, "grad_norm": 1.054433207538985, "learning_rate": 2.8283874172015535e-06, "loss": 0.016709136962890624, "step": 119865 }, { "epoch": 1.036480445478206, "grad_norm": 33.15625846549751, "learning_rate": 2.828184018040131e-06, "loss": 0.1451631546020508, "step": 119870 }, { "epoch": 1.0365236789997492, "grad_norm": 5.323500689052748, "learning_rate": 2.827980619671109e-06, "loss": 0.025202178955078126, "step": 119875 }, { "epoch": 1.0365669125212924, "grad_norm": 2.1215147387724826, "learning_rate": 2.827777222095425e-06, "loss": 0.019424057006835936, "step": 119880 }, { "epoch": 1.0366101460428359, "grad_norm": 14.926710792831374, "learning_rate": 2.8275738253140164e-06, "loss": 0.11218719482421875, "step": 119885 }, { "epoch": 1.036653379564379, "grad_norm": 1.3205975899933733, "learning_rate": 2.8273704293278234e-06, "loss": 0.16005439758300782, "step": 119890 }, { "epoch": 1.0366966130859223, "grad_norm": 4.319395109207865, "learning_rate": 2.827167034137782e-06, "loss": 0.21625595092773436, "step": 119895 }, { "epoch": 1.0367398466074655, "grad_norm": 0.5836880538267022, "learning_rate": 2.8269636397448314e-06, "loss": 0.132586669921875, "step": 119900 }, { "epoch": 1.0367830801290088, "grad_norm": 14.466036354658659, "learning_rate": 2.8267602461499094e-06, "loss": 0.3130840301513672, "step": 119905 }, { "epoch": 1.036826313650552, "grad_norm": 6.763824847356797, "learning_rate": 2.8265568533539537e-06, "loss": 0.057447052001953124, "step": 119910 }, { "epoch": 1.0368695471720955, "grad_norm": 10.938549324179869, "learning_rate": 2.8263534613579017e-06, "loss": 0.046506500244140624, "step": 119915 }, { "epoch": 1.0369127806936387, "grad_norm": 0.788851508297601, "learning_rate": 2.826150070162693e-06, "loss": 0.07568817138671875, "step": 119920 }, { "epoch": 1.036956014215182, "grad_norm": 22.239330392936548, "learning_rate": 2.825946679769264e-06, "loss": 0.097015380859375, "step": 119925 }, { "epoch": 1.0369992477367251, "grad_norm": 2.119836099397344, "learning_rate": 2.8257432901785546e-06, "loss": 0.01478729248046875, "step": 119930 }, { "epoch": 1.0370424812582684, "grad_norm": 0.20016225792056905, "learning_rate": 2.8255399013915013e-06, "loss": 0.06297779083251953, "step": 119935 }, { "epoch": 1.0370857147798116, "grad_norm": 0.11532698513942673, "learning_rate": 2.8253365134090425e-06, "loss": 0.10739822387695312, "step": 119940 }, { "epoch": 1.0371289483013548, "grad_norm": 0.2700624605753366, "learning_rate": 2.8251331262321154e-06, "loss": 0.05680694580078125, "step": 119945 }, { "epoch": 1.0371721818228983, "grad_norm": 52.08390400859261, "learning_rate": 2.82492973986166e-06, "loss": 0.24676666259765626, "step": 119950 }, { "epoch": 1.0372154153444415, "grad_norm": 0.03626703019692749, "learning_rate": 2.824726354298613e-06, "loss": 0.041334915161132815, "step": 119955 }, { "epoch": 1.0372586488659847, "grad_norm": 4.238793344607509, "learning_rate": 2.8245229695439125e-06, "loss": 0.1048248291015625, "step": 119960 }, { "epoch": 1.037301882387528, "grad_norm": 6.825421086623924, "learning_rate": 2.824319585598497e-06, "loss": 0.032666015625, "step": 119965 }, { "epoch": 1.0373451159090712, "grad_norm": 10.01951323034521, "learning_rate": 2.8241162024633036e-06, "loss": 0.0763519287109375, "step": 119970 }, { "epoch": 1.0373883494306144, "grad_norm": 0.15749509435999487, "learning_rate": 2.8239128201392705e-06, "loss": 0.007746315002441407, "step": 119975 }, { "epoch": 1.0374315829521579, "grad_norm": 19.324680403150346, "learning_rate": 2.8237094386273358e-06, "loss": 0.1105712890625, "step": 119980 }, { "epoch": 1.037474816473701, "grad_norm": 0.12520757059489537, "learning_rate": 2.8235060579284375e-06, "loss": 0.05906982421875, "step": 119985 }, { "epoch": 1.0375180499952443, "grad_norm": 0.442626200791598, "learning_rate": 2.8233026780435145e-06, "loss": 0.13734893798828124, "step": 119990 }, { "epoch": 1.0375612835167876, "grad_norm": 34.9137846647494, "learning_rate": 2.8230992989735038e-06, "loss": 0.16889495849609376, "step": 119995 }, { "epoch": 1.0376045170383308, "grad_norm": 3.0958689613198365, "learning_rate": 2.8228959207193433e-06, "loss": 0.23533592224121094, "step": 120000 }, { "epoch": 1.0376045170383308, "eval_loss": 0.13517992198467255, "eval_margin": 0.14652018249034882, "eval_mean_neg": -0.0009239530190825462, "eval_mean_pos": 0.7222235202789307, "eval_runtime": 20.7958, "eval_samples_per_second": 11.108, "eval_steps_per_second": 5.578, "step": 120000 }, { "epoch": 1.037647750559874, "grad_norm": 3.758484622228887, "learning_rate": 2.8226925432819707e-06, "loss": 0.07839126586914062, "step": 120005 }, { "epoch": 1.0376909840814175, "grad_norm": 30.62842515193429, "learning_rate": 2.8224891666623247e-06, "loss": 0.08335914611816406, "step": 120010 }, { "epoch": 1.0377342176029607, "grad_norm": 0.7712561087308862, "learning_rate": 2.8222857908613427e-06, "loss": 0.019338607788085938, "step": 120015 }, { "epoch": 1.037777451124504, "grad_norm": 2.6492082502965557, "learning_rate": 2.822082415879964e-06, "loss": 0.07061767578125, "step": 120020 }, { "epoch": 1.0378206846460472, "grad_norm": 12.189825808058579, "learning_rate": 2.8218790417191248e-06, "loss": 0.04939422607421875, "step": 120025 }, { "epoch": 1.0378639181675904, "grad_norm": 3.819783992389764, "learning_rate": 2.8216756683797637e-06, "loss": 0.1797271728515625, "step": 120030 }, { "epoch": 1.0379071516891336, "grad_norm": 1.4449478452973585, "learning_rate": 2.8214722958628182e-06, "loss": 0.011708450317382813, "step": 120035 }, { "epoch": 1.037950385210677, "grad_norm": 21.04180485374587, "learning_rate": 2.8212689241692278e-06, "loss": 0.06831703186035157, "step": 120040 }, { "epoch": 1.0379936187322203, "grad_norm": 1.7262940806761988, "learning_rate": 2.821065553299928e-06, "loss": 0.030876922607421874, "step": 120045 }, { "epoch": 1.0380368522537635, "grad_norm": 0.05260869081677538, "learning_rate": 2.8208621832558587e-06, "loss": 0.060344696044921875, "step": 120050 }, { "epoch": 1.0380800857753067, "grad_norm": 0.5109183829803138, "learning_rate": 2.8206588140379575e-06, "loss": 0.09465484619140625, "step": 120055 }, { "epoch": 1.03812331929685, "grad_norm": 3.4125485450154085, "learning_rate": 2.820455445647162e-06, "loss": 0.04867076873779297, "step": 120060 }, { "epoch": 1.0381665528183932, "grad_norm": 1.7574238906136899, "learning_rate": 2.8202520780844093e-06, "loss": 0.058118438720703124, "step": 120065 }, { "epoch": 1.0382097863399364, "grad_norm": 5.763715008530023, "learning_rate": 2.8200487113506393e-06, "loss": 0.26209259033203125, "step": 120070 }, { "epoch": 1.0382530198614799, "grad_norm": 6.708953958642827, "learning_rate": 2.819845345446787e-06, "loss": 0.02850513458251953, "step": 120075 }, { "epoch": 1.0382962533830231, "grad_norm": 2.541157374689812, "learning_rate": 2.8196419803737934e-06, "loss": 0.1476531982421875, "step": 120080 }, { "epoch": 1.0383394869045663, "grad_norm": 0.6896935481442719, "learning_rate": 2.8194386161325953e-06, "loss": 0.063323974609375, "step": 120085 }, { "epoch": 1.0383827204261096, "grad_norm": 4.786876222459523, "learning_rate": 2.81923525272413e-06, "loss": 0.029394149780273438, "step": 120090 }, { "epoch": 1.0384259539476528, "grad_norm": 7.776525683251715, "learning_rate": 2.8190318901493356e-06, "loss": 0.04182586669921875, "step": 120095 }, { "epoch": 1.038469187469196, "grad_norm": 9.138849454064582, "learning_rate": 2.8188285284091505e-06, "loss": 0.0567901611328125, "step": 120100 }, { "epoch": 1.0385124209907395, "grad_norm": 1.5071196249450578, "learning_rate": 2.818625167504511e-06, "loss": 0.009704208374023438, "step": 120105 }, { "epoch": 1.0385556545122827, "grad_norm": 14.980071243290846, "learning_rate": 2.818421807436357e-06, "loss": 0.0542266845703125, "step": 120110 }, { "epoch": 1.038598888033826, "grad_norm": 11.265086641228041, "learning_rate": 2.818218448205626e-06, "loss": 0.42752609252929685, "step": 120115 }, { "epoch": 1.0386421215553692, "grad_norm": 0.19776568444833764, "learning_rate": 2.818015089813255e-06, "loss": 0.10343475341796875, "step": 120120 }, { "epoch": 1.0386853550769124, "grad_norm": 17.7399608797649, "learning_rate": 2.8178117322601824e-06, "loss": 0.03897781372070312, "step": 120125 }, { "epoch": 1.0387285885984556, "grad_norm": 4.201532088372679, "learning_rate": 2.8176083755473467e-06, "loss": 0.04563827514648437, "step": 120130 }, { "epoch": 1.0387718221199989, "grad_norm": 9.071426104301533, "learning_rate": 2.817405019675684e-06, "loss": 0.104937744140625, "step": 120135 }, { "epoch": 1.0388150556415423, "grad_norm": 3.0574548731275177, "learning_rate": 2.8172016646461333e-06, "loss": 0.041461944580078125, "step": 120140 }, { "epoch": 1.0388582891630855, "grad_norm": 8.888470748169595, "learning_rate": 2.8169983104596326e-06, "loss": 0.115423583984375, "step": 120145 }, { "epoch": 1.0389015226846288, "grad_norm": 18.605204883928753, "learning_rate": 2.8167949571171193e-06, "loss": 0.34874114990234373, "step": 120150 }, { "epoch": 1.038944756206172, "grad_norm": 4.881662269878321, "learning_rate": 2.816591604619532e-06, "loss": 0.13801193237304688, "step": 120155 }, { "epoch": 1.0389879897277152, "grad_norm": 3.708220062697582, "learning_rate": 2.8163882529678084e-06, "loss": 0.028558349609375, "step": 120160 }, { "epoch": 1.0390312232492585, "grad_norm": 17.2315615951137, "learning_rate": 2.8161849021628856e-06, "loss": 0.106353759765625, "step": 120165 }, { "epoch": 1.039074456770802, "grad_norm": 16.792578409539406, "learning_rate": 2.8159815522057e-06, "loss": 0.10392837524414063, "step": 120170 }, { "epoch": 1.0391176902923451, "grad_norm": 11.279758960124346, "learning_rate": 2.8157782030971932e-06, "loss": 0.10406723022460937, "step": 120175 }, { "epoch": 1.0391609238138884, "grad_norm": 6.531287723824039, "learning_rate": 2.8155748548383003e-06, "loss": 0.11389923095703125, "step": 120180 }, { "epoch": 1.0392041573354316, "grad_norm": 0.6141124283530944, "learning_rate": 2.8153715074299605e-06, "loss": 0.09961090087890626, "step": 120185 }, { "epoch": 1.0392473908569748, "grad_norm": 0.7038608574326773, "learning_rate": 2.8151681608731113e-06, "loss": 0.043691253662109374, "step": 120190 }, { "epoch": 1.039290624378518, "grad_norm": 0.33067150362586745, "learning_rate": 2.8149648151686897e-06, "loss": 0.027199172973632814, "step": 120195 }, { "epoch": 1.0393338579000615, "grad_norm": 7.254804250684105, "learning_rate": 2.814761470317633e-06, "loss": 0.084442138671875, "step": 120200 }, { "epoch": 1.0393770914216047, "grad_norm": 9.484367839950298, "learning_rate": 2.814558126320881e-06, "loss": 0.11338424682617188, "step": 120205 }, { "epoch": 1.039420324943148, "grad_norm": 10.792695541609406, "learning_rate": 2.81435478317937e-06, "loss": 0.0564666748046875, "step": 120210 }, { "epoch": 1.0394635584646912, "grad_norm": 1.1038108649965663, "learning_rate": 2.814151440894039e-06, "loss": 0.044864654541015625, "step": 120215 }, { "epoch": 1.0395067919862344, "grad_norm": 10.114578609463411, "learning_rate": 2.8139480994658253e-06, "loss": 0.10394287109375, "step": 120220 }, { "epoch": 1.0395500255077776, "grad_norm": 7.0260545160764725, "learning_rate": 2.8137447588956664e-06, "loss": 0.14940376281738282, "step": 120225 }, { "epoch": 1.0395932590293209, "grad_norm": 0.4287624136810076, "learning_rate": 2.8135414191844997e-06, "loss": 0.0070781707763671875, "step": 120230 }, { "epoch": 1.0396364925508643, "grad_norm": 2.897600156973523, "learning_rate": 2.8133380803332633e-06, "loss": 0.0357574462890625, "step": 120235 }, { "epoch": 1.0396797260724076, "grad_norm": 0.8983161887604718, "learning_rate": 2.8131347423428953e-06, "loss": 0.021704483032226562, "step": 120240 }, { "epoch": 1.0397229595939508, "grad_norm": 4.037839867864967, "learning_rate": 2.812931405214334e-06, "loss": 0.08314285278320313, "step": 120245 }, { "epoch": 1.039766193115494, "grad_norm": 3.02111409157195, "learning_rate": 2.8127280689485163e-06, "loss": 0.02332725524902344, "step": 120250 }, { "epoch": 1.0398094266370372, "grad_norm": 1.4424908803595071, "learning_rate": 2.8125247335463807e-06, "loss": 0.047149276733398436, "step": 120255 }, { "epoch": 1.0398526601585805, "grad_norm": 17.61855469256781, "learning_rate": 2.8123213990088637e-06, "loss": 0.20599365234375, "step": 120260 }, { "epoch": 1.039895893680124, "grad_norm": 35.359831114575954, "learning_rate": 2.8121180653369035e-06, "loss": 0.07044792175292969, "step": 120265 }, { "epoch": 1.0399391272016671, "grad_norm": 50.246636221215276, "learning_rate": 2.8119147325314384e-06, "loss": 0.37118377685546877, "step": 120270 }, { "epoch": 1.0399823607232104, "grad_norm": 11.28551819549463, "learning_rate": 2.8117114005934067e-06, "loss": 0.3004852294921875, "step": 120275 }, { "epoch": 1.0400255942447536, "grad_norm": 0.521380576439282, "learning_rate": 2.811508069523745e-06, "loss": 0.0198089599609375, "step": 120280 }, { "epoch": 1.0400688277662968, "grad_norm": 5.268236463743778, "learning_rate": 2.8113047393233917e-06, "loss": 0.015381622314453124, "step": 120285 }, { "epoch": 1.04011206128784, "grad_norm": 0.29482170647799105, "learning_rate": 2.8111014099932838e-06, "loss": 0.071954345703125, "step": 120290 }, { "epoch": 1.0401552948093835, "grad_norm": 9.26917689820199, "learning_rate": 2.810898081534358e-06, "loss": 0.0831155776977539, "step": 120295 }, { "epoch": 1.0401985283309267, "grad_norm": 0.41300393687299375, "learning_rate": 2.810694753947556e-06, "loss": 0.0759185791015625, "step": 120300 }, { "epoch": 1.04024176185247, "grad_norm": 0.030536335557624868, "learning_rate": 2.8104914272338124e-06, "loss": 0.10467567443847656, "step": 120305 }, { "epoch": 1.0402849953740132, "grad_norm": 0.8015512789355186, "learning_rate": 2.810288101394066e-06, "loss": 0.05371017456054687, "step": 120310 }, { "epoch": 1.0403282288955564, "grad_norm": 20.41545183128068, "learning_rate": 2.8100847764292533e-06, "loss": 0.15804595947265626, "step": 120315 }, { "epoch": 1.0403714624170997, "grad_norm": 0.5489452423174428, "learning_rate": 2.809881452340313e-06, "loss": 0.026070213317871092, "step": 120320 }, { "epoch": 1.0404146959386429, "grad_norm": 2.518359489821419, "learning_rate": 2.809678129128183e-06, "loss": 0.18414688110351562, "step": 120325 }, { "epoch": 1.0404579294601863, "grad_norm": 25.644408762689068, "learning_rate": 2.8094748067937993e-06, "loss": 0.23718719482421874, "step": 120330 }, { "epoch": 1.0405011629817296, "grad_norm": 52.805031491407945, "learning_rate": 2.8092714853381022e-06, "loss": 0.1817047119140625, "step": 120335 }, { "epoch": 1.0405443965032728, "grad_norm": 17.484443903133048, "learning_rate": 2.809068164762028e-06, "loss": 0.06626739501953124, "step": 120340 }, { "epoch": 1.040587630024816, "grad_norm": 4.830895215752722, "learning_rate": 2.8088648450665143e-06, "loss": 0.02755107879638672, "step": 120345 }, { "epoch": 1.0406308635463593, "grad_norm": 3.5069610919082304, "learning_rate": 2.8086615262524996e-06, "loss": 0.14202880859375, "step": 120350 }, { "epoch": 1.0406740970679025, "grad_norm": 28.784879880689576, "learning_rate": 2.808458208320921e-06, "loss": 0.13556747436523436, "step": 120355 }, { "epoch": 1.040717330589446, "grad_norm": 19.88586412371449, "learning_rate": 2.808254891272715e-06, "loss": 0.211944580078125, "step": 120360 }, { "epoch": 1.0407605641109892, "grad_norm": 26.23128144329791, "learning_rate": 2.8080515751088218e-06, "loss": 0.3067008972167969, "step": 120365 }, { "epoch": 1.0408037976325324, "grad_norm": 0.08168046735269052, "learning_rate": 2.8078482598301777e-06, "loss": 0.02377777099609375, "step": 120370 }, { "epoch": 1.0408470311540756, "grad_norm": 0.7933656345925004, "learning_rate": 2.8076449454377194e-06, "loss": 0.01101226806640625, "step": 120375 }, { "epoch": 1.0408902646756188, "grad_norm": 2.686393039797254, "learning_rate": 2.8074416319323867e-06, "loss": 0.0163970947265625, "step": 120380 }, { "epoch": 1.040933498197162, "grad_norm": 13.618379277017302, "learning_rate": 2.8072383193151163e-06, "loss": 0.110198974609375, "step": 120385 }, { "epoch": 1.0409767317187053, "grad_norm": 1.0118533009053647, "learning_rate": 2.807035007586845e-06, "loss": 0.02161684036254883, "step": 120390 }, { "epoch": 1.0410199652402488, "grad_norm": 7.134027872562755, "learning_rate": 2.806831696748511e-06, "loss": 0.05074920654296875, "step": 120395 }, { "epoch": 1.041063198761792, "grad_norm": 12.744187628919823, "learning_rate": 2.806628386801053e-06, "loss": 0.035144805908203125, "step": 120400 }, { "epoch": 1.0411064322833352, "grad_norm": 0.23082522009672551, "learning_rate": 2.806425077745407e-06, "loss": 0.194866943359375, "step": 120405 }, { "epoch": 1.0411496658048784, "grad_norm": 0.2506277040464577, "learning_rate": 2.806221769582512e-06, "loss": 0.12799835205078125, "step": 120410 }, { "epoch": 1.0411928993264217, "grad_norm": 0.5109492980077516, "learning_rate": 2.8060184623133054e-06, "loss": 0.10032501220703124, "step": 120415 }, { "epoch": 1.041236132847965, "grad_norm": 25.47637385686419, "learning_rate": 2.805815155938724e-06, "loss": 0.10277862548828125, "step": 120420 }, { "epoch": 1.0412793663695084, "grad_norm": 10.271611059501144, "learning_rate": 2.805611850459705e-06, "loss": 0.10009765625, "step": 120425 }, { "epoch": 1.0413225998910516, "grad_norm": 0.6357537016474378, "learning_rate": 2.8054085458771884e-06, "loss": 0.017653656005859376, "step": 120430 }, { "epoch": 1.0413658334125948, "grad_norm": 0.20784637179020046, "learning_rate": 2.8052052421921096e-06, "loss": 0.025694656372070312, "step": 120435 }, { "epoch": 1.041409066934138, "grad_norm": 10.869474422812088, "learning_rate": 2.8050019394054075e-06, "loss": 0.03155670166015625, "step": 120440 }, { "epoch": 1.0414523004556813, "grad_norm": 4.575555520245143, "learning_rate": 2.8047986375180194e-06, "loss": 0.0467132568359375, "step": 120445 }, { "epoch": 1.0414955339772245, "grad_norm": 3.496778567969805, "learning_rate": 2.804595336530883e-06, "loss": 0.2555084228515625, "step": 120450 }, { "epoch": 1.041538767498768, "grad_norm": 0.6920685758561366, "learning_rate": 2.804392036444934e-06, "loss": 0.035082244873046876, "step": 120455 }, { "epoch": 1.0415820010203112, "grad_norm": 3.446183624804741, "learning_rate": 2.8041887372611126e-06, "loss": 0.12112884521484375, "step": 120460 }, { "epoch": 1.0416252345418544, "grad_norm": 9.109781634060456, "learning_rate": 2.803985438980355e-06, "loss": 0.11476593017578125, "step": 120465 }, { "epoch": 1.0416684680633976, "grad_norm": 2.749880606321942, "learning_rate": 2.8037821416036004e-06, "loss": 0.02244415283203125, "step": 120470 }, { "epoch": 1.0417117015849409, "grad_norm": 15.034649659070917, "learning_rate": 2.8035788451317847e-06, "loss": 0.060894775390625, "step": 120475 }, { "epoch": 1.041754935106484, "grad_norm": 3.2004763669813885, "learning_rate": 2.8033755495658462e-06, "loss": 0.032117462158203124, "step": 120480 }, { "epoch": 1.0417981686280273, "grad_norm": 2.658322262492098, "learning_rate": 2.8031722549067217e-06, "loss": 0.02196807861328125, "step": 120485 }, { "epoch": 1.0418414021495708, "grad_norm": 0.748798811645675, "learning_rate": 2.8029689611553493e-06, "loss": 0.04915313720703125, "step": 120490 }, { "epoch": 1.041884635671114, "grad_norm": 1.7604789965988998, "learning_rate": 2.8027656683126666e-06, "loss": 0.055425262451171874, "step": 120495 }, { "epoch": 1.0419278691926572, "grad_norm": 10.130991724432286, "learning_rate": 2.802562376379612e-06, "loss": 0.1452606201171875, "step": 120500 }, { "epoch": 1.0419711027142005, "grad_norm": 0.7043085748872987, "learning_rate": 2.802359085357122e-06, "loss": 0.19626312255859374, "step": 120505 }, { "epoch": 1.0420143362357437, "grad_norm": 0.5191248429809104, "learning_rate": 2.802155795246135e-06, "loss": 0.07331619262695313, "step": 120510 }, { "epoch": 1.042057569757287, "grad_norm": 1.0254852238134438, "learning_rate": 2.801952506047587e-06, "loss": 0.026317596435546875, "step": 120515 }, { "epoch": 1.0421008032788304, "grad_norm": 0.14396063662823497, "learning_rate": 2.8017492177624155e-06, "loss": 0.1277761459350586, "step": 120520 }, { "epoch": 1.0421440368003736, "grad_norm": 13.91207124220127, "learning_rate": 2.801545930391561e-06, "loss": 0.10845947265625, "step": 120525 }, { "epoch": 1.0421872703219168, "grad_norm": 9.299986109980539, "learning_rate": 2.8013426439359587e-06, "loss": 0.035947418212890624, "step": 120530 }, { "epoch": 1.04223050384346, "grad_norm": 0.6116603822844622, "learning_rate": 2.801139358396547e-06, "loss": 0.02295379638671875, "step": 120535 }, { "epoch": 1.0422737373650033, "grad_norm": 1.2210768700388996, "learning_rate": 2.8009360737742623e-06, "loss": 0.10462207794189453, "step": 120540 }, { "epoch": 1.0423169708865465, "grad_norm": 23.306485416042555, "learning_rate": 2.8007327900700427e-06, "loss": 0.21067352294921876, "step": 120545 }, { "epoch": 1.04236020440809, "grad_norm": 0.26535459667287536, "learning_rate": 2.800529507284825e-06, "loss": 0.009725189208984375, "step": 120550 }, { "epoch": 1.0424034379296332, "grad_norm": 1.1935631702444207, "learning_rate": 2.8003262254195492e-06, "loss": 0.03560981750488281, "step": 120555 }, { "epoch": 1.0424466714511764, "grad_norm": 1.2471524959297398, "learning_rate": 2.800122944475151e-06, "loss": 0.02021942138671875, "step": 120560 }, { "epoch": 1.0424899049727196, "grad_norm": 7.104347748073964, "learning_rate": 2.7999196644525683e-06, "loss": 0.0303192138671875, "step": 120565 }, { "epoch": 1.0425331384942629, "grad_norm": 0.09245761621789161, "learning_rate": 2.7997163853527377e-06, "loss": 0.004821014404296875, "step": 120570 }, { "epoch": 1.042576372015806, "grad_norm": 5.420196355417348, "learning_rate": 2.7995131071765977e-06, "loss": 0.03329010009765625, "step": 120575 }, { "epoch": 1.0426196055373493, "grad_norm": 1.781882844674845, "learning_rate": 2.799309829925086e-06, "loss": 0.0775421142578125, "step": 120580 }, { "epoch": 1.0426628390588928, "grad_norm": 9.856657489437321, "learning_rate": 2.799106553599138e-06, "loss": 0.055384063720703126, "step": 120585 }, { "epoch": 1.042706072580436, "grad_norm": 16.699555403695708, "learning_rate": 2.7989032781996946e-06, "loss": 0.12867279052734376, "step": 120590 }, { "epoch": 1.0427493061019792, "grad_norm": 19.73177297984558, "learning_rate": 2.7987000037276916e-06, "loss": 0.06372795104980469, "step": 120595 }, { "epoch": 1.0427925396235225, "grad_norm": 8.953385191807483, "learning_rate": 2.798496730184065e-06, "loss": 0.0632354736328125, "step": 120600 }, { "epoch": 1.0428357731450657, "grad_norm": 0.8624121369998723, "learning_rate": 2.798293457569755e-06, "loss": 0.05311737060546875, "step": 120605 }, { "epoch": 1.042879006666609, "grad_norm": 8.242435254663846, "learning_rate": 2.7980901858856973e-06, "loss": 0.08459854125976562, "step": 120610 }, { "epoch": 1.0429222401881524, "grad_norm": 1.0443547987578363, "learning_rate": 2.7978869151328287e-06, "loss": 0.0792144775390625, "step": 120615 }, { "epoch": 1.0429654737096956, "grad_norm": 0.33597974307470957, "learning_rate": 2.797683645312089e-06, "loss": 0.065667724609375, "step": 120620 }, { "epoch": 1.0430087072312388, "grad_norm": 36.58477891615889, "learning_rate": 2.7974803764244146e-06, "loss": 0.10228195190429687, "step": 120625 }, { "epoch": 1.043051940752782, "grad_norm": 4.085754427395597, "learning_rate": 2.797277108470742e-06, "loss": 0.04037685394287109, "step": 120630 }, { "epoch": 1.0430951742743253, "grad_norm": 2.108598694632069, "learning_rate": 2.79707384145201e-06, "loss": 0.0162841796875, "step": 120635 }, { "epoch": 1.0431384077958685, "grad_norm": 6.668574893622657, "learning_rate": 2.796870575369156e-06, "loss": 0.041534805297851564, "step": 120640 }, { "epoch": 1.0431816413174118, "grad_norm": 17.578255727231607, "learning_rate": 2.796667310223115e-06, "loss": 0.050357818603515625, "step": 120645 }, { "epoch": 1.0432248748389552, "grad_norm": 1.0270773277691356, "learning_rate": 2.7964640460148282e-06, "loss": 0.13077392578125, "step": 120650 }, { "epoch": 1.0432681083604984, "grad_norm": 14.510220289867819, "learning_rate": 2.796260782745231e-06, "loss": 0.09631576538085937, "step": 120655 }, { "epoch": 1.0433113418820417, "grad_norm": 4.457561987811152, "learning_rate": 2.7960575204152607e-06, "loss": 0.02949371337890625, "step": 120660 }, { "epoch": 1.043354575403585, "grad_norm": 5.0478177205125805, "learning_rate": 2.7958542590258557e-06, "loss": 0.053897857666015625, "step": 120665 }, { "epoch": 1.0433978089251281, "grad_norm": 41.21426301420299, "learning_rate": 2.7956509985779527e-06, "loss": 0.16044921875, "step": 120670 }, { "epoch": 1.0434410424466714, "grad_norm": 0.13406945865662317, "learning_rate": 2.795447739072489e-06, "loss": 0.08356285095214844, "step": 120675 }, { "epoch": 1.0434842759682148, "grad_norm": 45.24316582482047, "learning_rate": 2.795244480510401e-06, "loss": 0.2128599166870117, "step": 120680 }, { "epoch": 1.043527509489758, "grad_norm": 2.2687774372293457, "learning_rate": 2.795041222892629e-06, "loss": 0.08019218444824219, "step": 120685 }, { "epoch": 1.0435707430113013, "grad_norm": 17.3003272957541, "learning_rate": 2.7948379662201076e-06, "loss": 0.0991851806640625, "step": 120690 }, { "epoch": 1.0436139765328445, "grad_norm": 10.81778750027934, "learning_rate": 2.794634710493777e-06, "loss": 0.08257293701171875, "step": 120695 }, { "epoch": 1.0436572100543877, "grad_norm": 0.6081967630781027, "learning_rate": 2.794431455714572e-06, "loss": 0.2112152099609375, "step": 120700 }, { "epoch": 1.043700443575931, "grad_norm": 0.9162973062384497, "learning_rate": 2.794228201883431e-06, "loss": 0.032520103454589847, "step": 120705 }, { "epoch": 1.0437436770974744, "grad_norm": 21.852360905958097, "learning_rate": 2.7940249490012905e-06, "loss": 0.09497909545898438, "step": 120710 }, { "epoch": 1.0437869106190176, "grad_norm": 1.7071467089461778, "learning_rate": 2.79382169706909e-06, "loss": 0.030229568481445312, "step": 120715 }, { "epoch": 1.0438301441405609, "grad_norm": 22.05605673480899, "learning_rate": 2.793618446087765e-06, "loss": 0.09074573516845703, "step": 120720 }, { "epoch": 1.043873377662104, "grad_norm": 47.76296761207495, "learning_rate": 2.7934151960582537e-06, "loss": 0.13166465759277343, "step": 120725 }, { "epoch": 1.0439166111836473, "grad_norm": 3.9329565404573774, "learning_rate": 2.7932119469814936e-06, "loss": 0.07058181762695312, "step": 120730 }, { "epoch": 1.0439598447051905, "grad_norm": 0.25015971105152346, "learning_rate": 2.793008698858422e-06, "loss": 0.0824249267578125, "step": 120735 }, { "epoch": 1.044003078226734, "grad_norm": 5.532501437579403, "learning_rate": 2.7928054516899746e-06, "loss": 0.03624420166015625, "step": 120740 }, { "epoch": 1.0440463117482772, "grad_norm": 15.883618586966978, "learning_rate": 2.79260220547709e-06, "loss": 0.2999855041503906, "step": 120745 }, { "epoch": 1.0440895452698205, "grad_norm": 0.16087549953975483, "learning_rate": 2.792398960220707e-06, "loss": 0.07947006225585937, "step": 120750 }, { "epoch": 1.0441327787913637, "grad_norm": 5.096733167235445, "learning_rate": 2.7921957159217615e-06, "loss": 0.0816162109375, "step": 120755 }, { "epoch": 1.044176012312907, "grad_norm": 2.7234386765018845, "learning_rate": 2.791992472581191e-06, "loss": 0.07242507934570312, "step": 120760 }, { "epoch": 1.0442192458344501, "grad_norm": 0.30578735083781156, "learning_rate": 2.791789230199932e-06, "loss": 0.12719573974609374, "step": 120765 }, { "epoch": 1.0442624793559934, "grad_norm": 0.3592785167073694, "learning_rate": 2.7915859887789235e-06, "loss": 0.0163604736328125, "step": 120770 }, { "epoch": 1.0443057128775368, "grad_norm": 0.8221113162883924, "learning_rate": 2.791382748319101e-06, "loss": 0.01563568115234375, "step": 120775 }, { "epoch": 1.04434894639908, "grad_norm": 14.681976383424166, "learning_rate": 2.7911795088214037e-06, "loss": 0.041796875, "step": 120780 }, { "epoch": 1.0443921799206233, "grad_norm": 3.952496777314378, "learning_rate": 2.7909762702867687e-06, "loss": 0.039928436279296875, "step": 120785 }, { "epoch": 1.0444354134421665, "grad_norm": 63.83011205719171, "learning_rate": 2.790773032716132e-06, "loss": 0.3253168106079102, "step": 120790 }, { "epoch": 1.0444786469637097, "grad_norm": 5.141132367089252, "learning_rate": 2.7905697961104312e-06, "loss": 0.03634910583496094, "step": 120795 }, { "epoch": 1.044521880485253, "grad_norm": 0.12723564188597353, "learning_rate": 2.7903665604706046e-06, "loss": 0.23084564208984376, "step": 120800 }, { "epoch": 1.0445651140067964, "grad_norm": 15.051230109448076, "learning_rate": 2.7901633257975876e-06, "loss": 0.10924530029296875, "step": 120805 }, { "epoch": 1.0446083475283396, "grad_norm": 0.1066907075142546, "learning_rate": 2.78996009209232e-06, "loss": 0.03660392761230469, "step": 120810 }, { "epoch": 1.0446515810498829, "grad_norm": 1.507381779862129, "learning_rate": 2.7897568593557384e-06, "loss": 0.08274078369140625, "step": 120815 }, { "epoch": 1.044694814571426, "grad_norm": 0.82493004604685, "learning_rate": 2.789553627588779e-06, "loss": 0.09429855346679687, "step": 120820 }, { "epoch": 1.0447380480929693, "grad_norm": 0.8376281061195896, "learning_rate": 2.7893503967923794e-06, "loss": 0.0595428466796875, "step": 120825 }, { "epoch": 1.0447812816145126, "grad_norm": 22.5858523275165, "learning_rate": 2.7891471669674775e-06, "loss": 0.07495689392089844, "step": 120830 }, { "epoch": 1.0448245151360558, "grad_norm": 7.603630012732212, "learning_rate": 2.78894393811501e-06, "loss": 0.021375656127929688, "step": 120835 }, { "epoch": 1.0448677486575992, "grad_norm": 23.475558126397175, "learning_rate": 2.7887407102359137e-06, "loss": 0.0947052001953125, "step": 120840 }, { "epoch": 1.0449109821791425, "grad_norm": 1.1772017217473343, "learning_rate": 2.788537483331128e-06, "loss": 0.045875930786132814, "step": 120845 }, { "epoch": 1.0449542157006857, "grad_norm": 2.028873451877049, "learning_rate": 2.788334257401588e-06, "loss": 0.05172996520996094, "step": 120850 }, { "epoch": 1.044997449222229, "grad_norm": 7.694078832620013, "learning_rate": 2.7881310324482316e-06, "loss": 0.02698516845703125, "step": 120855 }, { "epoch": 1.0450406827437722, "grad_norm": 0.9666041243093221, "learning_rate": 2.7879278084719965e-06, "loss": 0.06391258239746093, "step": 120860 }, { "epoch": 1.0450839162653154, "grad_norm": 0.1544000725666927, "learning_rate": 2.78772458547382e-06, "loss": 0.07435302734375, "step": 120865 }, { "epoch": 1.0451271497868588, "grad_norm": 8.130969749101766, "learning_rate": 2.787521363454637e-06, "loss": 0.09649524688720704, "step": 120870 }, { "epoch": 1.045170383308402, "grad_norm": 8.4467091408711, "learning_rate": 2.7873181424153887e-06, "loss": 0.1283050537109375, "step": 120875 }, { "epoch": 1.0452136168299453, "grad_norm": 0.9068443739443299, "learning_rate": 2.78711492235701e-06, "loss": 0.03456802368164062, "step": 120880 }, { "epoch": 1.0452568503514885, "grad_norm": 0.1685421489803384, "learning_rate": 2.786911703280438e-06, "loss": 0.013570404052734375, "step": 120885 }, { "epoch": 1.0453000838730317, "grad_norm": 0.02967547281679752, "learning_rate": 2.786708485186611e-06, "loss": 0.14961185455322265, "step": 120890 }, { "epoch": 1.045343317394575, "grad_norm": 0.33358912691158976, "learning_rate": 2.7865052680764654e-06, "loss": 0.03623046875, "step": 120895 }, { "epoch": 1.0453865509161184, "grad_norm": 0.36301366567648646, "learning_rate": 2.7863020519509375e-06, "loss": 0.005894088745117187, "step": 120900 }, { "epoch": 1.0454297844376617, "grad_norm": 1.4902965709412517, "learning_rate": 2.7860988368109666e-06, "loss": 0.047498321533203124, "step": 120905 }, { "epoch": 1.0454730179592049, "grad_norm": 0.5369610402018761, "learning_rate": 2.7858956226574897e-06, "loss": 0.16210670471191407, "step": 120910 }, { "epoch": 1.0455162514807481, "grad_norm": 4.557785169704988, "learning_rate": 2.785692409491442e-06, "loss": 0.035498046875, "step": 120915 }, { "epoch": 1.0455594850022913, "grad_norm": 6.098858674626224, "learning_rate": 2.785489197313763e-06, "loss": 0.1164306640625, "step": 120920 }, { "epoch": 1.0456027185238346, "grad_norm": 0.18748602088278615, "learning_rate": 2.7852859861253884e-06, "loss": 0.2350902557373047, "step": 120925 }, { "epoch": 1.0456459520453778, "grad_norm": 0.7115797628412397, "learning_rate": 2.7850827759272564e-06, "loss": 0.0419403076171875, "step": 120930 }, { "epoch": 1.0456891855669213, "grad_norm": 0.1184794174620711, "learning_rate": 2.784879566720302e-06, "loss": 0.01658973693847656, "step": 120935 }, { "epoch": 1.0457324190884645, "grad_norm": 0.7056141821788505, "learning_rate": 2.7846763585054654e-06, "loss": 0.06658248901367188, "step": 120940 }, { "epoch": 1.0457756526100077, "grad_norm": 10.25429910718487, "learning_rate": 2.784473151283682e-06, "loss": 0.17139778137207032, "step": 120945 }, { "epoch": 1.045818886131551, "grad_norm": 2.9761503195405825, "learning_rate": 2.7842699450558892e-06, "loss": 0.0830352783203125, "step": 120950 }, { "epoch": 1.0458621196530942, "grad_norm": 0.7887486973391529, "learning_rate": 2.7840667398230254e-06, "loss": 0.031966018676757815, "step": 120955 }, { "epoch": 1.0459053531746374, "grad_norm": 0.5118853195194972, "learning_rate": 2.783863535586026e-06, "loss": 0.02886505126953125, "step": 120960 }, { "epoch": 1.0459485866961808, "grad_norm": 3.921842304251346, "learning_rate": 2.7836603323458277e-06, "loss": 0.03918266296386719, "step": 120965 }, { "epoch": 1.045991820217724, "grad_norm": 10.339469543145393, "learning_rate": 2.78345713010337e-06, "loss": 0.031402206420898436, "step": 120970 }, { "epoch": 1.0460350537392673, "grad_norm": 0.167221867609818, "learning_rate": 2.7832539288595888e-06, "loss": 0.02039680480957031, "step": 120975 }, { "epoch": 1.0460782872608105, "grad_norm": 0.17886527604707575, "learning_rate": 2.783050728615422e-06, "loss": 0.08262653350830078, "step": 120980 }, { "epoch": 1.0461215207823538, "grad_norm": 3.9792994613775057, "learning_rate": 2.7828475293718057e-06, "loss": 0.037933731079101564, "step": 120985 }, { "epoch": 1.046164754303897, "grad_norm": 1.3392878365437095, "learning_rate": 2.7826443311296767e-06, "loss": 0.13212051391601562, "step": 120990 }, { "epoch": 1.0462079878254404, "grad_norm": 5.365086935030813, "learning_rate": 2.782441133889973e-06, "loss": 0.025521469116210938, "step": 120995 }, { "epoch": 1.0462512213469837, "grad_norm": 16.51227814370667, "learning_rate": 2.7822379376536317e-06, "loss": 0.148065185546875, "step": 121000 }, { "epoch": 1.046294454868527, "grad_norm": 1.4744540617774349, "learning_rate": 2.7820347424215906e-06, "loss": 0.019735336303710938, "step": 121005 }, { "epoch": 1.0463376883900701, "grad_norm": 35.43819548578619, "learning_rate": 2.7818315481947853e-06, "loss": 0.13125, "step": 121010 }, { "epoch": 1.0463809219116134, "grad_norm": 1.8127312182520263, "learning_rate": 2.7816283549741543e-06, "loss": 0.03150634765625, "step": 121015 }, { "epoch": 1.0464241554331566, "grad_norm": 5.964536567904252, "learning_rate": 2.781425162760633e-06, "loss": 0.04188098907470703, "step": 121020 }, { "epoch": 1.0464673889546998, "grad_norm": 1.3761678478496198, "learning_rate": 2.78122197155516e-06, "loss": 0.13218994140625, "step": 121025 }, { "epoch": 1.0465106224762433, "grad_norm": 0.5551370804350141, "learning_rate": 2.781018781358671e-06, "loss": 0.03425445556640625, "step": 121030 }, { "epoch": 1.0465538559977865, "grad_norm": 0.9789320277372743, "learning_rate": 2.780815592172105e-06, "loss": 0.07256240844726562, "step": 121035 }, { "epoch": 1.0465970895193297, "grad_norm": 1.1676335298974514, "learning_rate": 2.7806124039963986e-06, "loss": 0.10583438873291015, "step": 121040 }, { "epoch": 1.046640323040873, "grad_norm": 2.264955210541038, "learning_rate": 2.780409216832488e-06, "loss": 0.021148681640625, "step": 121045 }, { "epoch": 1.0466835565624162, "grad_norm": 0.6378679910893195, "learning_rate": 2.7802060306813102e-06, "loss": 0.0203765869140625, "step": 121050 }, { "epoch": 1.0467267900839594, "grad_norm": 2.662862669703623, "learning_rate": 2.7800028455438037e-06, "loss": 0.027323150634765626, "step": 121055 }, { "epoch": 1.0467700236055029, "grad_norm": 7.529433725647035, "learning_rate": 2.779799661420903e-06, "loss": 0.0999847412109375, "step": 121060 }, { "epoch": 1.046813257127046, "grad_norm": 3.971389351341134, "learning_rate": 2.779596478313548e-06, "loss": 0.12717323303222655, "step": 121065 }, { "epoch": 1.0468564906485893, "grad_norm": 42.159814092466895, "learning_rate": 2.7793932962226746e-06, "loss": 0.1356487274169922, "step": 121070 }, { "epoch": 1.0468997241701326, "grad_norm": 3.219000315030826, "learning_rate": 2.7791901151492204e-06, "loss": 0.08759346008300781, "step": 121075 }, { "epoch": 1.0469429576916758, "grad_norm": 0.7257988423865692, "learning_rate": 2.7789869350941207e-06, "loss": 0.058485221862792966, "step": 121080 }, { "epoch": 1.046986191213219, "grad_norm": 14.076996290188632, "learning_rate": 2.7787837560583143e-06, "loss": 0.0363861083984375, "step": 121085 }, { "epoch": 1.0470294247347622, "grad_norm": 8.06022246123059, "learning_rate": 2.7785805780427375e-06, "loss": 0.17227325439453126, "step": 121090 }, { "epoch": 1.0470726582563057, "grad_norm": 6.110549832355016, "learning_rate": 2.7783774010483266e-06, "loss": 0.15015945434570313, "step": 121095 }, { "epoch": 1.047115891777849, "grad_norm": 5.071772004751737, "learning_rate": 2.778174225076021e-06, "loss": 0.022369384765625, "step": 121100 }, { "epoch": 1.0471591252993921, "grad_norm": 36.32385329424598, "learning_rate": 2.777971050126756e-06, "loss": 0.1632232666015625, "step": 121105 }, { "epoch": 1.0472023588209354, "grad_norm": 3.4635799910910716, "learning_rate": 2.7777678762014683e-06, "loss": 0.12350378036499024, "step": 121110 }, { "epoch": 1.0472455923424786, "grad_norm": 22.0624691324216, "learning_rate": 2.7775647033010963e-06, "loss": 0.1558521270751953, "step": 121115 }, { "epoch": 1.0472888258640218, "grad_norm": 0.2705462930726047, "learning_rate": 2.7773615314265757e-06, "loss": 0.019640636444091798, "step": 121120 }, { "epoch": 1.0473320593855653, "grad_norm": 0.974044085145553, "learning_rate": 2.7771583605788437e-06, "loss": 0.101739501953125, "step": 121125 }, { "epoch": 1.0473752929071085, "grad_norm": 1.3569415989796973, "learning_rate": 2.776955190758838e-06, "loss": 0.03993492126464844, "step": 121130 }, { "epoch": 1.0474185264286517, "grad_norm": 4.521204836694981, "learning_rate": 2.7767520219674956e-06, "loss": 0.04512100219726563, "step": 121135 }, { "epoch": 1.047461759950195, "grad_norm": 0.0641288721860863, "learning_rate": 2.7765488542057528e-06, "loss": 0.2152374267578125, "step": 121140 }, { "epoch": 1.0475049934717382, "grad_norm": 0.7206393144693776, "learning_rate": 2.7763456874745475e-06, "loss": 0.35561599731445315, "step": 121145 }, { "epoch": 1.0475482269932814, "grad_norm": 1.8234716499034251, "learning_rate": 2.7761425217748157e-06, "loss": 0.186053466796875, "step": 121150 }, { "epoch": 1.0475914605148249, "grad_norm": 0.7088164410038771, "learning_rate": 2.775939357107494e-06, "loss": 0.025053024291992188, "step": 121155 }, { "epoch": 1.047634694036368, "grad_norm": 0.9769499513385781, "learning_rate": 2.775736193473521e-06, "loss": 0.06481246948242188, "step": 121160 }, { "epoch": 1.0476779275579113, "grad_norm": 3.0637001632194893, "learning_rate": 2.7755330308738328e-06, "loss": 0.05831069946289062, "step": 121165 }, { "epoch": 1.0477211610794546, "grad_norm": 4.666206873545955, "learning_rate": 2.7753298693093662e-06, "loss": 0.05121192932128906, "step": 121170 }, { "epoch": 1.0477643946009978, "grad_norm": 0.43525564691486507, "learning_rate": 2.7751267087810595e-06, "loss": 0.11072425842285157, "step": 121175 }, { "epoch": 1.047807628122541, "grad_norm": 1.4362459261846963, "learning_rate": 2.7749235492898476e-06, "loss": 0.19336090087890626, "step": 121180 }, { "epoch": 1.0478508616440843, "grad_norm": 6.5667264743385, "learning_rate": 2.7747203908366686e-06, "loss": 0.1521759033203125, "step": 121185 }, { "epoch": 1.0478940951656277, "grad_norm": 0.3212198220206892, "learning_rate": 2.7745172334224585e-06, "loss": 0.044655990600585935, "step": 121190 }, { "epoch": 1.047937328687171, "grad_norm": 1.1491578575113284, "learning_rate": 2.774314077048155e-06, "loss": 0.07917633056640624, "step": 121195 }, { "epoch": 1.0479805622087142, "grad_norm": 35.076730547818386, "learning_rate": 2.7741109217146963e-06, "loss": 0.20327911376953126, "step": 121200 }, { "epoch": 1.0480237957302574, "grad_norm": 15.897944966352723, "learning_rate": 2.7739077674230177e-06, "loss": 0.044034576416015624, "step": 121205 }, { "epoch": 1.0480670292518006, "grad_norm": 2.2258339724977216, "learning_rate": 2.7737046141740566e-06, "loss": 0.017680549621582033, "step": 121210 }, { "epoch": 1.0481102627733438, "grad_norm": 0.8925221818209963, "learning_rate": 2.773501461968749e-06, "loss": 0.2598846435546875, "step": 121215 }, { "epoch": 1.0481534962948873, "grad_norm": 27.342065158181253, "learning_rate": 2.773298310808033e-06, "loss": 0.19834136962890625, "step": 121220 }, { "epoch": 1.0481967298164305, "grad_norm": 2.9090575282398543, "learning_rate": 2.7730951606928446e-06, "loss": 0.06285400390625, "step": 121225 }, { "epoch": 1.0482399633379738, "grad_norm": 25.773412515103512, "learning_rate": 2.7728920116241223e-06, "loss": 0.10070648193359374, "step": 121230 }, { "epoch": 1.048283196859517, "grad_norm": 1.4092448342822368, "learning_rate": 2.7726888636028017e-06, "loss": 0.0177764892578125, "step": 121235 }, { "epoch": 1.0483264303810602, "grad_norm": 5.474231695460934, "learning_rate": 2.7724857166298202e-06, "loss": 0.07541923522949219, "step": 121240 }, { "epoch": 1.0483696639026034, "grad_norm": 15.762600069086078, "learning_rate": 2.772282570706114e-06, "loss": 0.0575958251953125, "step": 121245 }, { "epoch": 1.048412897424147, "grad_norm": 0.4516091966937639, "learning_rate": 2.77207942583262e-06, "loss": 0.030315399169921875, "step": 121250 }, { "epoch": 1.0484561309456901, "grad_norm": 1.7388039188373825, "learning_rate": 2.771876282010276e-06, "loss": 0.0565521240234375, "step": 121255 }, { "epoch": 1.0484993644672334, "grad_norm": 1.9090660246312845, "learning_rate": 2.771673139240019e-06, "loss": 0.08451404571533203, "step": 121260 }, { "epoch": 1.0485425979887766, "grad_norm": 4.190831217502002, "learning_rate": 2.771469997522785e-06, "loss": 0.026194000244140626, "step": 121265 }, { "epoch": 1.0485858315103198, "grad_norm": 0.6662211170605541, "learning_rate": 2.7712668568595115e-06, "loss": 0.12489471435546876, "step": 121270 }, { "epoch": 1.048629065031863, "grad_norm": 4.136739275295244, "learning_rate": 2.7710637172511343e-06, "loss": 0.07104110717773438, "step": 121275 }, { "epoch": 1.0486722985534063, "grad_norm": 22.216921941476166, "learning_rate": 2.7708605786985916e-06, "loss": 0.044596290588378905, "step": 121280 }, { "epoch": 1.0487155320749497, "grad_norm": 51.667457443109974, "learning_rate": 2.7706574412028183e-06, "loss": 0.137322998046875, "step": 121285 }, { "epoch": 1.048758765596493, "grad_norm": 2.111173418596708, "learning_rate": 2.770454304764754e-06, "loss": 0.10713958740234375, "step": 121290 }, { "epoch": 1.0488019991180362, "grad_norm": 2.825615623667788, "learning_rate": 2.770251169385334e-06, "loss": 0.29588470458984373, "step": 121295 }, { "epoch": 1.0488452326395794, "grad_norm": 0.15800416116671076, "learning_rate": 2.7700480350654956e-06, "loss": 0.28194141387939453, "step": 121300 }, { "epoch": 1.0488884661611226, "grad_norm": 12.425994414714488, "learning_rate": 2.7698449018061745e-06, "loss": 0.12440032958984375, "step": 121305 }, { "epoch": 1.0489316996826659, "grad_norm": 38.661808984882306, "learning_rate": 2.7696417696083093e-06, "loss": 0.14321022033691405, "step": 121310 }, { "epoch": 1.0489749332042093, "grad_norm": 1.9307187037984908, "learning_rate": 2.7694386384728344e-06, "loss": 0.14753055572509766, "step": 121315 }, { "epoch": 1.0490181667257525, "grad_norm": 6.674162995266469, "learning_rate": 2.769235508400689e-06, "loss": 0.2595935821533203, "step": 121320 }, { "epoch": 1.0490614002472958, "grad_norm": 0.7535593595669393, "learning_rate": 2.7690323793928097e-06, "loss": 0.02971038818359375, "step": 121325 }, { "epoch": 1.049104633768839, "grad_norm": 4.150866003587794, "learning_rate": 2.7688292514501328e-06, "loss": 0.11920909881591797, "step": 121330 }, { "epoch": 1.0491478672903822, "grad_norm": 1.0826171800666273, "learning_rate": 2.7686261245735937e-06, "loss": 0.129400634765625, "step": 121335 }, { "epoch": 1.0491911008119255, "grad_norm": 18.48444940623243, "learning_rate": 2.7684229987641315e-06, "loss": 0.09312286376953124, "step": 121340 }, { "epoch": 1.0492343343334687, "grad_norm": 4.042591252171545, "learning_rate": 2.7682198740226818e-06, "loss": 0.027654266357421874, "step": 121345 }, { "epoch": 1.0492775678550121, "grad_norm": 1.0718313621890931, "learning_rate": 2.7680167503501804e-06, "loss": 0.05238037109375, "step": 121350 }, { "epoch": 1.0493208013765554, "grad_norm": 1.525953668161483, "learning_rate": 2.7678136277475663e-06, "loss": 0.12240524291992187, "step": 121355 }, { "epoch": 1.0493640348980986, "grad_norm": 5.906304283321127, "learning_rate": 2.7676105062157755e-06, "loss": 0.11350555419921875, "step": 121360 }, { "epoch": 1.0494072684196418, "grad_norm": 0.23722208058067945, "learning_rate": 2.7674073857557435e-06, "loss": 0.07317123413085938, "step": 121365 }, { "epoch": 1.049450501941185, "grad_norm": 11.385631875484291, "learning_rate": 2.767204266368409e-06, "loss": 0.318658447265625, "step": 121370 }, { "epoch": 1.0494937354627283, "grad_norm": 1.4432568698338866, "learning_rate": 2.767001148054708e-06, "loss": 0.21139945983886718, "step": 121375 }, { "epoch": 1.0495369689842717, "grad_norm": 2.54603432247725, "learning_rate": 2.7667980308155756e-06, "loss": 0.055165863037109374, "step": 121380 }, { "epoch": 1.049580202505815, "grad_norm": 3.4826723749708175, "learning_rate": 2.7665949146519514e-06, "loss": 0.07172317504882812, "step": 121385 }, { "epoch": 1.0496234360273582, "grad_norm": 2.371513586157829, "learning_rate": 2.76639179956477e-06, "loss": 0.04789390563964844, "step": 121390 }, { "epoch": 1.0496666695489014, "grad_norm": 8.710406979372143, "learning_rate": 2.76618868555497e-06, "loss": 0.02769622802734375, "step": 121395 }, { "epoch": 1.0497099030704446, "grad_norm": 0.18545063518531407, "learning_rate": 2.765985572623487e-06, "loss": 0.25098609924316406, "step": 121400 }, { "epoch": 1.0497531365919879, "grad_norm": 9.354552819295181, "learning_rate": 2.765782460771258e-06, "loss": 0.0890899658203125, "step": 121405 }, { "epoch": 1.0497963701135313, "grad_norm": 1.378732263613071, "learning_rate": 2.765579349999218e-06, "loss": 0.059766387939453124, "step": 121410 }, { "epoch": 1.0498396036350746, "grad_norm": 29.581163158657848, "learning_rate": 2.7653762403083065e-06, "loss": 0.10867233276367187, "step": 121415 }, { "epoch": 1.0498828371566178, "grad_norm": 0.23822027303897772, "learning_rate": 2.765173131699459e-06, "loss": 0.03264312744140625, "step": 121420 }, { "epoch": 1.049926070678161, "grad_norm": 3.757425250424092, "learning_rate": 2.7649700241736124e-06, "loss": 0.1615751266479492, "step": 121425 }, { "epoch": 1.0499693041997042, "grad_norm": 7.261038630486752, "learning_rate": 2.764766917731703e-06, "loss": 0.0263824462890625, "step": 121430 }, { "epoch": 1.0500125377212475, "grad_norm": 3.9963923354058495, "learning_rate": 2.7645638123746683e-06, "loss": 0.1080780029296875, "step": 121435 }, { "epoch": 1.0500557712427907, "grad_norm": 1.0677233598010614, "learning_rate": 2.764360708103444e-06, "loss": 0.023789215087890624, "step": 121440 }, { "epoch": 1.0500990047643342, "grad_norm": 1.9877044765513772, "learning_rate": 2.764157604918967e-06, "loss": 0.14512214660644532, "step": 121445 }, { "epoch": 1.0501422382858774, "grad_norm": 0.5320078586437692, "learning_rate": 2.7639545028221743e-06, "loss": 0.3650714874267578, "step": 121450 }, { "epoch": 1.0501854718074206, "grad_norm": 0.96514156502, "learning_rate": 2.7637514018140032e-06, "loss": 0.021312713623046875, "step": 121455 }, { "epoch": 1.0502287053289638, "grad_norm": 2.5373690262053326, "learning_rate": 2.7635483018953897e-06, "loss": 0.12713756561279296, "step": 121460 }, { "epoch": 1.050271938850507, "grad_norm": 5.85318912791547, "learning_rate": 2.763345203067271e-06, "loss": 0.019557952880859375, "step": 121465 }, { "epoch": 1.0503151723720503, "grad_norm": 7.32414446054597, "learning_rate": 2.763142105330582e-06, "loss": 0.02004852294921875, "step": 121470 }, { "epoch": 1.0503584058935937, "grad_norm": 8.188701710873433, "learning_rate": 2.7629390086862613e-06, "loss": 0.16009292602539063, "step": 121475 }, { "epoch": 1.050401639415137, "grad_norm": 10.673826419886774, "learning_rate": 2.7627359131352445e-06, "loss": 0.20696239471435546, "step": 121480 }, { "epoch": 1.0504448729366802, "grad_norm": 24.329032550576706, "learning_rate": 2.7625328186784696e-06, "loss": 0.087701416015625, "step": 121485 }, { "epoch": 1.0504881064582234, "grad_norm": 2.929748277339329, "learning_rate": 2.7623297253168724e-06, "loss": 0.0575678825378418, "step": 121490 }, { "epoch": 1.0505313399797667, "grad_norm": 2.1060473648722717, "learning_rate": 2.762126633051389e-06, "loss": 0.076812744140625, "step": 121495 }, { "epoch": 1.05057457350131, "grad_norm": 0.7020465166904523, "learning_rate": 2.7619235418829564e-06, "loss": 0.00691070556640625, "step": 121500 }, { "epoch": 1.0506178070228533, "grad_norm": 2.0819749227049935, "learning_rate": 2.761720451812511e-06, "loss": 0.03101692199707031, "step": 121505 }, { "epoch": 1.0506610405443966, "grad_norm": 1.4684846179165778, "learning_rate": 2.7615173628409905e-06, "loss": 0.0153533935546875, "step": 121510 }, { "epoch": 1.0507042740659398, "grad_norm": 6.219005476486279, "learning_rate": 2.761314274969331e-06, "loss": 0.05100517272949219, "step": 121515 }, { "epoch": 1.050747507587483, "grad_norm": 3.6736139926464157, "learning_rate": 2.761111188198469e-06, "loss": 0.06764755249023438, "step": 121520 }, { "epoch": 1.0507907411090263, "grad_norm": 9.48439233101324, "learning_rate": 2.760908102529341e-06, "loss": 0.1626190185546875, "step": 121525 }, { "epoch": 1.0508339746305695, "grad_norm": 18.446012552774373, "learning_rate": 2.7607050179628833e-06, "loss": 0.1353790283203125, "step": 121530 }, { "epoch": 1.0508772081521127, "grad_norm": 0.2826440434231839, "learning_rate": 2.7605019345000337e-06, "loss": 0.3096527099609375, "step": 121535 }, { "epoch": 1.0509204416736562, "grad_norm": 4.110610488582549, "learning_rate": 2.7602988521417265e-06, "loss": 0.34839324951171874, "step": 121540 }, { "epoch": 1.0509636751951994, "grad_norm": 0.6630512505244163, "learning_rate": 2.7600957708889007e-06, "loss": 0.03751220703125, "step": 121545 }, { "epoch": 1.0510069087167426, "grad_norm": 0.0854969418005236, "learning_rate": 2.7598926907424926e-06, "loss": 0.01775703430175781, "step": 121550 }, { "epoch": 1.0510501422382859, "grad_norm": 13.153661258216786, "learning_rate": 2.759689611703438e-06, "loss": 0.0705169677734375, "step": 121555 }, { "epoch": 1.051093375759829, "grad_norm": 0.05623863118338361, "learning_rate": 2.759486533772673e-06, "loss": 0.22607097625732422, "step": 121560 }, { "epoch": 1.0511366092813723, "grad_norm": 15.043419422260564, "learning_rate": 2.7592834569511355e-06, "loss": 0.06429100036621094, "step": 121565 }, { "epoch": 1.0511798428029158, "grad_norm": 1.4984693185073124, "learning_rate": 2.75908038123976e-06, "loss": 0.056536865234375, "step": 121570 }, { "epoch": 1.051223076324459, "grad_norm": 9.877725925124244, "learning_rate": 2.758877306639486e-06, "loss": 0.31786575317382815, "step": 121575 }, { "epoch": 1.0512663098460022, "grad_norm": 2.048864875890377, "learning_rate": 2.7586742331512487e-06, "loss": 0.06171350479125977, "step": 121580 }, { "epoch": 1.0513095433675455, "grad_norm": 0.38731577429060826, "learning_rate": 2.758471160775984e-06, "loss": 0.2542816162109375, "step": 121585 }, { "epoch": 1.0513527768890887, "grad_norm": 10.54105078188751, "learning_rate": 2.758268089514629e-06, "loss": 0.05145339965820313, "step": 121590 }, { "epoch": 1.051396010410632, "grad_norm": 21.348870531358386, "learning_rate": 2.7580650193681203e-06, "loss": 0.104034423828125, "step": 121595 }, { "epoch": 1.0514392439321751, "grad_norm": 2.0764647592789722, "learning_rate": 2.7578619503373933e-06, "loss": 0.036554908752441405, "step": 121600 }, { "epoch": 1.0514824774537186, "grad_norm": 0.6060900239032465, "learning_rate": 2.757658882423387e-06, "loss": 0.12188072204589843, "step": 121605 }, { "epoch": 1.0515257109752618, "grad_norm": 0.9860445569500871, "learning_rate": 2.7574558156270362e-06, "loss": 0.305181884765625, "step": 121610 }, { "epoch": 1.051568944496805, "grad_norm": 28.98918745922747, "learning_rate": 2.7572527499492773e-06, "loss": 0.05396881103515625, "step": 121615 }, { "epoch": 1.0516121780183483, "grad_norm": 0.9880388290797764, "learning_rate": 2.7570496853910477e-06, "loss": 0.06775779724121093, "step": 121620 }, { "epoch": 1.0516554115398915, "grad_norm": 2.321025763070068, "learning_rate": 2.7568466219532835e-06, "loss": 0.07181396484375, "step": 121625 }, { "epoch": 1.0516986450614347, "grad_norm": 0.4862537088592047, "learning_rate": 2.756643559636922e-06, "loss": 0.011284637451171874, "step": 121630 }, { "epoch": 1.0517418785829782, "grad_norm": 0.3132764101164704, "learning_rate": 2.7564404984428963e-06, "loss": 0.04292869567871094, "step": 121635 }, { "epoch": 1.0517851121045214, "grad_norm": 4.705683000707841, "learning_rate": 2.7562374383721478e-06, "loss": 0.09886894226074219, "step": 121640 }, { "epoch": 1.0518283456260646, "grad_norm": 1.473139471841382, "learning_rate": 2.7560343794256093e-06, "loss": 0.0934661865234375, "step": 121645 }, { "epoch": 1.0518715791476079, "grad_norm": 5.4488387693407025, "learning_rate": 2.75583132160422e-06, "loss": 0.02523193359375, "step": 121650 }, { "epoch": 1.051914812669151, "grad_norm": 21.27919381023064, "learning_rate": 2.7556282649089146e-06, "loss": 0.07660980224609375, "step": 121655 }, { "epoch": 1.0519580461906943, "grad_norm": 3.9908476016981442, "learning_rate": 2.7554252093406303e-06, "loss": 0.01937713623046875, "step": 121660 }, { "epoch": 1.0520012797122378, "grad_norm": 2.4733113336869033, "learning_rate": 2.755222154900302e-06, "loss": 0.0553497314453125, "step": 121665 }, { "epoch": 1.052044513233781, "grad_norm": 0.7726368292346915, "learning_rate": 2.7550191015888688e-06, "loss": 0.0231201171875, "step": 121670 }, { "epoch": 1.0520877467553242, "grad_norm": 17.501020049498365, "learning_rate": 2.754816049407265e-06, "loss": 0.191705322265625, "step": 121675 }, { "epoch": 1.0521309802768675, "grad_norm": 30.48213326475131, "learning_rate": 2.754612998356429e-06, "loss": 0.34440765380859373, "step": 121680 }, { "epoch": 1.0521742137984107, "grad_norm": 7.280419276906502, "learning_rate": 2.754409948437296e-06, "loss": 0.08818588256835938, "step": 121685 }, { "epoch": 1.052217447319954, "grad_norm": 1.4238515951505446, "learning_rate": 2.754206899650802e-06, "loss": 0.031243896484375, "step": 121690 }, { "epoch": 1.0522606808414974, "grad_norm": 17.16768817399536, "learning_rate": 2.7540038519978843e-06, "loss": 0.13302154541015626, "step": 121695 }, { "epoch": 1.0523039143630406, "grad_norm": 0.13266805965308015, "learning_rate": 2.7538008054794787e-06, "loss": 0.06813507080078125, "step": 121700 }, { "epoch": 1.0523471478845838, "grad_norm": 0.9364234160549211, "learning_rate": 2.753597760096522e-06, "loss": 0.10017738342285157, "step": 121705 }, { "epoch": 1.052390381406127, "grad_norm": 4.808569374915447, "learning_rate": 2.7533947158499516e-06, "loss": 0.08443069458007812, "step": 121710 }, { "epoch": 1.0524336149276703, "grad_norm": 16.684819443240567, "learning_rate": 2.7531916727407024e-06, "loss": 0.04661712646484375, "step": 121715 }, { "epoch": 1.0524768484492135, "grad_norm": 10.331818740388867, "learning_rate": 2.7529886307697117e-06, "loss": 0.06614723205566406, "step": 121720 }, { "epoch": 1.0525200819707567, "grad_norm": 5.964266069158523, "learning_rate": 2.752785589937915e-06, "loss": 0.06505966186523438, "step": 121725 }, { "epoch": 1.0525633154923002, "grad_norm": 43.54979960428201, "learning_rate": 2.7525825502462496e-06, "loss": 0.130859375, "step": 121730 }, { "epoch": 1.0526065490138434, "grad_norm": 8.389078429994013, "learning_rate": 2.752379511695651e-06, "loss": 0.064544677734375, "step": 121735 }, { "epoch": 1.0526497825353867, "grad_norm": 6.271229804603006, "learning_rate": 2.7521764742870574e-06, "loss": 0.06295967102050781, "step": 121740 }, { "epoch": 1.0526930160569299, "grad_norm": 3.044431230694903, "learning_rate": 2.7519734380214038e-06, "loss": 0.019899368286132812, "step": 121745 }, { "epoch": 1.0527362495784731, "grad_norm": 3.689088034434479, "learning_rate": 2.7517704028996266e-06, "loss": 0.037677574157714847, "step": 121750 }, { "epoch": 1.0527794831000163, "grad_norm": 1.0574373967377615, "learning_rate": 2.751567368922662e-06, "loss": 0.02972564697265625, "step": 121755 }, { "epoch": 1.0528227166215598, "grad_norm": 0.5446987225663545, "learning_rate": 2.7513643360914464e-06, "loss": 0.17610702514648438, "step": 121760 }, { "epoch": 1.052865950143103, "grad_norm": 0.9292733962439526, "learning_rate": 2.751161304406917e-06, "loss": 0.13117599487304688, "step": 121765 }, { "epoch": 1.0529091836646463, "grad_norm": 0.12057465222336582, "learning_rate": 2.75095827387001e-06, "loss": 0.013474273681640624, "step": 121770 }, { "epoch": 1.0529524171861895, "grad_norm": 0.3006060997437879, "learning_rate": 2.7507552444816613e-06, "loss": 0.00894012451171875, "step": 121775 }, { "epoch": 1.0529956507077327, "grad_norm": 6.860813917218658, "learning_rate": 2.7505522162428078e-06, "loss": 0.09908294677734375, "step": 121780 }, { "epoch": 1.053038884229276, "grad_norm": 44.30312570278749, "learning_rate": 2.7503491891543846e-06, "loss": 0.17351360321044923, "step": 121785 }, { "epoch": 1.0530821177508192, "grad_norm": 69.84677673124584, "learning_rate": 2.7501461632173296e-06, "loss": 0.03961601257324219, "step": 121790 }, { "epoch": 1.0531253512723626, "grad_norm": 3.1326063305628864, "learning_rate": 2.749943138432577e-06, "loss": 0.086651611328125, "step": 121795 }, { "epoch": 1.0531685847939058, "grad_norm": 4.564565728665453, "learning_rate": 2.749740114801066e-06, "loss": 0.036937713623046875, "step": 121800 }, { "epoch": 1.053211818315449, "grad_norm": 2.0954964130626217, "learning_rate": 2.7495370923237313e-06, "loss": 0.04446601867675781, "step": 121805 }, { "epoch": 1.0532550518369923, "grad_norm": 4.279638109241103, "learning_rate": 2.7493340710015095e-06, "loss": 0.16011199951171876, "step": 121810 }, { "epoch": 1.0532982853585355, "grad_norm": 0.7077281166516967, "learning_rate": 2.749131050835336e-06, "loss": 0.10121231079101563, "step": 121815 }, { "epoch": 1.0533415188800788, "grad_norm": 0.5014626049041535, "learning_rate": 2.7489280318261487e-06, "loss": 0.049901580810546874, "step": 121820 }, { "epoch": 1.0533847524016222, "grad_norm": 0.7735633821917118, "learning_rate": 2.748725013974882e-06, "loss": 0.03986663818359375, "step": 121825 }, { "epoch": 1.0534279859231654, "grad_norm": 9.079642355083635, "learning_rate": 2.748521997282474e-06, "loss": 0.05578536987304687, "step": 121830 }, { "epoch": 1.0534712194447087, "grad_norm": 0.8543502373235474, "learning_rate": 2.748318981749861e-06, "loss": 0.032034111022949216, "step": 121835 }, { "epoch": 1.053514452966252, "grad_norm": 2.4618022908135844, "learning_rate": 2.748115967377978e-06, "loss": 0.13162155151367189, "step": 121840 }, { "epoch": 1.0535576864877951, "grad_norm": 2.4611331858715197, "learning_rate": 2.7479129541677622e-06, "loss": 0.09395828247070312, "step": 121845 }, { "epoch": 1.0536009200093384, "grad_norm": 34.70155173939953, "learning_rate": 2.7477099421201498e-06, "loss": 0.09673185348510742, "step": 121850 }, { "epoch": 1.0536441535308818, "grad_norm": 1.8527494361742176, "learning_rate": 2.7475069312360755e-06, "loss": 0.01475982666015625, "step": 121855 }, { "epoch": 1.053687387052425, "grad_norm": 6.86591749408609, "learning_rate": 2.747303921516478e-06, "loss": 0.112664794921875, "step": 121860 }, { "epoch": 1.0537306205739683, "grad_norm": 6.051930526695494, "learning_rate": 2.747100912962293e-06, "loss": 0.17846527099609374, "step": 121865 }, { "epoch": 1.0537738540955115, "grad_norm": 12.839678636261477, "learning_rate": 2.746897905574455e-06, "loss": 0.03082122802734375, "step": 121870 }, { "epoch": 1.0538170876170547, "grad_norm": 0.8144094250053191, "learning_rate": 2.7466948993539024e-06, "loss": 0.021181869506835937, "step": 121875 }, { "epoch": 1.053860321138598, "grad_norm": 7.089768327062298, "learning_rate": 2.7464918943015707e-06, "loss": 0.042282485961914064, "step": 121880 }, { "epoch": 1.0539035546601412, "grad_norm": 1.1055377268920006, "learning_rate": 2.7462888904183957e-06, "loss": 0.017000579833984376, "step": 121885 }, { "epoch": 1.0539467881816846, "grad_norm": 29.24769767384636, "learning_rate": 2.7460858877053125e-06, "loss": 0.049008655548095706, "step": 121890 }, { "epoch": 1.0539900217032279, "grad_norm": 7.07520273113524, "learning_rate": 2.7458828861632603e-06, "loss": 0.11202850341796874, "step": 121895 }, { "epoch": 1.054033255224771, "grad_norm": 0.22111643926743976, "learning_rate": 2.745679885793173e-06, "loss": 0.07275238037109374, "step": 121900 }, { "epoch": 1.0540764887463143, "grad_norm": 14.043195773051078, "learning_rate": 2.745476886595989e-06, "loss": 0.19298744201660156, "step": 121905 }, { "epoch": 1.0541197222678576, "grad_norm": 0.34348727404434015, "learning_rate": 2.745273888572642e-06, "loss": 0.09588947296142578, "step": 121910 }, { "epoch": 1.0541629557894008, "grad_norm": 9.351900761092574, "learning_rate": 2.7450708917240698e-06, "loss": 0.08120098114013671, "step": 121915 }, { "epoch": 1.0542061893109442, "grad_norm": 3.2862567347731417, "learning_rate": 2.7448678960512067e-06, "loss": 0.01598224639892578, "step": 121920 }, { "epoch": 1.0542494228324875, "grad_norm": 24.374187695715193, "learning_rate": 2.744664901554992e-06, "loss": 0.1186309814453125, "step": 121925 }, { "epoch": 1.0542926563540307, "grad_norm": 40.448546604440644, "learning_rate": 2.744461908236359e-06, "loss": 0.16321182250976562, "step": 121930 }, { "epoch": 1.054335889875574, "grad_norm": 0.2143759750839342, "learning_rate": 2.7442589160962464e-06, "loss": 0.04500732421875, "step": 121935 }, { "epoch": 1.0543791233971171, "grad_norm": 0.2089028700061234, "learning_rate": 2.7440559251355886e-06, "loss": 0.024309730529785155, "step": 121940 }, { "epoch": 1.0544223569186604, "grad_norm": 0.520944597608542, "learning_rate": 2.7438529353553224e-06, "loss": 0.2031341552734375, "step": 121945 }, { "epoch": 1.0544655904402038, "grad_norm": 1.0405871823172355, "learning_rate": 2.7436499467563826e-06, "loss": 0.05559520721435547, "step": 121950 }, { "epoch": 1.054508823961747, "grad_norm": 5.443944283897507, "learning_rate": 2.7434469593397077e-06, "loss": 0.033609771728515626, "step": 121955 }, { "epoch": 1.0545520574832903, "grad_norm": 2.261083926534501, "learning_rate": 2.743243973106232e-06, "loss": 0.123443603515625, "step": 121960 }, { "epoch": 1.0545952910048335, "grad_norm": 0.6405576477974849, "learning_rate": 2.7430409880568934e-06, "loss": 0.0103271484375, "step": 121965 }, { "epoch": 1.0546385245263767, "grad_norm": 13.982260066886651, "learning_rate": 2.742838004192627e-06, "loss": 0.29798164367675783, "step": 121970 }, { "epoch": 1.05468175804792, "grad_norm": 0.9378883721526994, "learning_rate": 2.742635021514369e-06, "loss": 0.1362152099609375, "step": 121975 }, { "epoch": 1.0547249915694632, "grad_norm": 31.719425273632034, "learning_rate": 2.7424320400230545e-06, "loss": 0.09289703369140626, "step": 121980 }, { "epoch": 1.0547682250910067, "grad_norm": 2.1689483964394545, "learning_rate": 2.7422290597196212e-06, "loss": 0.007654953002929688, "step": 121985 }, { "epoch": 1.0548114586125499, "grad_norm": 0.311524855696389, "learning_rate": 2.7420260806050046e-06, "loss": 0.01367778778076172, "step": 121990 }, { "epoch": 1.054854692134093, "grad_norm": 53.847623001152904, "learning_rate": 2.741823102680141e-06, "loss": 0.3438079833984375, "step": 121995 }, { "epoch": 1.0548979256556363, "grad_norm": 3.6557719186674267, "learning_rate": 2.741620125945967e-06, "loss": 0.021736526489257814, "step": 122000 }, { "epoch": 1.0549411591771796, "grad_norm": 1.175775832602887, "learning_rate": 2.7414171504034184e-06, "loss": 0.0132293701171875, "step": 122005 }, { "epoch": 1.0549843926987228, "grad_norm": 2.637765603302813, "learning_rate": 2.74121417605343e-06, "loss": 0.18129959106445312, "step": 122010 }, { "epoch": 1.0550276262202662, "grad_norm": 14.065864916188854, "learning_rate": 2.741011202896938e-06, "loss": 0.08001899719238281, "step": 122015 }, { "epoch": 1.0550708597418095, "grad_norm": 6.084820444632215, "learning_rate": 2.7408082309348814e-06, "loss": 0.251971435546875, "step": 122020 }, { "epoch": 1.0551140932633527, "grad_norm": 0.3926440411389335, "learning_rate": 2.740605260168194e-06, "loss": 0.041123580932617185, "step": 122025 }, { "epoch": 1.055157326784896, "grad_norm": 0.6010046306933629, "learning_rate": 2.7404022905978122e-06, "loss": 0.05382194519042969, "step": 122030 }, { "epoch": 1.0552005603064392, "grad_norm": 0.2004495649952842, "learning_rate": 2.7401993222246716e-06, "loss": 0.05563812255859375, "step": 122035 }, { "epoch": 1.0552437938279824, "grad_norm": 1.0240146987765224, "learning_rate": 2.739996355049709e-06, "loss": 0.01077728271484375, "step": 122040 }, { "epoch": 1.0552870273495256, "grad_norm": 0.23224873515777048, "learning_rate": 2.739793389073861e-06, "loss": 0.08067550659179687, "step": 122045 }, { "epoch": 1.055330260871069, "grad_norm": 0.1552683774442143, "learning_rate": 2.7395904242980607e-06, "loss": 0.05099029541015625, "step": 122050 }, { "epoch": 1.0553734943926123, "grad_norm": 22.771619973008594, "learning_rate": 2.739387460723248e-06, "loss": 0.06795120239257812, "step": 122055 }, { "epoch": 1.0554167279141555, "grad_norm": 19.23554890808534, "learning_rate": 2.7391844983503577e-06, "loss": 0.07231941223144531, "step": 122060 }, { "epoch": 1.0554599614356988, "grad_norm": 1.6401837586665333, "learning_rate": 2.738981537180324e-06, "loss": 0.132379150390625, "step": 122065 }, { "epoch": 1.055503194957242, "grad_norm": 12.62069316230292, "learning_rate": 2.738778577214086e-06, "loss": 0.036328125, "step": 122070 }, { "epoch": 1.0555464284787852, "grad_norm": 3.9993058543446587, "learning_rate": 2.738575618452578e-06, "loss": 0.10647010803222656, "step": 122075 }, { "epoch": 1.0555896620003287, "grad_norm": 2.6698859952810126, "learning_rate": 2.738372660896734e-06, "loss": 0.058148193359375, "step": 122080 }, { "epoch": 1.055632895521872, "grad_norm": 1.9811462365994414, "learning_rate": 2.7381697045474942e-06, "loss": 0.013373184204101562, "step": 122085 }, { "epoch": 1.0556761290434151, "grad_norm": 10.507114043209224, "learning_rate": 2.737966749405792e-06, "loss": 0.0730926513671875, "step": 122090 }, { "epoch": 1.0557193625649584, "grad_norm": 9.916033645538688, "learning_rate": 2.7377637954725643e-06, "loss": 0.1065338134765625, "step": 122095 }, { "epoch": 1.0557625960865016, "grad_norm": 1.9778494167378038, "learning_rate": 2.737560842748747e-06, "loss": 0.06587295532226563, "step": 122100 }, { "epoch": 1.0558058296080448, "grad_norm": 0.6675893995771043, "learning_rate": 2.7373578912352758e-06, "loss": 0.11641826629638671, "step": 122105 }, { "epoch": 1.0558490631295883, "grad_norm": 12.518844697122434, "learning_rate": 2.737154940933085e-06, "loss": 0.1403656005859375, "step": 122110 }, { "epoch": 1.0558922966511315, "grad_norm": 2.667160303909967, "learning_rate": 2.736951991843114e-06, "loss": 0.10498542785644531, "step": 122115 }, { "epoch": 1.0559355301726747, "grad_norm": 25.09754581589295, "learning_rate": 2.7367490439662974e-06, "loss": 0.22586536407470703, "step": 122120 }, { "epoch": 1.055978763694218, "grad_norm": 0.21685955121872333, "learning_rate": 2.7365460973035705e-06, "loss": 0.02547760009765625, "step": 122125 }, { "epoch": 1.0560219972157612, "grad_norm": 23.526200345376907, "learning_rate": 2.73634315185587e-06, "loss": 0.1075775146484375, "step": 122130 }, { "epoch": 1.0560652307373044, "grad_norm": 3.0069181859616396, "learning_rate": 2.7361402076241318e-06, "loss": 0.193115234375, "step": 122135 }, { "epoch": 1.0561084642588476, "grad_norm": 0.40421438372015667, "learning_rate": 2.7359372646092916e-06, "loss": 0.0583989143371582, "step": 122140 }, { "epoch": 1.056151697780391, "grad_norm": 0.09548631888783583, "learning_rate": 2.735734322812284e-06, "loss": 0.035028076171875, "step": 122145 }, { "epoch": 1.0561949313019343, "grad_norm": 1.1086047185535814, "learning_rate": 2.7355313822340475e-06, "loss": 0.012230873107910156, "step": 122150 }, { "epoch": 1.0562381648234775, "grad_norm": 0.03841085397573134, "learning_rate": 2.7353284428755165e-06, "loss": 0.07095870971679688, "step": 122155 }, { "epoch": 1.0562813983450208, "grad_norm": 7.878470066395989, "learning_rate": 2.7351255047376277e-06, "loss": 0.018621826171875, "step": 122160 }, { "epoch": 1.056324631866564, "grad_norm": 2.008692755664604, "learning_rate": 2.734922567821317e-06, "loss": 0.19271697998046874, "step": 122165 }, { "epoch": 1.0563678653881072, "grad_norm": 5.981166538379129, "learning_rate": 2.73471963212752e-06, "loss": 0.0321014404296875, "step": 122170 }, { "epoch": 1.0564110989096507, "grad_norm": 0.33290351651627215, "learning_rate": 2.734516697657171e-06, "loss": 0.12297801971435547, "step": 122175 }, { "epoch": 1.056454332431194, "grad_norm": 1.9894848103201423, "learning_rate": 2.7343137644112093e-06, "loss": 0.06755599975585938, "step": 122180 }, { "epoch": 1.0564975659527371, "grad_norm": 1.4860801367343537, "learning_rate": 2.734110832390568e-06, "loss": 0.06969757080078125, "step": 122185 }, { "epoch": 1.0565407994742804, "grad_norm": 12.011963154966026, "learning_rate": 2.733907901596185e-06, "loss": 0.11038360595703126, "step": 122190 }, { "epoch": 1.0565840329958236, "grad_norm": 0.02278865212642471, "learning_rate": 2.733704972028995e-06, "loss": 0.06447820663452149, "step": 122195 }, { "epoch": 1.0566272665173668, "grad_norm": 8.324125446308688, "learning_rate": 2.7335020436899342e-06, "loss": 0.25396728515625, "step": 122200 }, { "epoch": 1.0566705000389103, "grad_norm": 0.5772958999430784, "learning_rate": 2.733299116579937e-06, "loss": 0.08148612976074218, "step": 122205 }, { "epoch": 1.0567137335604535, "grad_norm": 0.9551055641278264, "learning_rate": 2.7330961906999415e-06, "loss": 0.0160186767578125, "step": 122210 }, { "epoch": 1.0567569670819967, "grad_norm": 27.44457179629451, "learning_rate": 2.7328932660508834e-06, "loss": 0.07266521453857422, "step": 122215 }, { "epoch": 1.05680020060354, "grad_norm": 30.970120721217842, "learning_rate": 2.7326903426336984e-06, "loss": 0.1991607666015625, "step": 122220 }, { "epoch": 1.0568434341250832, "grad_norm": 0.5553975238418067, "learning_rate": 2.732487420449321e-06, "loss": 0.11197357177734375, "step": 122225 }, { "epoch": 1.0568866676466264, "grad_norm": 11.336893623297655, "learning_rate": 2.732284499498689e-06, "loss": 0.15001068115234376, "step": 122230 }, { "epoch": 1.0569299011681696, "grad_norm": 2.479176451747547, "learning_rate": 2.732081579782736e-06, "loss": 0.3333133697509766, "step": 122235 }, { "epoch": 1.056973134689713, "grad_norm": 0.2605494045413601, "learning_rate": 2.731878661302398e-06, "loss": 0.04399871826171875, "step": 122240 }, { "epoch": 1.0570163682112563, "grad_norm": 1.6331665706588039, "learning_rate": 2.731675744058614e-06, "loss": 0.06237106323242188, "step": 122245 }, { "epoch": 1.0570596017327996, "grad_norm": 2.7127843768064865, "learning_rate": 2.7314728280523175e-06, "loss": 0.0852081298828125, "step": 122250 }, { "epoch": 1.0571028352543428, "grad_norm": 0.16257271938167836, "learning_rate": 2.7312699132844443e-06, "loss": 0.06788291931152343, "step": 122255 }, { "epoch": 1.057146068775886, "grad_norm": 0.6774319098231926, "learning_rate": 2.73106699975593e-06, "loss": 0.015798568725585938, "step": 122260 }, { "epoch": 1.0571893022974292, "grad_norm": 0.5236088635079112, "learning_rate": 2.730864087467712e-06, "loss": 0.05487136840820313, "step": 122265 }, { "epoch": 1.0572325358189727, "grad_norm": 2.0343303755586226, "learning_rate": 2.730661176420723e-06, "loss": 0.1420520782470703, "step": 122270 }, { "epoch": 1.057275769340516, "grad_norm": 4.769994332888572, "learning_rate": 2.730458266615903e-06, "loss": 0.037480926513671874, "step": 122275 }, { "epoch": 1.0573190028620592, "grad_norm": 0.1861652682811451, "learning_rate": 2.730255358054185e-06, "loss": 0.27999114990234375, "step": 122280 }, { "epoch": 1.0573622363836024, "grad_norm": 0.3754741049764716, "learning_rate": 2.7300524507365057e-06, "loss": 0.11335029602050781, "step": 122285 }, { "epoch": 1.0574054699051456, "grad_norm": 39.68734181927762, "learning_rate": 2.7298495446638e-06, "loss": 0.09037704467773437, "step": 122290 }, { "epoch": 1.0574487034266888, "grad_norm": 25.064519235966305, "learning_rate": 2.7296466398370053e-06, "loss": 0.22450008392333984, "step": 122295 }, { "epoch": 1.057491936948232, "grad_norm": 5.070197608659083, "learning_rate": 2.7294437362570545e-06, "loss": 0.18146209716796874, "step": 122300 }, { "epoch": 1.0575351704697755, "grad_norm": 1.12773515627149, "learning_rate": 2.7292408339248874e-06, "loss": 0.05963592529296875, "step": 122305 }, { "epoch": 1.0575784039913187, "grad_norm": 5.6117398126483, "learning_rate": 2.729037932841437e-06, "loss": 0.18558216094970703, "step": 122310 }, { "epoch": 1.057621637512862, "grad_norm": 0.6749393539436157, "learning_rate": 2.7288350330076398e-06, "loss": 0.13975372314453124, "step": 122315 }, { "epoch": 1.0576648710344052, "grad_norm": 0.8368181454214982, "learning_rate": 2.7286321344244307e-06, "loss": 0.014328765869140624, "step": 122320 }, { "epoch": 1.0577081045559484, "grad_norm": 1.1481485428430804, "learning_rate": 2.7284292370927472e-06, "loss": 0.026641082763671876, "step": 122325 }, { "epoch": 1.0577513380774917, "grad_norm": 6.861236270772294, "learning_rate": 2.728226341013524e-06, "loss": 0.07834625244140625, "step": 122330 }, { "epoch": 1.0577945715990351, "grad_norm": 8.66172988896939, "learning_rate": 2.7280234461876954e-06, "loss": 0.10438308715820313, "step": 122335 }, { "epoch": 1.0578378051205783, "grad_norm": 2.1523985674783854, "learning_rate": 2.7278205526162e-06, "loss": 0.052242279052734375, "step": 122340 }, { "epoch": 1.0578810386421216, "grad_norm": 1.4758740564929937, "learning_rate": 2.727617660299972e-06, "loss": 0.012680435180664062, "step": 122345 }, { "epoch": 1.0579242721636648, "grad_norm": 0.47465150204242557, "learning_rate": 2.7274147692399467e-06, "loss": 0.024056434631347656, "step": 122350 }, { "epoch": 1.057967505685208, "grad_norm": 5.182714159172626, "learning_rate": 2.7272118794370617e-06, "loss": 0.026607513427734375, "step": 122355 }, { "epoch": 1.0580107392067513, "grad_norm": 3.689066129155174, "learning_rate": 2.7270089908922506e-06, "loss": 0.09838714599609374, "step": 122360 }, { "epoch": 1.0580539727282947, "grad_norm": 1.1283533718195593, "learning_rate": 2.7268061036064486e-06, "loss": 0.05493087768554687, "step": 122365 }, { "epoch": 1.058097206249838, "grad_norm": 46.8177632665161, "learning_rate": 2.7266032175805945e-06, "loss": 0.0824249267578125, "step": 122370 }, { "epoch": 1.0581404397713812, "grad_norm": 6.288762294433497, "learning_rate": 2.7264003328156218e-06, "loss": 0.0495880126953125, "step": 122375 }, { "epoch": 1.0581836732929244, "grad_norm": 5.157455403044999, "learning_rate": 2.7261974493124664e-06, "loss": 0.045354461669921874, "step": 122380 }, { "epoch": 1.0582269068144676, "grad_norm": 0.3068664458930798, "learning_rate": 2.725994567072065e-06, "loss": 0.02089996337890625, "step": 122385 }, { "epoch": 1.0582701403360109, "grad_norm": 2.4370777710636347, "learning_rate": 2.7257916860953517e-06, "loss": 0.01477813720703125, "step": 122390 }, { "epoch": 1.0583133738575543, "grad_norm": 4.496823651262881, "learning_rate": 2.725588806383263e-06, "loss": 0.04205818176269531, "step": 122395 }, { "epoch": 1.0583566073790975, "grad_norm": 0.3626102308247203, "learning_rate": 2.7253859279367334e-06, "loss": 0.03432197570800781, "step": 122400 }, { "epoch": 1.0583998409006408, "grad_norm": 0.6633248123984056, "learning_rate": 2.725183050756701e-06, "loss": 0.03613128662109375, "step": 122405 }, { "epoch": 1.058443074422184, "grad_norm": 0.23454116258099023, "learning_rate": 2.7249801748440993e-06, "loss": 0.07344093322753906, "step": 122410 }, { "epoch": 1.0584863079437272, "grad_norm": 7.7203157653451076, "learning_rate": 2.7247773001998654e-06, "loss": 0.04670257568359375, "step": 122415 }, { "epoch": 1.0585295414652705, "grad_norm": 18.214089392817918, "learning_rate": 2.7245744268249346e-06, "loss": 0.03699703216552734, "step": 122420 }, { "epoch": 1.0585727749868137, "grad_norm": 0.897198151346742, "learning_rate": 2.724371554720242e-06, "loss": 0.07398834228515624, "step": 122425 }, { "epoch": 1.0586160085083571, "grad_norm": 2.1202661903533366, "learning_rate": 2.724168683886722e-06, "loss": 0.04483795166015625, "step": 122430 }, { "epoch": 1.0586592420299004, "grad_norm": 16.16268769084, "learning_rate": 2.7239658143253123e-06, "loss": 0.109613037109375, "step": 122435 }, { "epoch": 1.0587024755514436, "grad_norm": 30.412206701527225, "learning_rate": 2.7237629460369486e-06, "loss": 0.151910400390625, "step": 122440 }, { "epoch": 1.0587457090729868, "grad_norm": 0.4817931451636149, "learning_rate": 2.7235600790225656e-06, "loss": 0.1210968017578125, "step": 122445 }, { "epoch": 1.05878894259453, "grad_norm": 4.099839189374964, "learning_rate": 2.7233572132830993e-06, "loss": 0.05961456298828125, "step": 122450 }, { "epoch": 1.0588321761160733, "grad_norm": 3.287901528774556, "learning_rate": 2.723154348819485e-06, "loss": 0.03744354248046875, "step": 122455 }, { "epoch": 1.0588754096376167, "grad_norm": 5.692834085917736, "learning_rate": 2.722951485632657e-06, "loss": 0.013510894775390626, "step": 122460 }, { "epoch": 1.05891864315916, "grad_norm": 0.6797907682391209, "learning_rate": 2.7227486237235534e-06, "loss": 0.18189697265625, "step": 122465 }, { "epoch": 1.0589618766807032, "grad_norm": 0.329526195879693, "learning_rate": 2.722545763093109e-06, "loss": 0.04698314666748047, "step": 122470 }, { "epoch": 1.0590051102022464, "grad_norm": 0.08215025429189368, "learning_rate": 2.722342903742259e-06, "loss": 0.02109375, "step": 122475 }, { "epoch": 1.0590483437237896, "grad_norm": 2.8428320818140307, "learning_rate": 2.722140045671939e-06, "loss": 0.0409027099609375, "step": 122480 }, { "epoch": 1.0590915772453329, "grad_norm": 27.230814112949222, "learning_rate": 2.721937188883084e-06, "loss": 0.05543384552001953, "step": 122485 }, { "epoch": 1.059134810766876, "grad_norm": 4.032592297820777, "learning_rate": 2.7217343333766303e-06, "loss": 0.08756866455078124, "step": 122490 }, { "epoch": 1.0591780442884196, "grad_norm": 2.389008691285944, "learning_rate": 2.7215314791535125e-06, "loss": 0.10516357421875, "step": 122495 }, { "epoch": 1.0592212778099628, "grad_norm": 0.6225183108712073, "learning_rate": 2.7213286262146678e-06, "loss": 0.027388381958007812, "step": 122500 }, { "epoch": 1.059264511331506, "grad_norm": 3.0974568756537706, "learning_rate": 2.721125774561031e-06, "loss": 0.09154424667358399, "step": 122505 }, { "epoch": 1.0593077448530492, "grad_norm": 0.7129703716437005, "learning_rate": 2.7209229241935375e-06, "loss": 0.08270187377929687, "step": 122510 }, { "epoch": 1.0593509783745925, "grad_norm": 0.6111428790097059, "learning_rate": 2.7207200751131222e-06, "loss": 0.3154453277587891, "step": 122515 }, { "epoch": 1.0593942118961357, "grad_norm": 5.582842255790206, "learning_rate": 2.720517227320722e-06, "loss": 0.13928604125976562, "step": 122520 }, { "epoch": 1.0594374454176791, "grad_norm": 3.9397435036081396, "learning_rate": 2.7203143808172704e-06, "loss": 0.1149932861328125, "step": 122525 }, { "epoch": 1.0594806789392224, "grad_norm": 3.801000304130073, "learning_rate": 2.720111535603705e-06, "loss": 0.0394500732421875, "step": 122530 }, { "epoch": 1.0595239124607656, "grad_norm": 144.99922165151514, "learning_rate": 2.7199086916809614e-06, "loss": 0.33702545166015624, "step": 122535 }, { "epoch": 1.0595671459823088, "grad_norm": 20.37574036290829, "learning_rate": 2.719705849049973e-06, "loss": 0.3147899627685547, "step": 122540 }, { "epoch": 1.059610379503852, "grad_norm": 4.085542342184483, "learning_rate": 2.7195030077116766e-06, "loss": 0.17808837890625, "step": 122545 }, { "epoch": 1.0596536130253953, "grad_norm": 2.996109910309492, "learning_rate": 2.7193001676670078e-06, "loss": 0.03895645141601563, "step": 122550 }, { "epoch": 1.0596968465469385, "grad_norm": 1.691220084949105, "learning_rate": 2.719097328916901e-06, "loss": 0.036527252197265624, "step": 122555 }, { "epoch": 1.059740080068482, "grad_norm": 16.2882317365817, "learning_rate": 2.7188944914622936e-06, "loss": 0.09102935791015625, "step": 122560 }, { "epoch": 1.0597833135900252, "grad_norm": 0.40936041374117993, "learning_rate": 2.71869165530412e-06, "loss": 0.0272003173828125, "step": 122565 }, { "epoch": 1.0598265471115684, "grad_norm": 4.314112338245794, "learning_rate": 2.718488820443315e-06, "loss": 0.08951759338378906, "step": 122570 }, { "epoch": 1.0598697806331117, "grad_norm": 3.434965061683022, "learning_rate": 2.718285986880815e-06, "loss": 0.0386962890625, "step": 122575 }, { "epoch": 1.0599130141546549, "grad_norm": 18.303780525213153, "learning_rate": 2.7180831546175547e-06, "loss": 0.12436103820800781, "step": 122580 }, { "epoch": 1.0599562476761981, "grad_norm": 0.31283300600482944, "learning_rate": 2.7178803236544707e-06, "loss": 0.0677642822265625, "step": 122585 }, { "epoch": 1.0599994811977416, "grad_norm": 4.667954784716332, "learning_rate": 2.7176774939924967e-06, "loss": 0.17754096984863282, "step": 122590 }, { "epoch": 1.0600427147192848, "grad_norm": 30.665413475693416, "learning_rate": 2.7174746656325694e-06, "loss": 0.12876129150390625, "step": 122595 }, { "epoch": 1.060085948240828, "grad_norm": 13.212008459497062, "learning_rate": 2.7172718385756253e-06, "loss": 0.0448883056640625, "step": 122600 }, { "epoch": 1.0601291817623713, "grad_norm": 0.48972778657893284, "learning_rate": 2.717069012822597e-06, "loss": 0.043408203125, "step": 122605 }, { "epoch": 1.0601724152839145, "grad_norm": 11.202715260332171, "learning_rate": 2.716866188374422e-06, "loss": 0.05560798645019531, "step": 122610 }, { "epoch": 1.0602156488054577, "grad_norm": 1.616996898770103, "learning_rate": 2.7166633652320355e-06, "loss": 0.0317779541015625, "step": 122615 }, { "epoch": 1.0602588823270012, "grad_norm": 0.1899405879584587, "learning_rate": 2.716460543396371e-06, "loss": 0.020523452758789064, "step": 122620 }, { "epoch": 1.0603021158485444, "grad_norm": 4.539924650431623, "learning_rate": 2.7162577228683666e-06, "loss": 0.076934814453125, "step": 122625 }, { "epoch": 1.0603453493700876, "grad_norm": 6.820262541498248, "learning_rate": 2.716054903648957e-06, "loss": 0.08779220581054688, "step": 122630 }, { "epoch": 1.0603885828916308, "grad_norm": 3.9893337501068022, "learning_rate": 2.7158520857390763e-06, "loss": 0.030930328369140624, "step": 122635 }, { "epoch": 1.060431816413174, "grad_norm": 1.2143837025360285, "learning_rate": 2.7156492691396607e-06, "loss": 0.40662269592285155, "step": 122640 }, { "epoch": 1.0604750499347173, "grad_norm": 4.648538536369596, "learning_rate": 2.715446453851646e-06, "loss": 0.014950180053710937, "step": 122645 }, { "epoch": 1.0605182834562608, "grad_norm": 4.988183764245783, "learning_rate": 2.7152436398759675e-06, "loss": 0.0506134033203125, "step": 122650 }, { "epoch": 1.060561516977804, "grad_norm": 0.2544530264769386, "learning_rate": 2.7150408272135584e-06, "loss": 0.042047119140625, "step": 122655 }, { "epoch": 1.0606047504993472, "grad_norm": 3.6252499057330017, "learning_rate": 2.7148380158653566e-06, "loss": 0.0896026611328125, "step": 122660 }, { "epoch": 1.0606479840208904, "grad_norm": 0.25742314885276396, "learning_rate": 2.7146352058322975e-06, "loss": 0.009158515930175781, "step": 122665 }, { "epoch": 1.0606912175424337, "grad_norm": 76.34739976311874, "learning_rate": 2.7144323971153154e-06, "loss": 0.32282028198242185, "step": 122670 }, { "epoch": 1.060734451063977, "grad_norm": 14.327033682041122, "learning_rate": 2.714229589715346e-06, "loss": 0.04220123291015625, "step": 122675 }, { "epoch": 1.0607776845855201, "grad_norm": 1.4905933831141376, "learning_rate": 2.714026783633324e-06, "loss": 0.12875518798828126, "step": 122680 }, { "epoch": 1.0608209181070636, "grad_norm": 2.6832931849166592, "learning_rate": 2.7138239788701846e-06, "loss": 0.03397369384765625, "step": 122685 }, { "epoch": 1.0608641516286068, "grad_norm": 14.728816691636752, "learning_rate": 2.7136211754268635e-06, "loss": 0.28054122924804686, "step": 122690 }, { "epoch": 1.06090738515015, "grad_norm": 0.6040239929184861, "learning_rate": 2.7134183733042976e-06, "loss": 0.06380691528320312, "step": 122695 }, { "epoch": 1.0609506186716933, "grad_norm": 2.750148439292341, "learning_rate": 2.7132155725034208e-06, "loss": 0.138690185546875, "step": 122700 }, { "epoch": 1.0609938521932365, "grad_norm": 20.482595699705097, "learning_rate": 2.713012773025168e-06, "loss": 0.04676132202148438, "step": 122705 }, { "epoch": 1.0610370857147797, "grad_norm": 1.0044667870746173, "learning_rate": 2.7128099748704747e-06, "loss": 0.013365936279296876, "step": 122710 }, { "epoch": 1.0610803192363232, "grad_norm": 0.07844493347635022, "learning_rate": 2.712607178040276e-06, "loss": 0.12108955383300782, "step": 122715 }, { "epoch": 1.0611235527578664, "grad_norm": 8.954544703418069, "learning_rate": 2.7124043825355083e-06, "loss": 0.04326324462890625, "step": 122720 }, { "epoch": 1.0611667862794096, "grad_norm": 1.3825665526002615, "learning_rate": 2.7122015883571064e-06, "loss": 0.20971527099609374, "step": 122725 }, { "epoch": 1.0612100198009529, "grad_norm": 9.660376001986304, "learning_rate": 2.711998795506005e-06, "loss": 0.0618560791015625, "step": 122730 }, { "epoch": 1.061253253322496, "grad_norm": 0.22423093505230296, "learning_rate": 2.7117960039831406e-06, "loss": 0.013540077209472656, "step": 122735 }, { "epoch": 1.0612964868440393, "grad_norm": 16.12526381693769, "learning_rate": 2.7115932137894464e-06, "loss": 0.0846334457397461, "step": 122740 }, { "epoch": 1.0613397203655826, "grad_norm": 3.798173908890186, "learning_rate": 2.7113904249258595e-06, "loss": 0.07903518676757812, "step": 122745 }, { "epoch": 1.061382953887126, "grad_norm": 5.646284899220553, "learning_rate": 2.711187637393313e-06, "loss": 0.04286346435546875, "step": 122750 }, { "epoch": 1.0614261874086692, "grad_norm": 2.324521551193912, "learning_rate": 2.710984851192745e-06, "loss": 0.09303512573242187, "step": 122755 }, { "epoch": 1.0614694209302125, "grad_norm": 1.998312448457712, "learning_rate": 2.710782066325089e-06, "loss": 0.06850051879882812, "step": 122760 }, { "epoch": 1.0615126544517557, "grad_norm": 4.538872052794707, "learning_rate": 2.710579282791281e-06, "loss": 0.03294048309326172, "step": 122765 }, { "epoch": 1.061555887973299, "grad_norm": 2.8138742499577676, "learning_rate": 2.710376500592255e-06, "loss": 0.01214447021484375, "step": 122770 }, { "epoch": 1.0615991214948421, "grad_norm": 0.3629159285273663, "learning_rate": 2.7101737197289476e-06, "loss": 0.06463165283203125, "step": 122775 }, { "epoch": 1.0616423550163856, "grad_norm": 0.24383864577013992, "learning_rate": 2.709970940202292e-06, "loss": 0.2359783172607422, "step": 122780 }, { "epoch": 1.0616855885379288, "grad_norm": 0.4592672582306348, "learning_rate": 2.709768162013226e-06, "loss": 0.08638687133789062, "step": 122785 }, { "epoch": 1.061728822059472, "grad_norm": 15.144496847677178, "learning_rate": 2.709565385162684e-06, "loss": 0.037740325927734374, "step": 122790 }, { "epoch": 1.0617720555810153, "grad_norm": 32.612629725565824, "learning_rate": 2.709362609651601e-06, "loss": 0.11045379638671875, "step": 122795 }, { "epoch": 1.0618152891025585, "grad_norm": 7.297930683347746, "learning_rate": 2.709159835480911e-06, "loss": 0.1180349349975586, "step": 122800 }, { "epoch": 1.0618585226241017, "grad_norm": 0.5743761624908327, "learning_rate": 2.7089570626515506e-06, "loss": 0.050196075439453126, "step": 122805 }, { "epoch": 1.0619017561456452, "grad_norm": 10.898087312460168, "learning_rate": 2.7087542911644533e-06, "loss": 0.06795425415039062, "step": 122810 }, { "epoch": 1.0619449896671884, "grad_norm": 3.45769557573507, "learning_rate": 2.7085515210205566e-06, "loss": 0.029981231689453124, "step": 122815 }, { "epoch": 1.0619882231887317, "grad_norm": 0.22595753756426698, "learning_rate": 2.708348752220795e-06, "loss": 0.03499298095703125, "step": 122820 }, { "epoch": 1.0620314567102749, "grad_norm": 1.5379011306305013, "learning_rate": 2.7081459847661027e-06, "loss": 0.03000946044921875, "step": 122825 }, { "epoch": 1.062074690231818, "grad_norm": 0.9468852284903552, "learning_rate": 2.707943218657415e-06, "loss": 0.05687103271484375, "step": 122830 }, { "epoch": 1.0621179237533613, "grad_norm": 1.3805021016630878, "learning_rate": 2.707740453895668e-06, "loss": 0.05159149169921875, "step": 122835 }, { "epoch": 1.0621611572749046, "grad_norm": 1.3599164456575887, "learning_rate": 2.707537690481796e-06, "loss": 0.04164886474609375, "step": 122840 }, { "epoch": 1.062204390796448, "grad_norm": 0.92327075284823, "learning_rate": 2.7073349284167334e-06, "loss": 0.030687332153320312, "step": 122845 }, { "epoch": 1.0622476243179912, "grad_norm": 41.85054019170058, "learning_rate": 2.707132167701417e-06, "loss": 0.10348014831542969, "step": 122850 }, { "epoch": 1.0622908578395345, "grad_norm": 2.3326568240978616, "learning_rate": 2.7069294083367814e-06, "loss": 0.05048027038574219, "step": 122855 }, { "epoch": 1.0623340913610777, "grad_norm": 5.856808379988086, "learning_rate": 2.706726650323761e-06, "loss": 0.10974655151367188, "step": 122860 }, { "epoch": 1.062377324882621, "grad_norm": 22.893588768676718, "learning_rate": 2.706523893663292e-06, "loss": 0.21668777465820313, "step": 122865 }, { "epoch": 1.0624205584041642, "grad_norm": 2.701322749473929, "learning_rate": 2.7063211383563087e-06, "loss": 0.05645675659179687, "step": 122870 }, { "epoch": 1.0624637919257076, "grad_norm": 32.216855687603136, "learning_rate": 2.706118384403745e-06, "loss": 0.0785682201385498, "step": 122875 }, { "epoch": 1.0625070254472508, "grad_norm": 13.245415877725836, "learning_rate": 2.705915631806539e-06, "loss": 0.05465984344482422, "step": 122880 }, { "epoch": 1.062550258968794, "grad_norm": 0.6416433092964637, "learning_rate": 2.705712880565623e-06, "loss": 0.05153350830078125, "step": 122885 }, { "epoch": 1.0625934924903373, "grad_norm": 14.27982114536815, "learning_rate": 2.7055101306819347e-06, "loss": 0.05821914672851562, "step": 122890 }, { "epoch": 1.0626367260118805, "grad_norm": 1.066420543533883, "learning_rate": 2.705307382156407e-06, "loss": 0.052545166015625, "step": 122895 }, { "epoch": 1.0626799595334238, "grad_norm": 0.08685502056489802, "learning_rate": 2.7051046349899755e-06, "loss": 0.03565788269042969, "step": 122900 }, { "epoch": 1.0627231930549672, "grad_norm": 9.872872383576706, "learning_rate": 2.704901889183574e-06, "loss": 0.03936691284179687, "step": 122905 }, { "epoch": 1.0627664265765104, "grad_norm": 4.1747571200171505, "learning_rate": 2.7046991447381405e-06, "loss": 0.08432731628417969, "step": 122910 }, { "epoch": 1.0628096600980537, "grad_norm": 0.2837096828103351, "learning_rate": 2.7044964016546075e-06, "loss": 0.08983783721923828, "step": 122915 }, { "epoch": 1.062852893619597, "grad_norm": 1.6521473367677728, "learning_rate": 2.7042936599339123e-06, "loss": 0.05443572998046875, "step": 122920 }, { "epoch": 1.0628961271411401, "grad_norm": 15.831961241998306, "learning_rate": 2.704090919576988e-06, "loss": 0.16784210205078126, "step": 122925 }, { "epoch": 1.0629393606626834, "grad_norm": 3.914976262971921, "learning_rate": 2.7038881805847703e-06, "loss": 0.09077606201171876, "step": 122930 }, { "epoch": 1.0629825941842266, "grad_norm": 1.0514494612558776, "learning_rate": 2.7036854429581937e-06, "loss": 0.04853515625, "step": 122935 }, { "epoch": 1.06302582770577, "grad_norm": 1.7613382696286892, "learning_rate": 2.7034827066981934e-06, "loss": 0.23343048095703126, "step": 122940 }, { "epoch": 1.0630690612273133, "grad_norm": 2.6792292146711, "learning_rate": 2.7032799718057053e-06, "loss": 0.037268447875976565, "step": 122945 }, { "epoch": 1.0631122947488565, "grad_norm": 0.30709282693296086, "learning_rate": 2.7030772382816637e-06, "loss": 0.08361892700195313, "step": 122950 }, { "epoch": 1.0631555282703997, "grad_norm": 8.172953654193627, "learning_rate": 2.7028745061270037e-06, "loss": 0.093267822265625, "step": 122955 }, { "epoch": 1.063198761791943, "grad_norm": 1.502841883402884, "learning_rate": 2.7026717753426606e-06, "loss": 0.0635995864868164, "step": 122960 }, { "epoch": 1.0632419953134862, "grad_norm": 3.2786271499433597, "learning_rate": 2.702469045929568e-06, "loss": 0.039461517333984376, "step": 122965 }, { "epoch": 1.0632852288350296, "grad_norm": 1.2883483261917004, "learning_rate": 2.702266317888662e-06, "loss": 0.010303783416748046, "step": 122970 }, { "epoch": 1.0633284623565729, "grad_norm": 2.9054864569958108, "learning_rate": 2.7020635912208775e-06, "loss": 0.15180740356445313, "step": 122975 }, { "epoch": 1.063371695878116, "grad_norm": 0.5271866219813265, "learning_rate": 2.70186086592715e-06, "loss": 0.04703216552734375, "step": 122980 }, { "epoch": 1.0634149293996593, "grad_norm": 0.06875410127636061, "learning_rate": 2.701658142008414e-06, "loss": 0.017327499389648438, "step": 122985 }, { "epoch": 1.0634581629212025, "grad_norm": 1.7674114415211153, "learning_rate": 2.701455419465604e-06, "loss": 0.02393684387207031, "step": 122990 }, { "epoch": 1.0635013964427458, "grad_norm": 5.944258380787609, "learning_rate": 2.701252698299655e-06, "loss": 0.2879486083984375, "step": 122995 }, { "epoch": 1.063544629964289, "grad_norm": 0.6354483568138771, "learning_rate": 2.7010499785115023e-06, "loss": 0.1599720001220703, "step": 123000 }, { "epoch": 1.0635878634858325, "grad_norm": 2.4186039785787763, "learning_rate": 2.7008472601020797e-06, "loss": 0.057660675048828124, "step": 123005 }, { "epoch": 1.0636310970073757, "grad_norm": 7.426235361727964, "learning_rate": 2.700644543072324e-06, "loss": 0.0140106201171875, "step": 123010 }, { "epoch": 1.063674330528919, "grad_norm": 12.6634700007365, "learning_rate": 2.7004418274231694e-06, "loss": 0.046825408935546875, "step": 123015 }, { "epoch": 1.0637175640504621, "grad_norm": 0.10854967112748105, "learning_rate": 2.7002391131555507e-06, "loss": 0.027005767822265624, "step": 123020 }, { "epoch": 1.0637607975720054, "grad_norm": 0.6315570294605748, "learning_rate": 2.7000364002704025e-06, "loss": 0.09237003326416016, "step": 123025 }, { "epoch": 1.0638040310935486, "grad_norm": 3.176255860794952, "learning_rate": 2.6998336887686597e-06, "loss": 0.0484771728515625, "step": 123030 }, { "epoch": 1.063847264615092, "grad_norm": 1.164674338463069, "learning_rate": 2.699630978651257e-06, "loss": 0.048919677734375, "step": 123035 }, { "epoch": 1.0638904981366353, "grad_norm": 1.5649884490341106, "learning_rate": 2.69942826991913e-06, "loss": 0.09136238098144531, "step": 123040 }, { "epoch": 1.0639337316581785, "grad_norm": 0.35311971503402917, "learning_rate": 2.699225562573214e-06, "loss": 0.038873291015625, "step": 123045 }, { "epoch": 1.0639769651797217, "grad_norm": 3.3954932190528253, "learning_rate": 2.699022856614443e-06, "loss": 0.2914289474487305, "step": 123050 }, { "epoch": 1.064020198701265, "grad_norm": 12.881557377906843, "learning_rate": 2.6988201520437515e-06, "loss": 0.1580667495727539, "step": 123055 }, { "epoch": 1.0640634322228082, "grad_norm": 0.46098187996308204, "learning_rate": 2.698617448862075e-06, "loss": 0.03554840087890625, "step": 123060 }, { "epoch": 1.0641066657443516, "grad_norm": 4.020853172202026, "learning_rate": 2.698414747070347e-06, "loss": 0.06619873046875, "step": 123065 }, { "epoch": 1.0641498992658949, "grad_norm": 0.05975114447508883, "learning_rate": 2.698212046669505e-06, "loss": 0.026703643798828124, "step": 123070 }, { "epoch": 1.064193132787438, "grad_norm": 1.3920949324524285, "learning_rate": 2.698009347660483e-06, "loss": 0.02060394287109375, "step": 123075 }, { "epoch": 1.0642363663089813, "grad_norm": 1.8721228695987986, "learning_rate": 2.697806650044214e-06, "loss": 0.0661844253540039, "step": 123080 }, { "epoch": 1.0642795998305246, "grad_norm": 5.638576574408495, "learning_rate": 2.6976039538216343e-06, "loss": 0.0972869873046875, "step": 123085 }, { "epoch": 1.0643228333520678, "grad_norm": 0.8514481012090365, "learning_rate": 2.697401258993679e-06, "loss": 0.00650787353515625, "step": 123090 }, { "epoch": 1.0643660668736112, "grad_norm": 38.68197551219729, "learning_rate": 2.697198565561282e-06, "loss": 0.5013916015625, "step": 123095 }, { "epoch": 1.0644093003951545, "grad_norm": 1.0397643376614845, "learning_rate": 2.696995873525377e-06, "loss": 0.17052879333496093, "step": 123100 }, { "epoch": 1.0644525339166977, "grad_norm": 1.430448388244408, "learning_rate": 2.696793182886902e-06, "loss": 0.0580596923828125, "step": 123105 }, { "epoch": 1.064495767438241, "grad_norm": 3.6620105339495748, "learning_rate": 2.696590493646789e-06, "loss": 0.046783447265625, "step": 123110 }, { "epoch": 1.0645390009597842, "grad_norm": 2.8433345133335837, "learning_rate": 2.696387805805975e-06, "loss": 0.37256393432617185, "step": 123115 }, { "epoch": 1.0645822344813274, "grad_norm": 9.374019822360646, "learning_rate": 2.6961851193653935e-06, "loss": 0.12473258972167969, "step": 123120 }, { "epoch": 1.0646254680028706, "grad_norm": 0.30371364624097336, "learning_rate": 2.695982434325979e-06, "loss": 0.057501220703125, "step": 123125 }, { "epoch": 1.064668701524414, "grad_norm": 1.5018191110874057, "learning_rate": 2.6957797506886654e-06, "loss": 0.06425018310546875, "step": 123130 }, { "epoch": 1.0647119350459573, "grad_norm": 0.3004220494137264, "learning_rate": 2.6955770684543903e-06, "loss": 0.081201171875, "step": 123135 }, { "epoch": 1.0647551685675005, "grad_norm": 48.644211020846434, "learning_rate": 2.6953743876240863e-06, "loss": 0.5131332397460937, "step": 123140 }, { "epoch": 1.0647984020890437, "grad_norm": 3.029574314002475, "learning_rate": 2.695171708198689e-06, "loss": 0.06373138427734375, "step": 123145 }, { "epoch": 1.064841635610587, "grad_norm": 7.033073368774794, "learning_rate": 2.6949690301791328e-06, "loss": 0.07365913391113281, "step": 123150 }, { "epoch": 1.0648848691321302, "grad_norm": 1.8668060765369152, "learning_rate": 2.6947663535663527e-06, "loss": 0.0292694091796875, "step": 123155 }, { "epoch": 1.0649281026536737, "grad_norm": 0.14968893155875093, "learning_rate": 2.694563678361282e-06, "loss": 0.36032257080078123, "step": 123160 }, { "epoch": 1.0649713361752169, "grad_norm": 0.6616230367200463, "learning_rate": 2.694361004564858e-06, "loss": 0.0316619873046875, "step": 123165 }, { "epoch": 1.0650145696967601, "grad_norm": 3.791657206428038, "learning_rate": 2.694158332178013e-06, "loss": 0.03277740478515625, "step": 123170 }, { "epoch": 1.0650578032183033, "grad_norm": 98.735816924438, "learning_rate": 2.6939556612016836e-06, "loss": 0.24504737854003905, "step": 123175 }, { "epoch": 1.0651010367398466, "grad_norm": 24.28421190296856, "learning_rate": 2.693752991636804e-06, "loss": 0.0836482048034668, "step": 123180 }, { "epoch": 1.0651442702613898, "grad_norm": 5.362201425974884, "learning_rate": 2.693550323484308e-06, "loss": 0.033118820190429686, "step": 123185 }, { "epoch": 1.065187503782933, "grad_norm": 9.409919458167781, "learning_rate": 2.69334765674513e-06, "loss": 0.0466796875, "step": 123190 }, { "epoch": 1.0652307373044765, "grad_norm": 0.9378705350744925, "learning_rate": 2.6931449914202058e-06, "loss": 0.042353057861328126, "step": 123195 }, { "epoch": 1.0652739708260197, "grad_norm": 0.7965506628147877, "learning_rate": 2.69294232751047e-06, "loss": 0.033599853515625, "step": 123200 }, { "epoch": 1.065317204347563, "grad_norm": 0.06781742553805459, "learning_rate": 2.6927396650168575e-06, "loss": 0.04218101501464844, "step": 123205 }, { "epoch": 1.0653604378691062, "grad_norm": 23.04765241918782, "learning_rate": 2.6925370039403024e-06, "loss": 0.15731735229492189, "step": 123210 }, { "epoch": 1.0654036713906494, "grad_norm": 6.443826035301455, "learning_rate": 2.6923343442817398e-06, "loss": 0.04475555419921875, "step": 123215 }, { "epoch": 1.0654469049121926, "grad_norm": 0.3251753321869574, "learning_rate": 2.6921316860421034e-06, "loss": 0.2665191650390625, "step": 123220 }, { "epoch": 1.065490138433736, "grad_norm": 0.6739272235899936, "learning_rate": 2.691929029222328e-06, "loss": 0.0182464599609375, "step": 123225 }, { "epoch": 1.0655333719552793, "grad_norm": 3.8140315452297315, "learning_rate": 2.6917263738233493e-06, "loss": 0.131683349609375, "step": 123230 }, { "epoch": 1.0655766054768225, "grad_norm": 0.47277803697819365, "learning_rate": 2.6915237198461014e-06, "loss": 0.08007431030273438, "step": 123235 }, { "epoch": 1.0656198389983658, "grad_norm": 0.18878978305981034, "learning_rate": 2.691321067291519e-06, "loss": 0.0266815185546875, "step": 123240 }, { "epoch": 1.065663072519909, "grad_norm": 0.6634220866839924, "learning_rate": 2.6911184161605367e-06, "loss": 0.09690933227539063, "step": 123245 }, { "epoch": 1.0657063060414522, "grad_norm": 51.61421046480833, "learning_rate": 2.6909157664540886e-06, "loss": 0.0742218017578125, "step": 123250 }, { "epoch": 1.0657495395629955, "grad_norm": 0.5404537357592456, "learning_rate": 2.6907131181731093e-06, "loss": 0.08450775146484375, "step": 123255 }, { "epoch": 1.065792773084539, "grad_norm": 2.6802595098641517, "learning_rate": 2.690510471318534e-06, "loss": 0.02444915771484375, "step": 123260 }, { "epoch": 1.0658360066060821, "grad_norm": 1.32706848510185, "learning_rate": 2.6903078258912974e-06, "loss": 0.057969284057617185, "step": 123265 }, { "epoch": 1.0658792401276254, "grad_norm": 2.7892658740531586, "learning_rate": 2.690105181892334e-06, "loss": 0.03773231506347656, "step": 123270 }, { "epoch": 1.0659224736491686, "grad_norm": 0.7206008136083408, "learning_rate": 2.689902539322578e-06, "loss": 0.15592803955078124, "step": 123275 }, { "epoch": 1.0659657071707118, "grad_norm": 58.59141135804429, "learning_rate": 2.689699898182963e-06, "loss": 0.32726287841796875, "step": 123280 }, { "epoch": 1.066008940692255, "grad_norm": 0.5118542705501131, "learning_rate": 2.6894972584744255e-06, "loss": 0.08017997741699219, "step": 123285 }, { "epoch": 1.0660521742137985, "grad_norm": 1.4825558023266845, "learning_rate": 2.6892946201978984e-06, "loss": 0.025856781005859374, "step": 123290 }, { "epoch": 1.0660954077353417, "grad_norm": 3.98906565478256, "learning_rate": 2.689091983354318e-06, "loss": 0.029854583740234374, "step": 123295 }, { "epoch": 1.066138641256885, "grad_norm": 3.481406638998241, "learning_rate": 2.688889347944618e-06, "loss": 0.3698890686035156, "step": 123300 }, { "epoch": 1.0661818747784282, "grad_norm": 3.9910919121510937, "learning_rate": 2.6886867139697326e-06, "loss": 0.17815895080566407, "step": 123305 }, { "epoch": 1.0662251082999714, "grad_norm": 0.6429354296005899, "learning_rate": 2.6884840814305963e-06, "loss": 0.026853561401367188, "step": 123310 }, { "epoch": 1.0662683418215146, "grad_norm": 0.9038780760680288, "learning_rate": 2.6882814503281444e-06, "loss": 0.03872756958007813, "step": 123315 }, { "epoch": 1.066311575343058, "grad_norm": 1.2990758397387667, "learning_rate": 2.6880788206633098e-06, "loss": 0.03658580780029297, "step": 123320 }, { "epoch": 1.0663548088646013, "grad_norm": 25.486109745170445, "learning_rate": 2.687876192437029e-06, "loss": 0.12952804565429688, "step": 123325 }, { "epoch": 1.0663980423861446, "grad_norm": 1.1452721263501882, "learning_rate": 2.687673565650236e-06, "loss": 0.15145339965820312, "step": 123330 }, { "epoch": 1.0664412759076878, "grad_norm": 17.465844646693768, "learning_rate": 2.687470940303864e-06, "loss": 0.0615875244140625, "step": 123335 }, { "epoch": 1.066484509429231, "grad_norm": 2.8737348719485496, "learning_rate": 2.687268316398849e-06, "loss": 0.18852996826171875, "step": 123340 }, { "epoch": 1.0665277429507742, "grad_norm": 2.744660369573161, "learning_rate": 2.687065693936125e-06, "loss": 0.013589859008789062, "step": 123345 }, { "epoch": 1.0665709764723177, "grad_norm": 20.475261282807576, "learning_rate": 2.686863072916627e-06, "loss": 0.15721664428710938, "step": 123350 }, { "epoch": 1.066614209993861, "grad_norm": 0.27197567305815407, "learning_rate": 2.6866604533412866e-06, "loss": 0.02504730224609375, "step": 123355 }, { "epoch": 1.0666574435154041, "grad_norm": 7.76394950087231, "learning_rate": 2.686457835211042e-06, "loss": 0.04573822021484375, "step": 123360 }, { "epoch": 1.0667006770369474, "grad_norm": 0.710725402590435, "learning_rate": 2.686255218526826e-06, "loss": 0.017779541015625, "step": 123365 }, { "epoch": 1.0667439105584906, "grad_norm": 0.27896195601199664, "learning_rate": 2.6860526032895736e-06, "loss": 0.11445465087890624, "step": 123370 }, { "epoch": 1.0667871440800338, "grad_norm": 0.5862931452247137, "learning_rate": 2.685849989500219e-06, "loss": 0.0062770843505859375, "step": 123375 }, { "epoch": 1.066830377601577, "grad_norm": 0.551539983260346, "learning_rate": 2.6856473771596958e-06, "loss": 0.0626556396484375, "step": 123380 }, { "epoch": 1.0668736111231205, "grad_norm": 0.16166864315214677, "learning_rate": 2.685444766268938e-06, "loss": 0.0580108642578125, "step": 123385 }, { "epoch": 1.0669168446446637, "grad_norm": 31.31909774802007, "learning_rate": 2.6852421568288833e-06, "loss": 0.126824951171875, "step": 123390 }, { "epoch": 1.066960078166207, "grad_norm": 0.07058386053669768, "learning_rate": 2.6850395488404625e-06, "loss": 0.04035873413085937, "step": 123395 }, { "epoch": 1.0670033116877502, "grad_norm": 12.94879825945549, "learning_rate": 2.684836942304612e-06, "loss": 0.06228561401367187, "step": 123400 }, { "epoch": 1.0670465452092934, "grad_norm": 0.039825719866431755, "learning_rate": 2.6846343372222666e-06, "loss": 0.02237110137939453, "step": 123405 }, { "epoch": 1.0670897787308367, "grad_norm": 0.5592299945192538, "learning_rate": 2.684431733594359e-06, "loss": 0.005856132507324219, "step": 123410 }, { "epoch": 1.06713301225238, "grad_norm": 6.1291397243395185, "learning_rate": 2.6842291314218232e-06, "loss": 0.06360378265380859, "step": 123415 }, { "epoch": 1.0671762457739233, "grad_norm": 4.217098264490751, "learning_rate": 2.684026530705596e-06, "loss": 0.06272506713867188, "step": 123420 }, { "epoch": 1.0672194792954666, "grad_norm": 0.978839164219865, "learning_rate": 2.68382393144661e-06, "loss": 0.012617874145507812, "step": 123425 }, { "epoch": 1.0672627128170098, "grad_norm": 0.21493838304677312, "learning_rate": 2.683621333645801e-06, "loss": 0.023276901245117186, "step": 123430 }, { "epoch": 1.067305946338553, "grad_norm": 0.7889588169369023, "learning_rate": 2.683418737304102e-06, "loss": 0.1505807876586914, "step": 123435 }, { "epoch": 1.0673491798600963, "grad_norm": 3.9019691818466153, "learning_rate": 2.6832161424224483e-06, "loss": 0.02295379638671875, "step": 123440 }, { "epoch": 1.0673924133816395, "grad_norm": 24.940401727175388, "learning_rate": 2.6830135490017727e-06, "loss": 0.09042491912841796, "step": 123445 }, { "epoch": 1.067435646903183, "grad_norm": 0.6398620652977697, "learning_rate": 2.6828109570430104e-06, "loss": 0.03198089599609375, "step": 123450 }, { "epoch": 1.0674788804247262, "grad_norm": 1.777304443511737, "learning_rate": 2.682608366547096e-06, "loss": 0.03529510498046875, "step": 123455 }, { "epoch": 1.0675221139462694, "grad_norm": 3.9425966401960215, "learning_rate": 2.6824057775149647e-06, "loss": 0.0486053466796875, "step": 123460 }, { "epoch": 1.0675653474678126, "grad_norm": 2.269330495334927, "learning_rate": 2.6822031899475503e-06, "loss": 0.0749053955078125, "step": 123465 }, { "epoch": 1.0676085809893558, "grad_norm": 6.196399832065868, "learning_rate": 2.682000603845786e-06, "loss": 0.06121368408203125, "step": 123470 }, { "epoch": 1.067651814510899, "grad_norm": 46.381051617871066, "learning_rate": 2.6817980192106067e-06, "loss": 0.1923084259033203, "step": 123475 }, { "epoch": 1.0676950480324425, "grad_norm": 23.631581172072636, "learning_rate": 2.6815954360429457e-06, "loss": 0.08067626953125, "step": 123480 }, { "epoch": 1.0677382815539858, "grad_norm": 26.426179162380055, "learning_rate": 2.68139285434374e-06, "loss": 0.2461620330810547, "step": 123485 }, { "epoch": 1.067781515075529, "grad_norm": 0.11607180493682152, "learning_rate": 2.681190274113922e-06, "loss": 0.057521820068359375, "step": 123490 }, { "epoch": 1.0678247485970722, "grad_norm": 0.13209926992652635, "learning_rate": 2.6809876953544267e-06, "loss": 0.13173828125, "step": 123495 }, { "epoch": 1.0678679821186154, "grad_norm": 13.646570101077451, "learning_rate": 2.680785118066188e-06, "loss": 0.08852252960205079, "step": 123500 }, { "epoch": 1.0679112156401587, "grad_norm": 35.26516682876308, "learning_rate": 2.6805825422501388e-06, "loss": 0.13947982788085939, "step": 123505 }, { "epoch": 1.067954449161702, "grad_norm": 13.424943177307943, "learning_rate": 2.6803799679072143e-06, "loss": 0.06335678100585937, "step": 123510 }, { "epoch": 1.0679976826832454, "grad_norm": 3.730147472253294, "learning_rate": 2.6801773950383507e-06, "loss": 0.035461044311523436, "step": 123515 }, { "epoch": 1.0680409162047886, "grad_norm": 3.450434267540867, "learning_rate": 2.6799748236444807e-06, "loss": 0.09684867858886718, "step": 123520 }, { "epoch": 1.0680841497263318, "grad_norm": 3.7092566800064946, "learning_rate": 2.6797722537265385e-06, "loss": 0.09965667724609376, "step": 123525 }, { "epoch": 1.068127383247875, "grad_norm": 14.956950385365914, "learning_rate": 2.679569685285458e-06, "loss": 0.15755577087402345, "step": 123530 }, { "epoch": 1.0681706167694183, "grad_norm": 4.863733996676338, "learning_rate": 2.6793671183221737e-06, "loss": 0.046149826049804686, "step": 123535 }, { "epoch": 1.0682138502909615, "grad_norm": 9.183441288973382, "learning_rate": 2.6791645528376206e-06, "loss": 0.067291259765625, "step": 123540 }, { "epoch": 1.068257083812505, "grad_norm": 2.9160210932487964, "learning_rate": 2.6789619888327302e-06, "loss": 0.08730087280273438, "step": 123545 }, { "epoch": 1.0683003173340482, "grad_norm": 1.3556944086240394, "learning_rate": 2.6787594263084408e-06, "loss": 0.039821624755859375, "step": 123550 }, { "epoch": 1.0683435508555914, "grad_norm": 5.962455004093565, "learning_rate": 2.678556865265684e-06, "loss": 0.09803848266601563, "step": 123555 }, { "epoch": 1.0683867843771346, "grad_norm": 39.4111887886401, "learning_rate": 2.6783543057053944e-06, "loss": 0.13175506591796876, "step": 123560 }, { "epoch": 1.0684300178986779, "grad_norm": 11.662976000466092, "learning_rate": 2.678151747628507e-06, "loss": 0.06235504150390625, "step": 123565 }, { "epoch": 1.068473251420221, "grad_norm": 9.533964267167827, "learning_rate": 2.677949191035955e-06, "loss": 0.02730560302734375, "step": 123570 }, { "epoch": 1.0685164849417645, "grad_norm": 3.4034501951533356, "learning_rate": 2.677746635928671e-06, "loss": 0.1121368408203125, "step": 123575 }, { "epoch": 1.0685597184633078, "grad_norm": 7.936163366135417, "learning_rate": 2.677544082307593e-06, "loss": 0.0773345947265625, "step": 123580 }, { "epoch": 1.068602951984851, "grad_norm": 0.7952409051267908, "learning_rate": 2.6773415301736535e-06, "loss": 0.03594284057617188, "step": 123585 }, { "epoch": 1.0686461855063942, "grad_norm": 5.488387139948549, "learning_rate": 2.677138979527785e-06, "loss": 0.13425083160400392, "step": 123590 }, { "epoch": 1.0686894190279375, "grad_norm": 16.60882924476899, "learning_rate": 2.676936430370924e-06, "loss": 0.05074329376220703, "step": 123595 }, { "epoch": 1.0687326525494807, "grad_norm": 18.724973958276824, "learning_rate": 2.6767338827040034e-06, "loss": 0.12401466369628907, "step": 123600 }, { "epoch": 1.0687758860710241, "grad_norm": 1.112046719483195, "learning_rate": 2.6765313365279568e-06, "loss": 0.0209930419921875, "step": 123605 }, { "epoch": 1.0688191195925674, "grad_norm": 19.668867396047787, "learning_rate": 2.67632879184372e-06, "loss": 0.0906768798828125, "step": 123610 }, { "epoch": 1.0688623531141106, "grad_norm": 0.21818348784885672, "learning_rate": 2.6761262486522264e-06, "loss": 0.029750823974609375, "step": 123615 }, { "epoch": 1.0689055866356538, "grad_norm": 1.1068789639177656, "learning_rate": 2.675923706954409e-06, "loss": 0.19017486572265624, "step": 123620 }, { "epoch": 1.068948820157197, "grad_norm": 3.045942292328969, "learning_rate": 2.6757211667512034e-06, "loss": 0.05350341796875, "step": 123625 }, { "epoch": 1.0689920536787403, "grad_norm": 0.4819178122373705, "learning_rate": 2.6755186280435437e-06, "loss": 0.0753509521484375, "step": 123630 }, { "epoch": 1.0690352872002835, "grad_norm": 2.581910456192036, "learning_rate": 2.675316090832363e-06, "loss": 0.04796867370605469, "step": 123635 }, { "epoch": 1.069078520721827, "grad_norm": 0.7520786230964711, "learning_rate": 2.6751135551185945e-06, "loss": 0.13257064819335937, "step": 123640 }, { "epoch": 1.0691217542433702, "grad_norm": 1.9580647721500333, "learning_rate": 2.674911020903175e-06, "loss": 0.033475494384765624, "step": 123645 }, { "epoch": 1.0691649877649134, "grad_norm": 0.7059453778317757, "learning_rate": 2.6747084881870366e-06, "loss": 0.05823974609375, "step": 123650 }, { "epoch": 1.0692082212864567, "grad_norm": 0.7689386267621877, "learning_rate": 2.6745059569711144e-06, "loss": 0.03763275146484375, "step": 123655 }, { "epoch": 1.0692514548079999, "grad_norm": 0.4619944125423066, "learning_rate": 2.6743034272563416e-06, "loss": 0.14239044189453126, "step": 123660 }, { "epoch": 1.069294688329543, "grad_norm": 1.1945433128864233, "learning_rate": 2.674100899043653e-06, "loss": 0.04358596801757812, "step": 123665 }, { "epoch": 1.0693379218510866, "grad_norm": 0.4501401245051406, "learning_rate": 2.6738983723339812e-06, "loss": 0.020950508117675782, "step": 123670 }, { "epoch": 1.0693811553726298, "grad_norm": 0.5726502364124088, "learning_rate": 2.673695847128262e-06, "loss": 0.223931884765625, "step": 123675 }, { "epoch": 1.069424388894173, "grad_norm": 1.6901323433541586, "learning_rate": 2.6734933234274284e-06, "loss": 0.044746780395507814, "step": 123680 }, { "epoch": 1.0694676224157162, "grad_norm": 1.4795291421797288, "learning_rate": 2.673290801232416e-06, "loss": 0.01004486083984375, "step": 123685 }, { "epoch": 1.0695108559372595, "grad_norm": 1.157248618609548, "learning_rate": 2.6730882805441565e-06, "loss": 0.015355682373046875, "step": 123690 }, { "epoch": 1.0695540894588027, "grad_norm": 11.96827089119085, "learning_rate": 2.6728857613635857e-06, "loss": 0.09300460815429687, "step": 123695 }, { "epoch": 1.069597322980346, "grad_norm": 1.3295552208929833, "learning_rate": 2.6726832436916356e-06, "loss": 0.0430267333984375, "step": 123700 }, { "epoch": 1.0696405565018894, "grad_norm": 18.096319757340158, "learning_rate": 2.6724807275292413e-06, "loss": 0.0412200927734375, "step": 123705 }, { "epoch": 1.0696837900234326, "grad_norm": 13.57699651168808, "learning_rate": 2.672278212877338e-06, "loss": 0.08598861694335938, "step": 123710 }, { "epoch": 1.0697270235449758, "grad_norm": 2.387704123525143, "learning_rate": 2.6720756997368588e-06, "loss": 0.06346874237060547, "step": 123715 }, { "epoch": 1.069770257066519, "grad_norm": 7.00189161305316, "learning_rate": 2.6718731881087374e-06, "loss": 0.049383544921875, "step": 123720 }, { "epoch": 1.0698134905880623, "grad_norm": 4.2012071148614405, "learning_rate": 2.6716706779939076e-06, "loss": 0.0442474365234375, "step": 123725 }, { "epoch": 1.0698567241096055, "grad_norm": 4.0961112104608866, "learning_rate": 2.6714681693933035e-06, "loss": 0.021091651916503907, "step": 123730 }, { "epoch": 1.069899957631149, "grad_norm": 12.2606583135108, "learning_rate": 2.6712656623078583e-06, "loss": 0.053195953369140625, "step": 123735 }, { "epoch": 1.0699431911526922, "grad_norm": 28.325490591964854, "learning_rate": 2.671063156738508e-06, "loss": 0.3842134475708008, "step": 123740 }, { "epoch": 1.0699864246742354, "grad_norm": 1.5624445691063087, "learning_rate": 2.6708606526861853e-06, "loss": 0.022190093994140625, "step": 123745 }, { "epoch": 1.0700296581957787, "grad_norm": 1.1957088958815678, "learning_rate": 2.6706581501518244e-06, "loss": 0.16230010986328125, "step": 123750 }, { "epoch": 1.070072891717322, "grad_norm": 0.10565795127978406, "learning_rate": 2.6704556491363584e-06, "loss": 0.017098236083984374, "step": 123755 }, { "epoch": 1.0701161252388651, "grad_norm": 4.295483570374866, "learning_rate": 2.6702531496407223e-06, "loss": 0.0394775390625, "step": 123760 }, { "epoch": 1.0701593587604084, "grad_norm": 2.5183982334203328, "learning_rate": 2.6700506516658482e-06, "loss": 0.028184127807617188, "step": 123765 }, { "epoch": 1.0702025922819518, "grad_norm": 1.1132597943576021, "learning_rate": 2.6698481552126727e-06, "loss": 0.078466796875, "step": 123770 }, { "epoch": 1.070245825803495, "grad_norm": 18.274752648365773, "learning_rate": 2.6696456602821283e-06, "loss": 0.1017425537109375, "step": 123775 }, { "epoch": 1.0702890593250383, "grad_norm": 32.254964526049385, "learning_rate": 2.6694431668751493e-06, "loss": 0.09021034240722656, "step": 123780 }, { "epoch": 1.0703322928465815, "grad_norm": 2.44975757142824, "learning_rate": 2.6692406749926677e-06, "loss": 0.07518997192382812, "step": 123785 }, { "epoch": 1.0703755263681247, "grad_norm": 0.13530934247194654, "learning_rate": 2.6690381846356205e-06, "loss": 0.053102874755859376, "step": 123790 }, { "epoch": 1.0704187598896682, "grad_norm": 0.7144336570033806, "learning_rate": 2.6688356958049394e-06, "loss": 0.010543060302734376, "step": 123795 }, { "epoch": 1.0704619934112114, "grad_norm": 11.777923599449574, "learning_rate": 2.6686332085015574e-06, "loss": 0.05914154052734375, "step": 123800 }, { "epoch": 1.0705052269327546, "grad_norm": 21.822578003902855, "learning_rate": 2.6684307227264116e-06, "loss": 0.04707260131835937, "step": 123805 }, { "epoch": 1.0705484604542979, "grad_norm": 31.547857958311397, "learning_rate": 2.6682282384804336e-06, "loss": 0.309100341796875, "step": 123810 }, { "epoch": 1.070591693975841, "grad_norm": 0.6200112452470227, "learning_rate": 2.668025755764557e-06, "loss": 0.12026081085205079, "step": 123815 }, { "epoch": 1.0706349274973843, "grad_norm": 1.027509799493826, "learning_rate": 2.667823274579717e-06, "loss": 0.11064872741699219, "step": 123820 }, { "epoch": 1.0706781610189275, "grad_norm": 2.1416694612978406, "learning_rate": 2.667620794926847e-06, "loss": 0.043947601318359376, "step": 123825 }, { "epoch": 1.070721394540471, "grad_norm": 1.8960874208036014, "learning_rate": 2.667418316806879e-06, "loss": 0.024148941040039062, "step": 123830 }, { "epoch": 1.0707646280620142, "grad_norm": 4.743916777966711, "learning_rate": 2.6672158402207494e-06, "loss": 0.10158767700195312, "step": 123835 }, { "epoch": 1.0708078615835575, "grad_norm": 1.3970694605675211, "learning_rate": 2.6670133651693915e-06, "loss": 0.04275703430175781, "step": 123840 }, { "epoch": 1.0708510951051007, "grad_norm": 23.02747626539508, "learning_rate": 2.6668108916537373e-06, "loss": 0.212957763671875, "step": 123845 }, { "epoch": 1.070894328626644, "grad_norm": 1.288772233227615, "learning_rate": 2.666608419674723e-06, "loss": 0.05095748901367188, "step": 123850 }, { "epoch": 1.0709375621481871, "grad_norm": 0.025985347040512526, "learning_rate": 2.6664059492332804e-06, "loss": 0.0417999267578125, "step": 123855 }, { "epoch": 1.0709807956697306, "grad_norm": 5.6645908286277935, "learning_rate": 2.6662034803303436e-06, "loss": 0.08437347412109375, "step": 123860 }, { "epoch": 1.0710240291912738, "grad_norm": 5.9945274335916, "learning_rate": 2.666001012966848e-06, "loss": 0.02627143859863281, "step": 123865 }, { "epoch": 1.071067262712817, "grad_norm": 0.43919458436990655, "learning_rate": 2.665798547143726e-06, "loss": 0.019174957275390626, "step": 123870 }, { "epoch": 1.0711104962343603, "grad_norm": 27.070640462633058, "learning_rate": 2.665596082861911e-06, "loss": 0.3402587890625, "step": 123875 }, { "epoch": 1.0711537297559035, "grad_norm": 0.4558381910719648, "learning_rate": 2.665393620122338e-06, "loss": 0.0527740478515625, "step": 123880 }, { "epoch": 1.0711969632774467, "grad_norm": 0.8648524447843914, "learning_rate": 2.66519115892594e-06, "loss": 0.018564605712890626, "step": 123885 }, { "epoch": 1.07124019679899, "grad_norm": 13.43833479056939, "learning_rate": 2.6649886992736506e-06, "loss": 0.0668426513671875, "step": 123890 }, { "epoch": 1.0712834303205334, "grad_norm": 2.055719993508754, "learning_rate": 2.6647862411664024e-06, "loss": 0.33298797607421876, "step": 123895 }, { "epoch": 1.0713266638420766, "grad_norm": 26.336004016351374, "learning_rate": 2.664583784605132e-06, "loss": 0.08995094299316406, "step": 123900 }, { "epoch": 1.0713698973636199, "grad_norm": 1.3248105349474693, "learning_rate": 2.664381329590771e-06, "loss": 0.01112823486328125, "step": 123905 }, { "epoch": 1.071413130885163, "grad_norm": 0.619641509937588, "learning_rate": 2.664178876124254e-06, "loss": 0.09061698913574219, "step": 123910 }, { "epoch": 1.0714563644067063, "grad_norm": 1.3679281920399815, "learning_rate": 2.6639764242065145e-06, "loss": 0.046724700927734376, "step": 123915 }, { "epoch": 1.0714995979282496, "grad_norm": 0.3315412246111638, "learning_rate": 2.663773973838486e-06, "loss": 0.03289356231689453, "step": 123920 }, { "epoch": 1.071542831449793, "grad_norm": 3.410283380041028, "learning_rate": 2.6635715250211014e-06, "loss": 0.018927001953125, "step": 123925 }, { "epoch": 1.0715860649713362, "grad_norm": 0.43573722195094217, "learning_rate": 2.663369077755295e-06, "loss": 0.09302635192871093, "step": 123930 }, { "epoch": 1.0716292984928795, "grad_norm": 9.037178719644801, "learning_rate": 2.6631666320420017e-06, "loss": 0.039776611328125, "step": 123935 }, { "epoch": 1.0716725320144227, "grad_norm": 8.273898596009666, "learning_rate": 2.6629641878821543e-06, "loss": 0.40774688720703123, "step": 123940 }, { "epoch": 1.071715765535966, "grad_norm": 1.3773023735929486, "learning_rate": 2.662761745276686e-06, "loss": 0.012819671630859375, "step": 123945 }, { "epoch": 1.0717589990575092, "grad_norm": 0.3113933537640994, "learning_rate": 2.6625593042265304e-06, "loss": 0.11989707946777343, "step": 123950 }, { "epoch": 1.0718022325790524, "grad_norm": 10.444452399790709, "learning_rate": 2.662356864732621e-06, "loss": 0.023630523681640626, "step": 123955 }, { "epoch": 1.0718454661005958, "grad_norm": 10.049266278597548, "learning_rate": 2.6621544267958927e-06, "loss": 0.09390449523925781, "step": 123960 }, { "epoch": 1.071888699622139, "grad_norm": 15.844534906274001, "learning_rate": 2.6619519904172783e-06, "loss": 0.24493637084960937, "step": 123965 }, { "epoch": 1.0719319331436823, "grad_norm": 3.756552740117459, "learning_rate": 2.661749555597712e-06, "loss": 0.02471771240234375, "step": 123970 }, { "epoch": 1.0719751666652255, "grad_norm": 3.598973129904365, "learning_rate": 2.661547122338126e-06, "loss": 0.107464599609375, "step": 123975 }, { "epoch": 1.0720184001867687, "grad_norm": 4.3156861626158545, "learning_rate": 2.6613446906394548e-06, "loss": 0.09022235870361328, "step": 123980 }, { "epoch": 1.072061633708312, "grad_norm": 0.12043947976151043, "learning_rate": 2.6611422605026325e-06, "loss": 0.020064926147460936, "step": 123985 }, { "epoch": 1.0721048672298554, "grad_norm": 2.188850192582435, "learning_rate": 2.6609398319285908e-06, "loss": 0.13271141052246094, "step": 123990 }, { "epoch": 1.0721481007513987, "grad_norm": 21.15336824125891, "learning_rate": 2.660737404918266e-06, "loss": 0.0915924072265625, "step": 123995 }, { "epoch": 1.0721913342729419, "grad_norm": 0.2611700804946431, "learning_rate": 2.6605349794725903e-06, "loss": 0.05009613037109375, "step": 124000 }, { "epoch": 1.0722345677944851, "grad_norm": 2.335505725478338, "learning_rate": 2.660332555592497e-06, "loss": 0.10824899673461914, "step": 124005 }, { "epoch": 1.0722778013160283, "grad_norm": 0.5095669881660896, "learning_rate": 2.6601301332789196e-06, "loss": 0.08033084869384766, "step": 124010 }, { "epoch": 1.0723210348375716, "grad_norm": 5.01454242715693, "learning_rate": 2.6599277125327925e-06, "loss": 0.02595977783203125, "step": 124015 }, { "epoch": 1.072364268359115, "grad_norm": 0.31600302811003056, "learning_rate": 2.6597252933550473e-06, "loss": 0.022832107543945313, "step": 124020 }, { "epoch": 1.0724075018806583, "grad_norm": 0.3774216712945353, "learning_rate": 2.6595228757466208e-06, "loss": 0.06137542724609375, "step": 124025 }, { "epoch": 1.0724507354022015, "grad_norm": 73.78301531640928, "learning_rate": 2.6593204597084443e-06, "loss": 0.272613525390625, "step": 124030 }, { "epoch": 1.0724939689237447, "grad_norm": 4.963886188070404, "learning_rate": 2.6591180452414516e-06, "loss": 0.1245361328125, "step": 124035 }, { "epoch": 1.072537202445288, "grad_norm": 3.55212528618704, "learning_rate": 2.6589156323465763e-06, "loss": 0.08770370483398438, "step": 124040 }, { "epoch": 1.0725804359668312, "grad_norm": 47.7504429817132, "learning_rate": 2.658713221024752e-06, "loss": 0.024912071228027344, "step": 124045 }, { "epoch": 1.0726236694883746, "grad_norm": 71.6803513852415, "learning_rate": 2.6585108112769124e-06, "loss": 0.40375213623046874, "step": 124050 }, { "epoch": 1.0726669030099178, "grad_norm": 1.1399579620088283, "learning_rate": 2.6583084031039895e-06, "loss": 0.023522186279296874, "step": 124055 }, { "epoch": 1.072710136531461, "grad_norm": 3.946709351626646, "learning_rate": 2.6581059965069193e-06, "loss": 0.031713104248046874, "step": 124060 }, { "epoch": 1.0727533700530043, "grad_norm": 2.3068288045916123, "learning_rate": 2.6579035914866344e-06, "loss": 0.26361083984375, "step": 124065 }, { "epoch": 1.0727966035745475, "grad_norm": 19.855791005749147, "learning_rate": 2.6577011880440665e-06, "loss": 0.14927825927734376, "step": 124070 }, { "epoch": 1.0728398370960908, "grad_norm": 31.387277505659206, "learning_rate": 2.6574987861801516e-06, "loss": 0.1270416259765625, "step": 124075 }, { "epoch": 1.072883070617634, "grad_norm": 0.14151805520336358, "learning_rate": 2.657296385895822e-06, "loss": 0.02589588165283203, "step": 124080 }, { "epoch": 1.0729263041391774, "grad_norm": 20.806266624780445, "learning_rate": 2.6570939871920095e-06, "loss": 0.21037521362304687, "step": 124085 }, { "epoch": 1.0729695376607207, "grad_norm": 1.803128667956689, "learning_rate": 2.6568915900696512e-06, "loss": 0.11691150665283204, "step": 124090 }, { "epoch": 1.073012771182264, "grad_norm": 2.462784237928514, "learning_rate": 2.6566891945296784e-06, "loss": 0.027404022216796876, "step": 124095 }, { "epoch": 1.0730560047038071, "grad_norm": 6.0741137304247435, "learning_rate": 2.656486800573024e-06, "loss": 0.04763526916503906, "step": 124100 }, { "epoch": 1.0730992382253504, "grad_norm": 19.28321592501675, "learning_rate": 2.6562844082006224e-06, "loss": 0.09220809936523437, "step": 124105 }, { "epoch": 1.0731424717468936, "grad_norm": 0.21092680859722143, "learning_rate": 2.6560820174134073e-06, "loss": 0.10558624267578125, "step": 124110 }, { "epoch": 1.073185705268437, "grad_norm": 17.857975700571995, "learning_rate": 2.6558796282123097e-06, "loss": 0.1000213623046875, "step": 124115 }, { "epoch": 1.0732289387899803, "grad_norm": 9.202661271994566, "learning_rate": 2.6556772405982664e-06, "loss": 0.07648687362670899, "step": 124120 }, { "epoch": 1.0732721723115235, "grad_norm": 5.482853024287322, "learning_rate": 2.6554748545722096e-06, "loss": 0.144183349609375, "step": 124125 }, { "epoch": 1.0733154058330667, "grad_norm": 8.206736941801584, "learning_rate": 2.655272470135071e-06, "loss": 0.03675270080566406, "step": 124130 }, { "epoch": 1.07335863935461, "grad_norm": 2.558820665877372, "learning_rate": 2.655070087287786e-06, "loss": 0.034905624389648435, "step": 124135 }, { "epoch": 1.0734018728761532, "grad_norm": 12.243039001166768, "learning_rate": 2.654867706031288e-06, "loss": 0.06404151916503906, "step": 124140 }, { "epoch": 1.0734451063976964, "grad_norm": 3.515311459948503, "learning_rate": 2.654665326366509e-06, "loss": 0.02573089599609375, "step": 124145 }, { "epoch": 1.0734883399192399, "grad_norm": 11.680357387325053, "learning_rate": 2.6544629482943817e-06, "loss": 0.08365936279296875, "step": 124150 }, { "epoch": 1.073531573440783, "grad_norm": 0.856601035490619, "learning_rate": 2.6542605718158418e-06, "loss": 0.024315834045410156, "step": 124155 }, { "epoch": 1.0735748069623263, "grad_norm": 0.537747783581112, "learning_rate": 2.6540581969318223e-06, "loss": 0.04879684448242187, "step": 124160 }, { "epoch": 1.0736180404838696, "grad_norm": 14.557725641329935, "learning_rate": 2.6538558236432555e-06, "loss": 0.2049285888671875, "step": 124165 }, { "epoch": 1.0736612740054128, "grad_norm": 13.793766701810135, "learning_rate": 2.6536534519510748e-06, "loss": 0.037000274658203124, "step": 124170 }, { "epoch": 1.073704507526956, "grad_norm": 8.700192529497402, "learning_rate": 2.653451081856214e-06, "loss": 0.13540496826171874, "step": 124175 }, { "epoch": 1.0737477410484995, "grad_norm": 0.07555827174536038, "learning_rate": 2.6532487133596056e-06, "loss": 0.027756309509277342, "step": 124180 }, { "epoch": 1.0737909745700427, "grad_norm": 22.387719426575913, "learning_rate": 2.6530463464621836e-06, "loss": 0.15556793212890624, "step": 124185 }, { "epoch": 1.073834208091586, "grad_norm": 0.40273421786237706, "learning_rate": 2.6528439811648813e-06, "loss": 0.03519363403320312, "step": 124190 }, { "epoch": 1.0738774416131291, "grad_norm": 0.5301344674066506, "learning_rate": 2.6526416174686327e-06, "loss": 0.02898712158203125, "step": 124195 }, { "epoch": 1.0739206751346724, "grad_norm": 1.1227051991815642, "learning_rate": 2.65243925537437e-06, "loss": 0.04737396240234375, "step": 124200 }, { "epoch": 1.0739639086562156, "grad_norm": 4.8029094408568715, "learning_rate": 2.652236894883026e-06, "loss": 0.05108833312988281, "step": 124205 }, { "epoch": 1.0740071421777588, "grad_norm": 0.7765266341136428, "learning_rate": 2.6520345359955345e-06, "loss": 0.09827499389648438, "step": 124210 }, { "epoch": 1.0740503756993023, "grad_norm": 10.455038305836261, "learning_rate": 2.6518321787128293e-06, "loss": 0.32633056640625, "step": 124215 }, { "epoch": 1.0740936092208455, "grad_norm": 19.009447667492946, "learning_rate": 2.651629823035844e-06, "loss": 0.11060905456542969, "step": 124220 }, { "epoch": 1.0741368427423887, "grad_norm": 7.1736419417745605, "learning_rate": 2.651427468965511e-06, "loss": 0.14603900909423828, "step": 124225 }, { "epoch": 1.074180076263932, "grad_norm": 3.080872054454041, "learning_rate": 2.6512251165027638e-06, "loss": 0.06602096557617188, "step": 124230 }, { "epoch": 1.0742233097854752, "grad_norm": 7.906640590859201, "learning_rate": 2.651022765648535e-06, "loss": 0.029657363891601562, "step": 124235 }, { "epoch": 1.0742665433070184, "grad_norm": 12.099620906305173, "learning_rate": 2.6508204164037586e-06, "loss": 0.10275192260742187, "step": 124240 }, { "epoch": 1.0743097768285619, "grad_norm": 1.168436935018217, "learning_rate": 2.6506180687693668e-06, "loss": 0.2289590835571289, "step": 124245 }, { "epoch": 1.074353010350105, "grad_norm": 19.886411285822764, "learning_rate": 2.6504157227462946e-06, "loss": 0.07969093322753906, "step": 124250 }, { "epoch": 1.0743962438716483, "grad_norm": 3.1258548746743804, "learning_rate": 2.6502133783354743e-06, "loss": 0.20403594970703126, "step": 124255 }, { "epoch": 1.0744394773931916, "grad_norm": 19.103953691888595, "learning_rate": 2.6500110355378388e-06, "loss": 0.1000152587890625, "step": 124260 }, { "epoch": 1.0744827109147348, "grad_norm": 2.191819242380069, "learning_rate": 2.649808694354321e-06, "loss": 0.039617919921875, "step": 124265 }, { "epoch": 1.074525944436278, "grad_norm": 42.645167873109, "learning_rate": 2.649606354785855e-06, "loss": 0.09363079071044922, "step": 124270 }, { "epoch": 1.0745691779578215, "grad_norm": 0.6751981865079801, "learning_rate": 2.6494040168333726e-06, "loss": 0.10940132141113282, "step": 124275 }, { "epoch": 1.0746124114793647, "grad_norm": 4.231137217274866, "learning_rate": 2.649201680497809e-06, "loss": 0.09935493469238281, "step": 124280 }, { "epoch": 1.074655645000908, "grad_norm": 29.001123236838875, "learning_rate": 2.6489993457800966e-06, "loss": 0.0884490966796875, "step": 124285 }, { "epoch": 1.0746988785224512, "grad_norm": 5.262577238100174, "learning_rate": 2.648797012681168e-06, "loss": 0.04082183837890625, "step": 124290 }, { "epoch": 1.0747421120439944, "grad_norm": 22.412570426090547, "learning_rate": 2.6485946812019555e-06, "loss": 0.08103179931640625, "step": 124295 }, { "epoch": 1.0747853455655376, "grad_norm": 4.841946762058717, "learning_rate": 2.648392351343394e-06, "loss": 0.03737335205078125, "step": 124300 }, { "epoch": 1.074828579087081, "grad_norm": 5.728512021925279, "learning_rate": 2.6481900231064164e-06, "loss": 0.0612213134765625, "step": 124305 }, { "epoch": 1.0748718126086243, "grad_norm": 10.092537362641593, "learning_rate": 2.6479876964919535e-06, "loss": 0.09432144165039062, "step": 124310 }, { "epoch": 1.0749150461301675, "grad_norm": 0.37209269993589594, "learning_rate": 2.647785371500942e-06, "loss": 0.04419403076171875, "step": 124315 }, { "epoch": 1.0749582796517108, "grad_norm": 0.16591581862700064, "learning_rate": 2.647583048134313e-06, "loss": 0.023949432373046874, "step": 124320 }, { "epoch": 1.075001513173254, "grad_norm": 9.142014083480582, "learning_rate": 2.647380726392999e-06, "loss": 0.03525390625, "step": 124325 }, { "epoch": 1.0750447466947972, "grad_norm": 0.7553778998822561, "learning_rate": 2.647178406277935e-06, "loss": 0.0096343994140625, "step": 124330 }, { "epoch": 1.0750879802163404, "grad_norm": 1.4927093607299706, "learning_rate": 2.646976087790053e-06, "loss": 0.045787811279296875, "step": 124335 }, { "epoch": 1.075131213737884, "grad_norm": 1.3243251301188925, "learning_rate": 2.646773770930284e-06, "loss": 0.1051849365234375, "step": 124340 }, { "epoch": 1.0751744472594271, "grad_norm": 4.540690562368273, "learning_rate": 2.6465714556995655e-06, "loss": 0.090557861328125, "step": 124345 }, { "epoch": 1.0752176807809704, "grad_norm": 4.61565563045504, "learning_rate": 2.646369142098827e-06, "loss": 0.024276351928710936, "step": 124350 }, { "epoch": 1.0752609143025136, "grad_norm": 23.439279099907917, "learning_rate": 2.6461668301290035e-06, "loss": 0.15572586059570312, "step": 124355 }, { "epoch": 1.0753041478240568, "grad_norm": 13.414879748514656, "learning_rate": 2.6459645197910274e-06, "loss": 0.045957183837890624, "step": 124360 }, { "epoch": 1.0753473813456, "grad_norm": 10.927563058824122, "learning_rate": 2.6457622110858315e-06, "loss": 0.02539215087890625, "step": 124365 }, { "epoch": 1.0753906148671435, "grad_norm": 1.9410740638852295, "learning_rate": 2.6455599040143483e-06, "loss": 0.011990737915039063, "step": 124370 }, { "epoch": 1.0754338483886867, "grad_norm": 0.5151065651142999, "learning_rate": 2.645357598577512e-06, "loss": 0.030197525024414064, "step": 124375 }, { "epoch": 1.07547708191023, "grad_norm": 4.209640812491961, "learning_rate": 2.645155294776255e-06, "loss": 0.02453155517578125, "step": 124380 }, { "epoch": 1.0755203154317732, "grad_norm": 2.431138868246455, "learning_rate": 2.6449529926115106e-06, "loss": 0.12684669494628906, "step": 124385 }, { "epoch": 1.0755635489533164, "grad_norm": 0.22516505112200064, "learning_rate": 2.644750692084212e-06, "loss": 0.045476531982421874, "step": 124390 }, { "epoch": 1.0756067824748596, "grad_norm": 2.116294304218249, "learning_rate": 2.6445483931952914e-06, "loss": 0.09004974365234375, "step": 124395 }, { "epoch": 1.0756500159964029, "grad_norm": 0.6409082709070917, "learning_rate": 2.6443460959456816e-06, "loss": 0.35759849548339845, "step": 124400 }, { "epoch": 1.0756932495179463, "grad_norm": 22.747403557811765, "learning_rate": 2.644143800336317e-06, "loss": 0.04823150634765625, "step": 124405 }, { "epoch": 1.0757364830394895, "grad_norm": 0.8170930255154182, "learning_rate": 2.6439415063681288e-06, "loss": 0.03721466064453125, "step": 124410 }, { "epoch": 1.0757797165610328, "grad_norm": 5.571419738206097, "learning_rate": 2.6437392140420517e-06, "loss": 0.110406494140625, "step": 124415 }, { "epoch": 1.075822950082576, "grad_norm": 23.772356218683782, "learning_rate": 2.6435369233590183e-06, "loss": 0.17498931884765626, "step": 124420 }, { "epoch": 1.0758661836041192, "grad_norm": 8.292303779258532, "learning_rate": 2.643334634319961e-06, "loss": 0.02851524353027344, "step": 124425 }, { "epoch": 1.0759094171256625, "grad_norm": 14.182493226544166, "learning_rate": 2.643132346925812e-06, "loss": 0.3925323486328125, "step": 124430 }, { "epoch": 1.075952650647206, "grad_norm": 24.1093016658964, "learning_rate": 2.6429300611775053e-06, "loss": 0.153302001953125, "step": 124435 }, { "epoch": 1.0759958841687491, "grad_norm": 21.708655694738464, "learning_rate": 2.6427277770759733e-06, "loss": 0.08442878723144531, "step": 124440 }, { "epoch": 1.0760391176902924, "grad_norm": 0.4910374708390304, "learning_rate": 2.6425254946221504e-06, "loss": 0.060914039611816406, "step": 124445 }, { "epoch": 1.0760823512118356, "grad_norm": 0.5913077811817617, "learning_rate": 2.642323213816968e-06, "loss": 0.010791778564453125, "step": 124450 }, { "epoch": 1.0761255847333788, "grad_norm": 3.6918709603963777, "learning_rate": 2.642120934661359e-06, "loss": 0.044393062591552734, "step": 124455 }, { "epoch": 1.076168818254922, "grad_norm": 7.218167757612888, "learning_rate": 2.6419186571562563e-06, "loss": 0.06697158813476563, "step": 124460 }, { "epoch": 1.0762120517764653, "grad_norm": 4.557291716613008, "learning_rate": 2.6417163813025933e-06, "loss": 0.1153228759765625, "step": 124465 }, { "epoch": 1.0762552852980087, "grad_norm": 10.181262739865593, "learning_rate": 2.6415141071013022e-06, "loss": 0.181732177734375, "step": 124470 }, { "epoch": 1.076298518819552, "grad_norm": 0.44788623202440503, "learning_rate": 2.6413118345533175e-06, "loss": 0.01795654296875, "step": 124475 }, { "epoch": 1.0763417523410952, "grad_norm": 3.947509935797016, "learning_rate": 2.641109563659571e-06, "loss": 0.13392486572265624, "step": 124480 }, { "epoch": 1.0763849858626384, "grad_norm": 4.447745460630573, "learning_rate": 2.6409072944209953e-06, "loss": 0.05103225708007812, "step": 124485 }, { "epoch": 1.0764282193841817, "grad_norm": 0.36282809744540767, "learning_rate": 2.6407050268385226e-06, "loss": 0.01273956298828125, "step": 124490 }, { "epoch": 1.0764714529057249, "grad_norm": 3.3813042357546426, "learning_rate": 2.6405027609130873e-06, "loss": 0.016097831726074218, "step": 124495 }, { "epoch": 1.0765146864272683, "grad_norm": 3.3738294250834397, "learning_rate": 2.64030049664562e-06, "loss": 0.06932754516601562, "step": 124500 }, { "epoch": 1.0765579199488116, "grad_norm": 36.99098469362473, "learning_rate": 2.6400982340370565e-06, "loss": 0.07583961486816407, "step": 124505 }, { "epoch": 1.0766011534703548, "grad_norm": 8.414711832723553, "learning_rate": 2.639895973088328e-06, "loss": 0.07280502319335938, "step": 124510 }, { "epoch": 1.076644386991898, "grad_norm": 9.972350453046829, "learning_rate": 2.639693713800368e-06, "loss": 0.071875, "step": 124515 }, { "epoch": 1.0766876205134412, "grad_norm": 0.4794233317745948, "learning_rate": 2.639491456174108e-06, "loss": 0.07219963073730469, "step": 124520 }, { "epoch": 1.0767308540349845, "grad_norm": 0.23009948904397323, "learning_rate": 2.639289200210482e-06, "loss": 0.08745498657226562, "step": 124525 }, { "epoch": 1.076774087556528, "grad_norm": 11.070381745726959, "learning_rate": 2.6390869459104205e-06, "loss": 0.08542938232421875, "step": 124530 }, { "epoch": 1.0768173210780712, "grad_norm": 17.644266228352866, "learning_rate": 2.63888469327486e-06, "loss": 0.09748916625976563, "step": 124535 }, { "epoch": 1.0768605545996144, "grad_norm": 19.130532937083096, "learning_rate": 2.6386824423047314e-06, "loss": 0.09362564086914063, "step": 124540 }, { "epoch": 1.0769037881211576, "grad_norm": 37.476952273096, "learning_rate": 2.638480193000967e-06, "loss": 0.07985649108886719, "step": 124545 }, { "epoch": 1.0769470216427008, "grad_norm": 2.3879532252495825, "learning_rate": 2.6382779453645e-06, "loss": 0.03216972351074219, "step": 124550 }, { "epoch": 1.076990255164244, "grad_norm": 0.5959114722571542, "learning_rate": 2.638075699396264e-06, "loss": 0.038553619384765626, "step": 124555 }, { "epoch": 1.0770334886857875, "grad_norm": 30.513172390321166, "learning_rate": 2.6378734550971887e-06, "loss": 0.19181442260742188, "step": 124560 }, { "epoch": 1.0770767222073308, "grad_norm": 22.1567893799865, "learning_rate": 2.6376712124682105e-06, "loss": 0.14278106689453124, "step": 124565 }, { "epoch": 1.077119955728874, "grad_norm": 1.217073974258837, "learning_rate": 2.637468971510261e-06, "loss": 0.04407501220703125, "step": 124570 }, { "epoch": 1.0771631892504172, "grad_norm": 1.2751554072255578, "learning_rate": 2.637266732224272e-06, "loss": 0.030742645263671875, "step": 124575 }, { "epoch": 1.0772064227719604, "grad_norm": 1.961971113628123, "learning_rate": 2.637064494611177e-06, "loss": 0.023044586181640625, "step": 124580 }, { "epoch": 1.0772496562935037, "grad_norm": 12.15591282376099, "learning_rate": 2.6368622586719087e-06, "loss": 0.06488838195800781, "step": 124585 }, { "epoch": 1.077292889815047, "grad_norm": 10.138730170325967, "learning_rate": 2.6366600244073995e-06, "loss": 0.15713233947753907, "step": 124590 }, { "epoch": 1.0773361233365903, "grad_norm": 0.5183097241613255, "learning_rate": 2.636457791818581e-06, "loss": 0.012454605102539063, "step": 124595 }, { "epoch": 1.0773793568581336, "grad_norm": 9.461931159866445, "learning_rate": 2.636255560906388e-06, "loss": 0.1485443115234375, "step": 124600 }, { "epoch": 1.0774225903796768, "grad_norm": 75.06838299415188, "learning_rate": 2.6360533316717525e-06, "loss": 0.2860321044921875, "step": 124605 }, { "epoch": 1.07746582390122, "grad_norm": 4.642062289020282, "learning_rate": 2.6358511041156066e-06, "loss": 0.29347896575927734, "step": 124610 }, { "epoch": 1.0775090574227633, "grad_norm": 3.7102224747611894, "learning_rate": 2.6356488782388835e-06, "loss": 0.37360153198242185, "step": 124615 }, { "epoch": 1.0775522909443065, "grad_norm": 1.8785547917818117, "learning_rate": 2.6354466540425154e-06, "loss": 0.10615882873535157, "step": 124620 }, { "epoch": 1.07759552446585, "grad_norm": 0.10479816602998836, "learning_rate": 2.6352444315274344e-06, "loss": 0.030888748168945313, "step": 124625 }, { "epoch": 1.0776387579873932, "grad_norm": 12.065269456014864, "learning_rate": 2.6350422106945744e-06, "loss": 0.08546714782714844, "step": 124630 }, { "epoch": 1.0776819915089364, "grad_norm": 1.627748420126682, "learning_rate": 2.634839991544867e-06, "loss": 0.1305643081665039, "step": 124635 }, { "epoch": 1.0777252250304796, "grad_norm": 4.932409250436669, "learning_rate": 2.634637774079246e-06, "loss": 0.04506702423095703, "step": 124640 }, { "epoch": 1.0777684585520229, "grad_norm": 0.2379282852888097, "learning_rate": 2.634435558298644e-06, "loss": 0.018762588500976562, "step": 124645 }, { "epoch": 1.077811692073566, "grad_norm": 24.127303228390804, "learning_rate": 2.634233344203992e-06, "loss": 0.07514076232910157, "step": 124650 }, { "epoch": 1.0778549255951093, "grad_norm": 0.4729935986828824, "learning_rate": 2.6340311317962233e-06, "loss": 0.10782976150512695, "step": 124655 }, { "epoch": 1.0778981591166528, "grad_norm": 55.147680737565416, "learning_rate": 2.6338289210762705e-06, "loss": 0.21872177124023437, "step": 124660 }, { "epoch": 1.077941392638196, "grad_norm": 3.822164846824936, "learning_rate": 2.6336267120450664e-06, "loss": 0.20695953369140624, "step": 124665 }, { "epoch": 1.0779846261597392, "grad_norm": 0.9102603636296116, "learning_rate": 2.6334245047035443e-06, "loss": 0.1569976806640625, "step": 124670 }, { "epoch": 1.0780278596812825, "grad_norm": 1.7519757932235231, "learning_rate": 2.633222299052636e-06, "loss": 0.06206855773925781, "step": 124675 }, { "epoch": 1.0780710932028257, "grad_norm": 15.523917138820822, "learning_rate": 2.6330200950932738e-06, "loss": 0.1507293701171875, "step": 124680 }, { "epoch": 1.078114326724369, "grad_norm": 1.7192034867241444, "learning_rate": 2.6328178928263896e-06, "loss": 0.1399555206298828, "step": 124685 }, { "epoch": 1.0781575602459124, "grad_norm": 0.1960495299378065, "learning_rate": 2.632615692252918e-06, "loss": 0.04328842163085937, "step": 124690 }, { "epoch": 1.0782007937674556, "grad_norm": 1.0683821512179874, "learning_rate": 2.632413493373789e-06, "loss": 0.012671661376953126, "step": 124695 }, { "epoch": 1.0782440272889988, "grad_norm": 51.1737683285928, "learning_rate": 2.6322112961899377e-06, "loss": 0.2769935607910156, "step": 124700 }, { "epoch": 1.078287260810542, "grad_norm": 0.39145161501035625, "learning_rate": 2.6320091007022953e-06, "loss": 0.013473129272460938, "step": 124705 }, { "epoch": 1.0783304943320853, "grad_norm": 2.251457785418518, "learning_rate": 2.6318069069117948e-06, "loss": 0.01701183319091797, "step": 124710 }, { "epoch": 1.0783737278536285, "grad_norm": 28.858457483197178, "learning_rate": 2.6316047148193675e-06, "loss": 0.1625232696533203, "step": 124715 }, { "epoch": 1.078416961375172, "grad_norm": 3.499207992520897, "learning_rate": 2.6314025244259464e-06, "loss": 0.047253990173339845, "step": 124720 }, { "epoch": 1.0784601948967152, "grad_norm": 3.8163734146688912, "learning_rate": 2.6312003357324645e-06, "loss": 0.07762794494628907, "step": 124725 }, { "epoch": 1.0785034284182584, "grad_norm": 1.450267132353513, "learning_rate": 2.6309981487398544e-06, "loss": 0.027927207946777343, "step": 124730 }, { "epoch": 1.0785466619398016, "grad_norm": 1.7172758838083506, "learning_rate": 2.6307959634490487e-06, "loss": 0.0701446533203125, "step": 124735 }, { "epoch": 1.0785898954613449, "grad_norm": 2.950947221696246, "learning_rate": 2.630593779860979e-06, "loss": 0.011151123046875, "step": 124740 }, { "epoch": 1.078633128982888, "grad_norm": 2.272856381719036, "learning_rate": 2.6303915979765783e-06, "loss": 0.03326988220214844, "step": 124745 }, { "epoch": 1.0786763625044316, "grad_norm": 19.489313132260982, "learning_rate": 2.6301894177967788e-06, "loss": 0.10294189453125, "step": 124750 }, { "epoch": 1.0787195960259748, "grad_norm": 5.359937320983228, "learning_rate": 2.629987239322512e-06, "loss": 0.13187332153320314, "step": 124755 }, { "epoch": 1.078762829547518, "grad_norm": 2.790041230418494, "learning_rate": 2.629785062554712e-06, "loss": 0.010089874267578125, "step": 124760 }, { "epoch": 1.0788060630690612, "grad_norm": 2.610160147091847, "learning_rate": 2.6295828874943115e-06, "loss": 0.02107391357421875, "step": 124765 }, { "epoch": 1.0788492965906045, "grad_norm": 0.25220389525089687, "learning_rate": 2.6293807141422413e-06, "loss": 0.0556182861328125, "step": 124770 }, { "epoch": 1.0788925301121477, "grad_norm": 2.2063982053712667, "learning_rate": 2.6291785424994344e-06, "loss": 0.02864227294921875, "step": 124775 }, { "epoch": 1.078935763633691, "grad_norm": 0.26468965370880737, "learning_rate": 2.628976372566824e-06, "loss": 0.028685760498046876, "step": 124780 }, { "epoch": 1.0789789971552344, "grad_norm": 4.135651798325665, "learning_rate": 2.62877420434534e-06, "loss": 0.05379180908203125, "step": 124785 }, { "epoch": 1.0790222306767776, "grad_norm": 0.42348462645305374, "learning_rate": 2.628572037835918e-06, "loss": 0.09991111755371093, "step": 124790 }, { "epoch": 1.0790654641983208, "grad_norm": 9.639487156359774, "learning_rate": 2.628369873039489e-06, "loss": 0.06065788269042969, "step": 124795 }, { "epoch": 1.079108697719864, "grad_norm": 0.3453293916975167, "learning_rate": 2.6281677099569848e-06, "loss": 0.039064788818359376, "step": 124800 }, { "epoch": 1.0791519312414073, "grad_norm": 14.896197922045241, "learning_rate": 2.627965548589339e-06, "loss": 0.15157546997070312, "step": 124805 }, { "epoch": 1.0791951647629505, "grad_norm": 2.2325037797472005, "learning_rate": 2.6277633889374828e-06, "loss": 0.04598388671875, "step": 124810 }, { "epoch": 1.079238398284494, "grad_norm": 0.44784302114145114, "learning_rate": 2.6275612310023477e-06, "loss": 0.055106735229492186, "step": 124815 }, { "epoch": 1.0792816318060372, "grad_norm": 28.53539884261944, "learning_rate": 2.627359074784869e-06, "loss": 0.16580963134765625, "step": 124820 }, { "epoch": 1.0793248653275804, "grad_norm": 0.9519230342490004, "learning_rate": 2.6271569202859766e-06, "loss": 0.03971786499023437, "step": 124825 }, { "epoch": 1.0793680988491237, "grad_norm": 1.0066359810992365, "learning_rate": 2.6269547675066036e-06, "loss": 0.03156585693359375, "step": 124830 }, { "epoch": 1.0794113323706669, "grad_norm": 4.60756994840534, "learning_rate": 2.626752616447683e-06, "loss": 0.07071876525878906, "step": 124835 }, { "epoch": 1.0794545658922101, "grad_norm": 0.8627866194066234, "learning_rate": 2.6265504671101456e-06, "loss": 0.31976966857910155, "step": 124840 }, { "epoch": 1.0794977994137533, "grad_norm": 3.162805495933327, "learning_rate": 2.626348319494925e-06, "loss": 0.06861648559570313, "step": 124845 }, { "epoch": 1.0795410329352968, "grad_norm": 2.640488529015642, "learning_rate": 2.6261461736029513e-06, "loss": 0.018317794799804686, "step": 124850 }, { "epoch": 1.07958426645684, "grad_norm": 0.47632310221999163, "learning_rate": 2.6259440294351603e-06, "loss": 0.17193222045898438, "step": 124855 }, { "epoch": 1.0796274999783833, "grad_norm": 0.061172589573833966, "learning_rate": 2.625741886992481e-06, "loss": 0.004595184326171875, "step": 124860 }, { "epoch": 1.0796707334999265, "grad_norm": 24.302490544414322, "learning_rate": 2.625539746275848e-06, "loss": 0.20144882202148437, "step": 124865 }, { "epoch": 1.0797139670214697, "grad_norm": 0.8891880948709387, "learning_rate": 2.6253376072861923e-06, "loss": 0.11515684127807617, "step": 124870 }, { "epoch": 1.079757200543013, "grad_norm": 2.095883460095654, "learning_rate": 2.625135470024447e-06, "loss": 0.04014663696289063, "step": 124875 }, { "epoch": 1.0798004340645564, "grad_norm": 59.278926450545995, "learning_rate": 2.6249333344915426e-06, "loss": 0.4886371612548828, "step": 124880 }, { "epoch": 1.0798436675860996, "grad_norm": 0.8127754272017229, "learning_rate": 2.6247312006884135e-06, "loss": 0.11948623657226562, "step": 124885 }, { "epoch": 1.0798869011076428, "grad_norm": 1.4144663902150316, "learning_rate": 2.62452906861599e-06, "loss": 0.10699691772460937, "step": 124890 }, { "epoch": 1.079930134629186, "grad_norm": 13.8559323653689, "learning_rate": 2.624326938275206e-06, "loss": 0.10378532409667969, "step": 124895 }, { "epoch": 1.0799733681507293, "grad_norm": 9.249379852735649, "learning_rate": 2.6241248096669937e-06, "loss": 0.030336761474609376, "step": 124900 }, { "epoch": 1.0800166016722725, "grad_norm": 1.3208412699119307, "learning_rate": 2.623922682792284e-06, "loss": 0.08553428649902343, "step": 124905 }, { "epoch": 1.0800598351938158, "grad_norm": 4.108253216420494, "learning_rate": 2.623720557652008e-06, "loss": 0.017301177978515624, "step": 124910 }, { "epoch": 1.0801030687153592, "grad_norm": 0.3115970827281073, "learning_rate": 2.6235184342471012e-06, "loss": 0.090606689453125, "step": 124915 }, { "epoch": 1.0801463022369024, "grad_norm": 3.8290729122051874, "learning_rate": 2.6233163125784937e-06, "loss": 0.008762359619140625, "step": 124920 }, { "epoch": 1.0801895357584457, "grad_norm": 2.1373413115159012, "learning_rate": 2.6231141926471185e-06, "loss": 0.07700424194335938, "step": 124925 }, { "epoch": 1.080232769279989, "grad_norm": 2.9867929287651345, "learning_rate": 2.6229120744539075e-06, "loss": 0.05775527954101563, "step": 124930 }, { "epoch": 1.0802760028015321, "grad_norm": 0.19619123907066013, "learning_rate": 2.6227099579997923e-06, "loss": 0.023382568359375, "step": 124935 }, { "epoch": 1.0803192363230754, "grad_norm": 15.468328399465674, "learning_rate": 2.622507843285705e-06, "loss": 0.12346458435058594, "step": 124940 }, { "epoch": 1.0803624698446188, "grad_norm": 2.8539872696875674, "learning_rate": 2.622305730312578e-06, "loss": 0.1478656768798828, "step": 124945 }, { "epoch": 1.080405703366162, "grad_norm": 0.13331386333544692, "learning_rate": 2.6221036190813443e-06, "loss": 0.08829574584960938, "step": 124950 }, { "epoch": 1.0804489368877053, "grad_norm": 0.20740788425686604, "learning_rate": 2.6219015095929355e-06, "loss": 0.040071868896484376, "step": 124955 }, { "epoch": 1.0804921704092485, "grad_norm": 3.84747960418852, "learning_rate": 2.6216994018482832e-06, "loss": 0.01425018310546875, "step": 124960 }, { "epoch": 1.0805354039307917, "grad_norm": 0.9971731658094368, "learning_rate": 2.6214972958483203e-06, "loss": 0.011871337890625, "step": 124965 }, { "epoch": 1.080578637452335, "grad_norm": 2.06289497724179, "learning_rate": 2.6212951915939774e-06, "loss": 0.01992034912109375, "step": 124970 }, { "epoch": 1.0806218709738784, "grad_norm": 0.16063436969788789, "learning_rate": 2.6210930890861873e-06, "loss": 0.025431060791015626, "step": 124975 }, { "epoch": 1.0806651044954216, "grad_norm": 0.45446046675468404, "learning_rate": 2.6208909883258837e-06, "loss": 0.020800209045410155, "step": 124980 }, { "epoch": 1.0807083380169649, "grad_norm": 1.7190587022781114, "learning_rate": 2.6206888893139972e-06, "loss": 0.03424835205078125, "step": 124985 }, { "epoch": 1.080751571538508, "grad_norm": 0.46094183822004775, "learning_rate": 2.62048679205146e-06, "loss": 0.06633186340332031, "step": 124990 }, { "epoch": 1.0807948050600513, "grad_norm": 8.078675633730644, "learning_rate": 2.6202846965392043e-06, "loss": 0.02910308837890625, "step": 124995 }, { "epoch": 1.0808380385815946, "grad_norm": 2.9270390214652413, "learning_rate": 2.620082602778161e-06, "loss": 0.08830413818359376, "step": 125000 }, { "epoch": 1.080881272103138, "grad_norm": 2.2062405955458058, "learning_rate": 2.6198805107692642e-06, "loss": 0.41790237426757815, "step": 125005 }, { "epoch": 1.0809245056246812, "grad_norm": 4.140095812202555, "learning_rate": 2.6196784205134436e-06, "loss": 0.030872344970703125, "step": 125010 }, { "epoch": 1.0809677391462245, "grad_norm": 7.419499067681822, "learning_rate": 2.6194763320116338e-06, "loss": 0.07973861694335938, "step": 125015 }, { "epoch": 1.0810109726677677, "grad_norm": 24.61970365460869, "learning_rate": 2.6192742452647657e-06, "loss": 0.29282684326171876, "step": 125020 }, { "epoch": 1.081054206189311, "grad_norm": 10.631349434261034, "learning_rate": 2.6190721602737702e-06, "loss": 0.09350624084472656, "step": 125025 }, { "epoch": 1.0810974397108541, "grad_norm": 11.408519201121534, "learning_rate": 2.618870077039581e-06, "loss": 0.024407196044921874, "step": 125030 }, { "epoch": 1.0811406732323974, "grad_norm": 2.3171881569915582, "learning_rate": 2.618667995563129e-06, "loss": 0.11470489501953125, "step": 125035 }, { "epoch": 1.0811839067539408, "grad_norm": 3.490794066448306, "learning_rate": 2.6184659158453454e-06, "loss": 0.08768196105957031, "step": 125040 }, { "epoch": 1.081227140275484, "grad_norm": 2.1035721291789526, "learning_rate": 2.618263837887165e-06, "loss": 0.09283599853515626, "step": 125045 }, { "epoch": 1.0812703737970273, "grad_norm": 3.664739120775382, "learning_rate": 2.6180617616895175e-06, "loss": 0.0361572265625, "step": 125050 }, { "epoch": 1.0813136073185705, "grad_norm": 0.3321344311737583, "learning_rate": 2.6178596872533352e-06, "loss": 0.05041990280151367, "step": 125055 }, { "epoch": 1.0813568408401137, "grad_norm": 14.649513671318081, "learning_rate": 2.6176576145795502e-06, "loss": 0.052829742431640625, "step": 125060 }, { "epoch": 1.081400074361657, "grad_norm": 0.6744633088189409, "learning_rate": 2.617455543669095e-06, "loss": 0.019431686401367186, "step": 125065 }, { "epoch": 1.0814433078832004, "grad_norm": 7.7825158616035655, "learning_rate": 2.6172534745229e-06, "loss": 0.026112747192382813, "step": 125070 }, { "epoch": 1.0814865414047437, "grad_norm": 4.219789732240645, "learning_rate": 2.617051407141899e-06, "loss": 0.04156646728515625, "step": 125075 }, { "epoch": 1.0815297749262869, "grad_norm": 0.555654617796529, "learning_rate": 2.6168493415270233e-06, "loss": 0.03400611877441406, "step": 125080 }, { "epoch": 1.08157300844783, "grad_norm": 1.717355596701259, "learning_rate": 2.616647277679204e-06, "loss": 0.020787429809570313, "step": 125085 }, { "epoch": 1.0816162419693733, "grad_norm": 15.928426150928312, "learning_rate": 2.6164452155993743e-06, "loss": 0.11854381561279297, "step": 125090 }, { "epoch": 1.0816594754909166, "grad_norm": 9.191704179076087, "learning_rate": 2.616243155288465e-06, "loss": 0.06920356750488281, "step": 125095 }, { "epoch": 1.0817027090124598, "grad_norm": 1.1869058153376189, "learning_rate": 2.6160410967474083e-06, "loss": 0.093341064453125, "step": 125100 }, { "epoch": 1.0817459425340032, "grad_norm": 1.412536669662038, "learning_rate": 2.6158390399771354e-06, "loss": 0.3984569549560547, "step": 125105 }, { "epoch": 1.0817891760555465, "grad_norm": 8.150250819203448, "learning_rate": 2.61563698497858e-06, "loss": 0.09862136840820312, "step": 125110 }, { "epoch": 1.0818324095770897, "grad_norm": 14.245030420590913, "learning_rate": 2.615434931752672e-06, "loss": 0.07506675720214843, "step": 125115 }, { "epoch": 1.081875643098633, "grad_norm": 1.5668471575590153, "learning_rate": 2.615232880300345e-06, "loss": 0.08513565063476562, "step": 125120 }, { "epoch": 1.0819188766201762, "grad_norm": 1.824073075447018, "learning_rate": 2.6150308306225296e-06, "loss": 0.01719512939453125, "step": 125125 }, { "epoch": 1.0819621101417194, "grad_norm": 20.9200025285676, "learning_rate": 2.614828782720158e-06, "loss": 0.18225669860839844, "step": 125130 }, { "epoch": 1.0820053436632628, "grad_norm": 0.6694354742770925, "learning_rate": 2.6146267365941613e-06, "loss": 0.08328304290771485, "step": 125135 }, { "epoch": 1.082048577184806, "grad_norm": 5.876922309548196, "learning_rate": 2.614424692245473e-06, "loss": 0.040305709838867186, "step": 125140 }, { "epoch": 1.0820918107063493, "grad_norm": 1.0523085769093115, "learning_rate": 2.6142226496750228e-06, "loss": 0.0391021728515625, "step": 125145 }, { "epoch": 1.0821350442278925, "grad_norm": 0.38581956840268933, "learning_rate": 2.6140206088837447e-06, "loss": 0.0222503662109375, "step": 125150 }, { "epoch": 1.0821782777494358, "grad_norm": 0.8013188928974595, "learning_rate": 2.6138185698725695e-06, "loss": 0.0255828857421875, "step": 125155 }, { "epoch": 1.082221511270979, "grad_norm": 6.307950148439616, "learning_rate": 2.613616532642429e-06, "loss": 0.2179168701171875, "step": 125160 }, { "epoch": 1.0822647447925222, "grad_norm": 35.30400014365819, "learning_rate": 2.6134144971942537e-06, "loss": 0.2283203125, "step": 125165 }, { "epoch": 1.0823079783140657, "grad_norm": 0.0936447675795698, "learning_rate": 2.6132124635289774e-06, "loss": 0.017731475830078124, "step": 125170 }, { "epoch": 1.082351211835609, "grad_norm": 0.5731151195715691, "learning_rate": 2.6130104316475313e-06, "loss": 0.0534881591796875, "step": 125175 }, { "epoch": 1.0823944453571521, "grad_norm": 7.403868332382267, "learning_rate": 2.6128084015508468e-06, "loss": 0.037441253662109375, "step": 125180 }, { "epoch": 1.0824376788786954, "grad_norm": 7.66005827573753, "learning_rate": 2.612606373239856e-06, "loss": 0.05689239501953125, "step": 125185 }, { "epoch": 1.0824809124002386, "grad_norm": 4.971510711617409, "learning_rate": 2.6124043467154905e-06, "loss": 0.06424560546875, "step": 125190 }, { "epoch": 1.0825241459217818, "grad_norm": 16.552889164794284, "learning_rate": 2.612202321978681e-06, "loss": 0.10007495880126953, "step": 125195 }, { "epoch": 1.0825673794433253, "grad_norm": 1.5231402330906376, "learning_rate": 2.6120002990303594e-06, "loss": 0.1982147216796875, "step": 125200 }, { "epoch": 1.0826106129648685, "grad_norm": 0.13474845119810036, "learning_rate": 2.6117982778714594e-06, "loss": 0.10719985961914062, "step": 125205 }, { "epoch": 1.0826538464864117, "grad_norm": 1.8569249831925114, "learning_rate": 2.6115962585029113e-06, "loss": 0.25894737243652344, "step": 125210 }, { "epoch": 1.082697080007955, "grad_norm": 2.283509167703708, "learning_rate": 2.6113942409256473e-06, "loss": 0.2703582763671875, "step": 125215 }, { "epoch": 1.0827403135294982, "grad_norm": 0.07765848235489972, "learning_rate": 2.611192225140598e-06, "loss": 0.26209564208984376, "step": 125220 }, { "epoch": 1.0827835470510414, "grad_norm": 22.314340627955364, "learning_rate": 2.610990211148697e-06, "loss": 0.04214324951171875, "step": 125225 }, { "epoch": 1.0828267805725849, "grad_norm": 9.579700147338169, "learning_rate": 2.6107881989508722e-06, "loss": 0.02948474884033203, "step": 125230 }, { "epoch": 1.082870014094128, "grad_norm": 10.961494399132148, "learning_rate": 2.6105861885480603e-06, "loss": 0.09084129333496094, "step": 125235 }, { "epoch": 1.0829132476156713, "grad_norm": 4.102115843366206, "learning_rate": 2.61038417994119e-06, "loss": 0.04121971130371094, "step": 125240 }, { "epoch": 1.0829564811372145, "grad_norm": 4.175489720716708, "learning_rate": 2.610182173131193e-06, "loss": 0.0356353759765625, "step": 125245 }, { "epoch": 1.0829997146587578, "grad_norm": 4.067654974183416, "learning_rate": 2.609980168119002e-06, "loss": 0.037192249298095705, "step": 125250 }, { "epoch": 1.083042948180301, "grad_norm": 4.295628990869322, "learning_rate": 2.6097781649055476e-06, "loss": 0.033149337768554686, "step": 125255 }, { "epoch": 1.0830861817018445, "grad_norm": 0.04552967220944727, "learning_rate": 2.6095761634917607e-06, "loss": 0.032209014892578124, "step": 125260 }, { "epoch": 1.0831294152233877, "grad_norm": 12.728831286220741, "learning_rate": 2.6093741638785755e-06, "loss": 0.09433517456054688, "step": 125265 }, { "epoch": 1.083172648744931, "grad_norm": 0.8425186211596181, "learning_rate": 2.609172166066922e-06, "loss": 0.0553192138671875, "step": 125270 }, { "epoch": 1.0832158822664741, "grad_norm": 3.6511944565655607, "learning_rate": 2.608970170057732e-06, "loss": 0.05721588134765625, "step": 125275 }, { "epoch": 1.0832591157880174, "grad_norm": 6.970418932993234, "learning_rate": 2.608768175851936e-06, "loss": 0.05053558349609375, "step": 125280 }, { "epoch": 1.0833023493095606, "grad_norm": 3.710191541392522, "learning_rate": 2.608566183450468e-06, "loss": 0.030975532531738282, "step": 125285 }, { "epoch": 1.0833455828311038, "grad_norm": 1.5130817616180303, "learning_rate": 2.6083641928542577e-06, "loss": 0.12900390625, "step": 125290 }, { "epoch": 1.0833888163526473, "grad_norm": 2.071829123392011, "learning_rate": 2.6081622040642356e-06, "loss": 0.05002670288085938, "step": 125295 }, { "epoch": 1.0834320498741905, "grad_norm": 11.19463641925144, "learning_rate": 2.6079602170813366e-06, "loss": 0.3891937255859375, "step": 125300 }, { "epoch": 1.0834752833957337, "grad_norm": 8.108067862510607, "learning_rate": 2.6077582319064905e-06, "loss": 0.028661346435546874, "step": 125305 }, { "epoch": 1.083518516917277, "grad_norm": 24.456212948205383, "learning_rate": 2.607556248540628e-06, "loss": 0.05661220550537109, "step": 125310 }, { "epoch": 1.0835617504388202, "grad_norm": 8.224029878322835, "learning_rate": 2.6073542669846814e-06, "loss": 0.05932273864746094, "step": 125315 }, { "epoch": 1.0836049839603634, "grad_norm": 1.4547061258160632, "learning_rate": 2.6071522872395833e-06, "loss": 0.038336181640625, "step": 125320 }, { "epoch": 1.0836482174819069, "grad_norm": 13.486574967702964, "learning_rate": 2.606950309306262e-06, "loss": 0.0444854736328125, "step": 125325 }, { "epoch": 1.08369145100345, "grad_norm": 0.7140365226714938, "learning_rate": 2.606748333185653e-06, "loss": 0.01970367431640625, "step": 125330 }, { "epoch": 1.0837346845249933, "grad_norm": 1.9456271825381763, "learning_rate": 2.6065463588786855e-06, "loss": 0.04253311157226562, "step": 125335 }, { "epoch": 1.0837779180465366, "grad_norm": 44.677623086843695, "learning_rate": 2.606344386386291e-06, "loss": 0.27633514404296877, "step": 125340 }, { "epoch": 1.0838211515680798, "grad_norm": 0.4851236739225348, "learning_rate": 2.606142415709402e-06, "loss": 0.15786666870117189, "step": 125345 }, { "epoch": 1.083864385089623, "grad_norm": 5.8601845296320985, "learning_rate": 2.6059404468489492e-06, "loss": 0.07321510314941407, "step": 125350 }, { "epoch": 1.0839076186111662, "grad_norm": 0.2710480361512436, "learning_rate": 2.6057384798058644e-06, "loss": 0.17398109436035156, "step": 125355 }, { "epoch": 1.0839508521327097, "grad_norm": 10.825864262862236, "learning_rate": 2.6055365145810777e-06, "loss": 0.0723388671875, "step": 125360 }, { "epoch": 1.083994085654253, "grad_norm": 1.9767638730942587, "learning_rate": 2.605334551175523e-06, "loss": 0.041424560546875, "step": 125365 }, { "epoch": 1.0840373191757962, "grad_norm": 5.25986844481743, "learning_rate": 2.6051325895901298e-06, "loss": 0.09025115966796875, "step": 125370 }, { "epoch": 1.0840805526973394, "grad_norm": 2.720503093259128, "learning_rate": 2.604930629825831e-06, "loss": 0.11331710815429688, "step": 125375 }, { "epoch": 1.0841237862188826, "grad_norm": 38.40289342942509, "learning_rate": 2.604728671883557e-06, "loss": 0.06724395751953124, "step": 125380 }, { "epoch": 1.0841670197404258, "grad_norm": 2.986508159349019, "learning_rate": 2.6045267157642395e-06, "loss": 0.044049072265625, "step": 125385 }, { "epoch": 1.0842102532619693, "grad_norm": 2.57855249016782, "learning_rate": 2.6043247614688083e-06, "loss": 0.040191650390625, "step": 125390 }, { "epoch": 1.0842534867835125, "grad_norm": 0.09828026328542276, "learning_rate": 2.604122808998198e-06, "loss": 0.056708335876464844, "step": 125395 }, { "epoch": 1.0842967203050558, "grad_norm": 0.5799755552480088, "learning_rate": 2.6039208583533375e-06, "loss": 0.0418426513671875, "step": 125400 }, { "epoch": 1.084339953826599, "grad_norm": 0.7618957013634301, "learning_rate": 2.60371890953516e-06, "loss": 0.0414581298828125, "step": 125405 }, { "epoch": 1.0843831873481422, "grad_norm": 15.188841289789643, "learning_rate": 2.6035169625445952e-06, "loss": 0.171661376953125, "step": 125410 }, { "epoch": 1.0844264208696854, "grad_norm": 0.6603321246394966, "learning_rate": 2.603315017382576e-06, "loss": 0.011782073974609375, "step": 125415 }, { "epoch": 1.0844696543912287, "grad_norm": 0.5591100122377758, "learning_rate": 2.603113074050031e-06, "loss": 0.11857109069824219, "step": 125420 }, { "epoch": 1.0845128879127721, "grad_norm": 9.369667708720826, "learning_rate": 2.602911132547894e-06, "loss": 0.03488779067993164, "step": 125425 }, { "epoch": 1.0845561214343153, "grad_norm": 1.590202093414969, "learning_rate": 2.6027091928770967e-06, "loss": 0.024907684326171874, "step": 125430 }, { "epoch": 1.0845993549558586, "grad_norm": 0.8041284897661509, "learning_rate": 2.6025072550385695e-06, "loss": 0.04776487350463867, "step": 125435 }, { "epoch": 1.0846425884774018, "grad_norm": 3.218875310553519, "learning_rate": 2.602305319033244e-06, "loss": 0.07965240478515626, "step": 125440 }, { "epoch": 1.084685821998945, "grad_norm": 6.536209801700542, "learning_rate": 2.60210338486205e-06, "loss": 0.031595611572265626, "step": 125445 }, { "epoch": 1.0847290555204885, "grad_norm": 3.619449434019156, "learning_rate": 2.601901452525921e-06, "loss": 0.15724334716796876, "step": 125450 }, { "epoch": 1.0847722890420317, "grad_norm": 0.5479864358063962, "learning_rate": 2.6016995220257856e-06, "loss": 0.036583900451660156, "step": 125455 }, { "epoch": 1.084815522563575, "grad_norm": 14.736642810937068, "learning_rate": 2.6014975933625784e-06, "loss": 0.05168895721435547, "step": 125460 }, { "epoch": 1.0848587560851182, "grad_norm": 0.9412809246818354, "learning_rate": 2.6012956665372295e-06, "loss": 0.0731719970703125, "step": 125465 }, { "epoch": 1.0849019896066614, "grad_norm": 21.67963356989237, "learning_rate": 2.6010937415506693e-06, "loss": 0.09212875366210938, "step": 125470 }, { "epoch": 1.0849452231282046, "grad_norm": 41.9840432314756, "learning_rate": 2.600891818403829e-06, "loss": 0.0992584228515625, "step": 125475 }, { "epoch": 1.0849884566497479, "grad_norm": 8.882006699219856, "learning_rate": 2.600689897097641e-06, "loss": 0.08808746337890624, "step": 125480 }, { "epoch": 1.0850316901712913, "grad_norm": 1.3757775986580874, "learning_rate": 2.600487977633034e-06, "loss": 0.19360580444335937, "step": 125485 }, { "epoch": 1.0850749236928345, "grad_norm": 26.3658770084373, "learning_rate": 2.600286060010943e-06, "loss": 0.1108154296875, "step": 125490 }, { "epoch": 1.0851181572143778, "grad_norm": 1.8222006745160984, "learning_rate": 2.6000841442322975e-06, "loss": 0.03721542358398437, "step": 125495 }, { "epoch": 1.085161390735921, "grad_norm": 9.48418491814481, "learning_rate": 2.5998822302980285e-06, "loss": 0.11158905029296876, "step": 125500 }, { "epoch": 1.0852046242574642, "grad_norm": 3.819324319633642, "learning_rate": 2.5996803182090666e-06, "loss": 0.01661376953125, "step": 125505 }, { "epoch": 1.0852478577790075, "grad_norm": 0.25915298086345473, "learning_rate": 2.5994784079663447e-06, "loss": 0.04110565185546875, "step": 125510 }, { "epoch": 1.085291091300551, "grad_norm": 8.680264609721673, "learning_rate": 2.5992764995707908e-06, "loss": 0.36189994812011717, "step": 125515 }, { "epoch": 1.0853343248220941, "grad_norm": 0.07098922188121701, "learning_rate": 2.5990745930233403e-06, "loss": 0.029152297973632814, "step": 125520 }, { "epoch": 1.0853775583436374, "grad_norm": 1.5452545683008125, "learning_rate": 2.5988726883249225e-06, "loss": 0.0358917236328125, "step": 125525 }, { "epoch": 1.0854207918651806, "grad_norm": 15.868925478794822, "learning_rate": 2.598670785476468e-06, "loss": 0.27010955810546877, "step": 125530 }, { "epoch": 1.0854640253867238, "grad_norm": 0.7760719034676486, "learning_rate": 2.598468884478908e-06, "loss": 0.016503143310546874, "step": 125535 }, { "epoch": 1.085507258908267, "grad_norm": 6.59219030653389, "learning_rate": 2.5982669853331742e-06, "loss": 0.20447196960449218, "step": 125540 }, { "epoch": 1.0855504924298103, "grad_norm": 0.206877028553668, "learning_rate": 2.598065088040198e-06, "loss": 0.059452056884765625, "step": 125545 }, { "epoch": 1.0855937259513537, "grad_norm": 16.35130299255665, "learning_rate": 2.5978631926009085e-06, "loss": 0.06188201904296875, "step": 125550 }, { "epoch": 1.085636959472897, "grad_norm": 0.9652498187469307, "learning_rate": 2.59766129901624e-06, "loss": 0.02022552490234375, "step": 125555 }, { "epoch": 1.0856801929944402, "grad_norm": 4.1036302808501, "learning_rate": 2.597459407287122e-06, "loss": 0.2531440734863281, "step": 125560 }, { "epoch": 1.0857234265159834, "grad_norm": 32.06230029818644, "learning_rate": 2.597257517414485e-06, "loss": 0.08999176025390625, "step": 125565 }, { "epoch": 1.0857666600375266, "grad_norm": 2.6141805422546533, "learning_rate": 2.5970556293992612e-06, "loss": 0.0483154296875, "step": 125570 }, { "epoch": 1.0858098935590699, "grad_norm": 1.1343089226212442, "learning_rate": 2.596853743242381e-06, "loss": 0.41386566162109373, "step": 125575 }, { "epoch": 1.0858531270806133, "grad_norm": 9.550181809083755, "learning_rate": 2.596651858944775e-06, "loss": 0.158154296875, "step": 125580 }, { "epoch": 1.0858963606021566, "grad_norm": 0.9244062782980864, "learning_rate": 2.596449976507376e-06, "loss": 0.03930397033691406, "step": 125585 }, { "epoch": 1.0859395941236998, "grad_norm": 34.44265353554068, "learning_rate": 2.5962480959311144e-06, "loss": 0.039190673828125, "step": 125590 }, { "epoch": 1.085982827645243, "grad_norm": 0.11612874234295134, "learning_rate": 2.5960462172169197e-06, "loss": 0.0797698974609375, "step": 125595 }, { "epoch": 1.0860260611667862, "grad_norm": 12.068750527726598, "learning_rate": 2.5958443403657252e-06, "loss": 0.08061676025390625, "step": 125600 }, { "epoch": 1.0860692946883295, "grad_norm": 22.690009959052315, "learning_rate": 2.5956424653784606e-06, "loss": 0.15888519287109376, "step": 125605 }, { "epoch": 1.0861125282098727, "grad_norm": 0.16233542449136917, "learning_rate": 2.5954405922560572e-06, "loss": 0.008541297912597657, "step": 125610 }, { "epoch": 1.0861557617314161, "grad_norm": 3.2181106070845136, "learning_rate": 2.595238720999445e-06, "loss": 0.39173431396484376, "step": 125615 }, { "epoch": 1.0861989952529594, "grad_norm": 1.734213273511591, "learning_rate": 2.5950368516095568e-06, "loss": 0.023504638671875, "step": 125620 }, { "epoch": 1.0862422287745026, "grad_norm": 3.9828893994629397, "learning_rate": 2.594834984087323e-06, "loss": 0.02402839660644531, "step": 125625 }, { "epoch": 1.0862854622960458, "grad_norm": 9.186432071349438, "learning_rate": 2.5946331184336747e-06, "loss": 0.09586544036865234, "step": 125630 }, { "epoch": 1.086328695817589, "grad_norm": 28.502076064464486, "learning_rate": 2.594431254649543e-06, "loss": 0.058551025390625, "step": 125635 }, { "epoch": 1.0863719293391323, "grad_norm": 53.30380065743373, "learning_rate": 2.594229392735858e-06, "loss": 0.12749481201171875, "step": 125640 }, { "epoch": 1.0864151628606757, "grad_norm": 0.31744816175125784, "learning_rate": 2.5940275326935506e-06, "loss": 0.01625823974609375, "step": 125645 }, { "epoch": 1.086458396382219, "grad_norm": 1.1635541420475868, "learning_rate": 2.5938256745235526e-06, "loss": 0.03060150146484375, "step": 125650 }, { "epoch": 1.0865016299037622, "grad_norm": 10.45908223488004, "learning_rate": 2.593623818226795e-06, "loss": 0.09137992858886719, "step": 125655 }, { "epoch": 1.0865448634253054, "grad_norm": 0.7213854808800897, "learning_rate": 2.593421963804209e-06, "loss": 0.11598587036132812, "step": 125660 }, { "epoch": 1.0865880969468487, "grad_norm": 17.28802525662125, "learning_rate": 2.5932201112567245e-06, "loss": 0.11108245849609374, "step": 125665 }, { "epoch": 1.0866313304683919, "grad_norm": 0.7807072352832423, "learning_rate": 2.593018260585272e-06, "loss": 0.13138465881347655, "step": 125670 }, { "epoch": 1.0866745639899353, "grad_norm": 3.367386130738581, "learning_rate": 2.592816411790784e-06, "loss": 0.024263763427734376, "step": 125675 }, { "epoch": 1.0867177975114786, "grad_norm": 7.442398657203446, "learning_rate": 2.592614564874191e-06, "loss": 0.15803375244140624, "step": 125680 }, { "epoch": 1.0867610310330218, "grad_norm": 6.261962264737084, "learning_rate": 2.592412719836423e-06, "loss": 0.03526229858398437, "step": 125685 }, { "epoch": 1.086804264554565, "grad_norm": 0.7188987077094737, "learning_rate": 2.5922108766784124e-06, "loss": 0.06363067626953126, "step": 125690 }, { "epoch": 1.0868474980761083, "grad_norm": 12.608641314360774, "learning_rate": 2.5920090354010893e-06, "loss": 0.12275543212890624, "step": 125695 }, { "epoch": 1.0868907315976515, "grad_norm": 2.3397810214353703, "learning_rate": 2.5918071960053835e-06, "loss": 0.031452178955078125, "step": 125700 }, { "epoch": 1.086933965119195, "grad_norm": 0.37432189724128606, "learning_rate": 2.591605358492228e-06, "loss": 0.1731048583984375, "step": 125705 }, { "epoch": 1.0869771986407382, "grad_norm": 8.62121571878457, "learning_rate": 2.5914035228625503e-06, "loss": 0.09457559585571289, "step": 125710 }, { "epoch": 1.0870204321622814, "grad_norm": 1.8448187251962973, "learning_rate": 2.5912016891172856e-06, "loss": 0.037371063232421876, "step": 125715 }, { "epoch": 1.0870636656838246, "grad_norm": 6.883146413521629, "learning_rate": 2.590999857257362e-06, "loss": 0.09587516784667968, "step": 125720 }, { "epoch": 1.0871068992053678, "grad_norm": 0.16708358073581459, "learning_rate": 2.5907980272837115e-06, "loss": 0.05343437194824219, "step": 125725 }, { "epoch": 1.087150132726911, "grad_norm": 1.2007001552721426, "learning_rate": 2.5905961991972635e-06, "loss": 0.022403526306152343, "step": 125730 }, { "epoch": 1.0871933662484543, "grad_norm": 7.128886613289803, "learning_rate": 2.5903943729989497e-06, "loss": 0.2631385803222656, "step": 125735 }, { "epoch": 1.0872365997699978, "grad_norm": 6.978218211607044, "learning_rate": 2.5901925486897002e-06, "loss": 0.059290313720703126, "step": 125740 }, { "epoch": 1.087279833291541, "grad_norm": 0.8502200587878918, "learning_rate": 2.5899907262704477e-06, "loss": 0.13589935302734374, "step": 125745 }, { "epoch": 1.0873230668130842, "grad_norm": 2.9069486990287494, "learning_rate": 2.5897889057421216e-06, "loss": 0.264874267578125, "step": 125750 }, { "epoch": 1.0873663003346274, "grad_norm": 0.5650090826419563, "learning_rate": 2.589587087105653e-06, "loss": 0.06066970825195313, "step": 125755 }, { "epoch": 1.0874095338561707, "grad_norm": 10.392711971680951, "learning_rate": 2.5893852703619713e-06, "loss": 0.08326892852783203, "step": 125760 }, { "epoch": 1.087452767377714, "grad_norm": 22.980759182683588, "learning_rate": 2.5891834555120093e-06, "loss": 0.10613479614257812, "step": 125765 }, { "epoch": 1.0874960008992574, "grad_norm": 29.230547741685584, "learning_rate": 2.5889816425566957e-06, "loss": 0.2099853515625, "step": 125770 }, { "epoch": 1.0875392344208006, "grad_norm": 1.0299391336253445, "learning_rate": 2.588779831496964e-06, "loss": 0.16592979431152344, "step": 125775 }, { "epoch": 1.0875824679423438, "grad_norm": 1.1815605996836598, "learning_rate": 2.588578022333743e-06, "loss": 0.07610397338867188, "step": 125780 }, { "epoch": 1.087625701463887, "grad_norm": 2.5732292646893566, "learning_rate": 2.588376215067964e-06, "loss": 0.09249725341796874, "step": 125785 }, { "epoch": 1.0876689349854303, "grad_norm": 10.052278078822873, "learning_rate": 2.588174409700557e-06, "loss": 0.10550537109375, "step": 125790 }, { "epoch": 1.0877121685069735, "grad_norm": 15.691783136931932, "learning_rate": 2.5879726062324538e-06, "loss": 0.10125732421875, "step": 125795 }, { "epoch": 1.0877554020285167, "grad_norm": 61.434044164253336, "learning_rate": 2.5877708046645846e-06, "loss": 0.10738458633422851, "step": 125800 }, { "epoch": 1.0877986355500602, "grad_norm": 2.2043637747092295, "learning_rate": 2.5875690049978787e-06, "loss": 0.012194061279296875, "step": 125805 }, { "epoch": 1.0878418690716034, "grad_norm": 6.276900243875299, "learning_rate": 2.5873672072332696e-06, "loss": 0.015412139892578124, "step": 125810 }, { "epoch": 1.0878851025931466, "grad_norm": 10.547331771500145, "learning_rate": 2.587165411371686e-06, "loss": 0.07340660095214843, "step": 125815 }, { "epoch": 1.0879283361146899, "grad_norm": 2.300326973677208, "learning_rate": 2.5869636174140585e-06, "loss": 0.48090248107910155, "step": 125820 }, { "epoch": 1.087971569636233, "grad_norm": 2.0946694405008945, "learning_rate": 2.586761825361319e-06, "loss": 0.09488677978515625, "step": 125825 }, { "epoch": 1.0880148031577763, "grad_norm": 55.442706583513164, "learning_rate": 2.5865600352143975e-06, "loss": 0.11649932861328124, "step": 125830 }, { "epoch": 1.0880580366793198, "grad_norm": 43.997927924885005, "learning_rate": 2.5863582469742237e-06, "loss": 0.598541259765625, "step": 125835 }, { "epoch": 1.088101270200863, "grad_norm": 3.8382795565373136, "learning_rate": 2.5861564606417295e-06, "loss": 0.18003997802734376, "step": 125840 }, { "epoch": 1.0881445037224062, "grad_norm": 0.4209106050753982, "learning_rate": 2.585954676217845e-06, "loss": 0.2174509048461914, "step": 125845 }, { "epoch": 1.0881877372439495, "grad_norm": 4.888630611125046, "learning_rate": 2.585752893703502e-06, "loss": 0.03318367004394531, "step": 125850 }, { "epoch": 1.0882309707654927, "grad_norm": 1.1094050707195735, "learning_rate": 2.585551113099629e-06, "loss": 0.053081512451171875, "step": 125855 }, { "epoch": 1.088274204287036, "grad_norm": 0.661462857614849, "learning_rate": 2.5853493344071586e-06, "loss": 0.06896438598632812, "step": 125860 }, { "epoch": 1.0883174378085791, "grad_norm": 4.196879738145282, "learning_rate": 2.5851475576270187e-06, "loss": 0.29764862060546876, "step": 125865 }, { "epoch": 1.0883606713301226, "grad_norm": 2.9948941472817134, "learning_rate": 2.5849457827601433e-06, "loss": 0.03280181884765625, "step": 125870 }, { "epoch": 1.0884039048516658, "grad_norm": 3.568815531064413, "learning_rate": 2.5847440098074603e-06, "loss": 0.047339248657226565, "step": 125875 }, { "epoch": 1.088447138373209, "grad_norm": 2.111426283406367, "learning_rate": 2.584542238769902e-06, "loss": 0.0371429443359375, "step": 125880 }, { "epoch": 1.0884903718947523, "grad_norm": 0.43049947906568375, "learning_rate": 2.584340469648398e-06, "loss": 0.05040130615234375, "step": 125885 }, { "epoch": 1.0885336054162955, "grad_norm": 4.383535805672127, "learning_rate": 2.584138702443879e-06, "loss": 0.03286552429199219, "step": 125890 }, { "epoch": 1.0885768389378387, "grad_norm": 1.4921399349437419, "learning_rate": 2.583936937157275e-06, "loss": 0.03332443237304687, "step": 125895 }, { "epoch": 1.0886200724593822, "grad_norm": 9.25501585750999, "learning_rate": 2.5837351737895172e-06, "loss": 0.051129150390625, "step": 125900 }, { "epoch": 1.0886633059809254, "grad_norm": 4.142436550512779, "learning_rate": 2.5835334123415364e-06, "loss": 0.014232635498046875, "step": 125905 }, { "epoch": 1.0887065395024687, "grad_norm": 5.643951517900261, "learning_rate": 2.5833316528142625e-06, "loss": 0.032705497741699216, "step": 125910 }, { "epoch": 1.0887497730240119, "grad_norm": 0.3157689347439976, "learning_rate": 2.5831298952086268e-06, "loss": 0.06941680908203125, "step": 125915 }, { "epoch": 1.088793006545555, "grad_norm": 9.846832147540429, "learning_rate": 2.582928139525559e-06, "loss": 0.07600860595703125, "step": 125920 }, { "epoch": 1.0888362400670983, "grad_norm": 22.53324871357279, "learning_rate": 2.582726385765989e-06, "loss": 0.0928466796875, "step": 125925 }, { "epoch": 1.0888794735886418, "grad_norm": 2.49812250431258, "learning_rate": 2.582524633930848e-06, "loss": 0.022971343994140626, "step": 125930 }, { "epoch": 1.088922707110185, "grad_norm": 12.420501582027727, "learning_rate": 2.5823228840210664e-06, "loss": 0.1315399169921875, "step": 125935 }, { "epoch": 1.0889659406317282, "grad_norm": 0.27584257894341274, "learning_rate": 2.582121136037576e-06, "loss": 0.0465362548828125, "step": 125940 }, { "epoch": 1.0890091741532715, "grad_norm": 55.462059663508924, "learning_rate": 2.5819193899813056e-06, "loss": 0.11120681762695313, "step": 125945 }, { "epoch": 1.0890524076748147, "grad_norm": 0.327248180426448, "learning_rate": 2.581717645853186e-06, "loss": 0.042401123046875, "step": 125950 }, { "epoch": 1.089095641196358, "grad_norm": 3.995715690384805, "learning_rate": 2.581515903654147e-06, "loss": 0.035533905029296875, "step": 125955 }, { "epoch": 1.0891388747179014, "grad_norm": 0.404573883387141, "learning_rate": 2.5813141633851205e-06, "loss": 0.007073211669921875, "step": 125960 }, { "epoch": 1.0891821082394446, "grad_norm": 5.659531992941227, "learning_rate": 2.5811124250470343e-06, "loss": 0.0569580078125, "step": 125965 }, { "epoch": 1.0892253417609878, "grad_norm": 8.106490867180892, "learning_rate": 2.5809106886408223e-06, "loss": 0.03275909423828125, "step": 125970 }, { "epoch": 1.089268575282531, "grad_norm": 8.205949845166607, "learning_rate": 2.5807089541674133e-06, "loss": 0.03855476379394531, "step": 125975 }, { "epoch": 1.0893118088040743, "grad_norm": 6.119972666477863, "learning_rate": 2.5805072216277373e-06, "loss": 0.08980941772460938, "step": 125980 }, { "epoch": 1.0893550423256175, "grad_norm": 10.070431123548651, "learning_rate": 2.5803054910227244e-06, "loss": 0.059398651123046875, "step": 125985 }, { "epoch": 1.0893982758471608, "grad_norm": 9.807062371819022, "learning_rate": 2.580103762353306e-06, "loss": 0.1686767578125, "step": 125990 }, { "epoch": 1.0894415093687042, "grad_norm": 0.7629929309617719, "learning_rate": 2.5799020356204105e-06, "loss": 0.04203166961669922, "step": 125995 }, { "epoch": 1.0894847428902474, "grad_norm": 1.130117939371068, "learning_rate": 2.5797003108249717e-06, "loss": 0.01276092529296875, "step": 126000 }, { "epoch": 1.0895279764117907, "grad_norm": 1.4312817486187108, "learning_rate": 2.5794985879679174e-06, "loss": 0.11271200180053711, "step": 126005 }, { "epoch": 1.089571209933334, "grad_norm": 26.121283416517006, "learning_rate": 2.5792968670501784e-06, "loss": 0.12481155395507812, "step": 126010 }, { "epoch": 1.0896144434548771, "grad_norm": 6.659806268262051, "learning_rate": 2.5790951480726845e-06, "loss": 0.04219398498535156, "step": 126015 }, { "epoch": 1.0896576769764204, "grad_norm": 3.0621291380454805, "learning_rate": 2.5788934310363673e-06, "loss": 0.190380859375, "step": 126020 }, { "epoch": 1.0897009104979638, "grad_norm": 3.251282436575269, "learning_rate": 2.5786917159421553e-06, "loss": 0.048781967163085936, "step": 126025 }, { "epoch": 1.089744144019507, "grad_norm": 1.209034108844813, "learning_rate": 2.5784900027909804e-06, "loss": 0.0175323486328125, "step": 126030 }, { "epoch": 1.0897873775410503, "grad_norm": 2.5669689508869347, "learning_rate": 2.5782882915837733e-06, "loss": 0.1490142822265625, "step": 126035 }, { "epoch": 1.0898306110625935, "grad_norm": 0.9702729882184149, "learning_rate": 2.578086582321463e-06, "loss": 0.04560279846191406, "step": 126040 }, { "epoch": 1.0898738445841367, "grad_norm": 2.298395546434495, "learning_rate": 2.577884875004979e-06, "loss": 0.16833038330078126, "step": 126045 }, { "epoch": 1.08991707810568, "grad_norm": 2.1940770687361923, "learning_rate": 2.577683169635254e-06, "loss": 0.21212234497070312, "step": 126050 }, { "epoch": 1.0899603116272232, "grad_norm": 21.318307455755804, "learning_rate": 2.5774814662132166e-06, "loss": 0.09651260375976563, "step": 126055 }, { "epoch": 1.0900035451487666, "grad_norm": 1.2650595784806546, "learning_rate": 2.5772797647397958e-06, "loss": 0.021315765380859376, "step": 126060 }, { "epoch": 1.0900467786703099, "grad_norm": 9.26124513649271, "learning_rate": 2.577078065215925e-06, "loss": 0.0350799560546875, "step": 126065 }, { "epoch": 1.090090012191853, "grad_norm": 18.71191508620381, "learning_rate": 2.5768763676425323e-06, "loss": 0.04223976135253906, "step": 126070 }, { "epoch": 1.0901332457133963, "grad_norm": 9.372130391039445, "learning_rate": 2.5766746720205484e-06, "loss": 0.04644966125488281, "step": 126075 }, { "epoch": 1.0901764792349395, "grad_norm": 3.740575532865607, "learning_rate": 2.576472978350904e-06, "loss": 0.01908416748046875, "step": 126080 }, { "epoch": 1.0902197127564828, "grad_norm": 25.029048977902875, "learning_rate": 2.576271286634529e-06, "loss": 0.09224700927734375, "step": 126085 }, { "epoch": 1.0902629462780262, "grad_norm": 3.0444012776017275, "learning_rate": 2.5760695968723516e-06, "loss": 0.162139892578125, "step": 126090 }, { "epoch": 1.0903061797995695, "grad_norm": 2.8287896038860127, "learning_rate": 2.575867909065305e-06, "loss": 0.1351898193359375, "step": 126095 }, { "epoch": 1.0903494133211127, "grad_norm": 3.029447504964181, "learning_rate": 2.575666223214318e-06, "loss": 0.05283546447753906, "step": 126100 }, { "epoch": 1.090392646842656, "grad_norm": 6.174517418629725, "learning_rate": 2.5754645393203216e-06, "loss": 0.02294158935546875, "step": 126105 }, { "epoch": 1.0904358803641991, "grad_norm": 0.6209461177039588, "learning_rate": 2.5752628573842446e-06, "loss": 0.06289176940917969, "step": 126110 }, { "epoch": 1.0904791138857424, "grad_norm": 4.619609832009123, "learning_rate": 2.575061177407018e-06, "loss": 0.03323974609375, "step": 126115 }, { "epoch": 1.0905223474072856, "grad_norm": 0.9574008536244486, "learning_rate": 2.574859499389571e-06, "loss": 0.030841827392578125, "step": 126120 }, { "epoch": 1.090565580928829, "grad_norm": 1.9648782253015342, "learning_rate": 2.574657823332835e-06, "loss": 0.028015899658203124, "step": 126125 }, { "epoch": 1.0906088144503723, "grad_norm": 15.772664092555623, "learning_rate": 2.574456149237739e-06, "loss": 0.07410316467285157, "step": 126130 }, { "epoch": 1.0906520479719155, "grad_norm": 10.719661007907403, "learning_rate": 2.5742544771052146e-06, "loss": 0.035289764404296875, "step": 126135 }, { "epoch": 1.0906952814934587, "grad_norm": 34.94273987798253, "learning_rate": 2.5740528069361905e-06, "loss": 0.128009033203125, "step": 126140 }, { "epoch": 1.090738515015002, "grad_norm": 0.08198088451925004, "learning_rate": 2.5738511387315972e-06, "loss": 0.031907272338867185, "step": 126145 }, { "epoch": 1.0907817485365452, "grad_norm": 0.05680497523755723, "learning_rate": 2.5736494724923643e-06, "loss": 0.20530948638916016, "step": 126150 }, { "epoch": 1.0908249820580886, "grad_norm": 5.201738555004474, "learning_rate": 2.5734478082194226e-06, "loss": 0.04212799072265625, "step": 126155 }, { "epoch": 1.0908682155796319, "grad_norm": 25.31939937445175, "learning_rate": 2.573246145913702e-06, "loss": 0.0780726432800293, "step": 126160 }, { "epoch": 1.090911449101175, "grad_norm": 0.19237967738702816, "learning_rate": 2.5730444855761324e-06, "loss": 0.08054046630859375, "step": 126165 }, { "epoch": 1.0909546826227183, "grad_norm": 1.6649182469427373, "learning_rate": 2.572842827207645e-06, "loss": 0.02956390380859375, "step": 126170 }, { "epoch": 1.0909979161442616, "grad_norm": 4.555388004886682, "learning_rate": 2.572641170809168e-06, "loss": 0.0203033447265625, "step": 126175 }, { "epoch": 1.0910411496658048, "grad_norm": 33.815774252871414, "learning_rate": 2.5724395163816315e-06, "loss": 0.16572914123535157, "step": 126180 }, { "epoch": 1.0910843831873482, "grad_norm": 40.872567880909635, "learning_rate": 2.572237863925966e-06, "loss": 0.17661819458007813, "step": 126185 }, { "epoch": 1.0911276167088915, "grad_norm": 46.59103149361331, "learning_rate": 2.572036213443102e-06, "loss": 0.10164566040039062, "step": 126190 }, { "epoch": 1.0911708502304347, "grad_norm": 1.7850104461101732, "learning_rate": 2.57183456493397e-06, "loss": 0.0290679931640625, "step": 126195 }, { "epoch": 1.091214083751978, "grad_norm": 0.49425638375204034, "learning_rate": 2.5716329183994987e-06, "loss": 0.027681922912597655, "step": 126200 }, { "epoch": 1.0912573172735212, "grad_norm": 1.2805699382011453, "learning_rate": 2.571431273840619e-06, "loss": 0.03307938575744629, "step": 126205 }, { "epoch": 1.0913005507950644, "grad_norm": 2.996328820048216, "learning_rate": 2.5712296312582596e-06, "loss": 0.058011436462402345, "step": 126210 }, { "epoch": 1.0913437843166078, "grad_norm": 18.19829141416977, "learning_rate": 2.571027990653351e-06, "loss": 0.07431983947753906, "step": 126215 }, { "epoch": 1.091387017838151, "grad_norm": 0.1273607090397378, "learning_rate": 2.5708263520268237e-06, "loss": 0.08337211608886719, "step": 126220 }, { "epoch": 1.0914302513596943, "grad_norm": 20.40739193431919, "learning_rate": 2.5706247153796075e-06, "loss": 0.1288595199584961, "step": 126225 }, { "epoch": 1.0914734848812375, "grad_norm": 1.4002776903838245, "learning_rate": 2.570423080712633e-06, "loss": 0.0376373291015625, "step": 126230 }, { "epoch": 1.0915167184027808, "grad_norm": 0.62039421611293, "learning_rate": 2.5702214480268288e-06, "loss": 0.04801177978515625, "step": 126235 }, { "epoch": 1.091559951924324, "grad_norm": 0.26519226583523, "learning_rate": 2.5700198173231246e-06, "loss": 0.017247772216796874, "step": 126240 }, { "epoch": 1.0916031854458672, "grad_norm": 0.988670860286894, "learning_rate": 2.5698181886024516e-06, "loss": 0.10937728881835937, "step": 126245 }, { "epoch": 1.0916464189674107, "grad_norm": 1.012353891746183, "learning_rate": 2.569616561865738e-06, "loss": 0.27127227783203123, "step": 126250 }, { "epoch": 1.0916896524889539, "grad_norm": 3.0159873219751074, "learning_rate": 2.569414937113916e-06, "loss": 0.03813934326171875, "step": 126255 }, { "epoch": 1.0917328860104971, "grad_norm": 1.5946186165033993, "learning_rate": 2.5692133143479142e-06, "loss": 0.056317138671875, "step": 126260 }, { "epoch": 1.0917761195320403, "grad_norm": 2.2630814753786206, "learning_rate": 2.5690116935686626e-06, "loss": 0.0163299560546875, "step": 126265 }, { "epoch": 1.0918193530535836, "grad_norm": 1.7399809180231778, "learning_rate": 2.5688100747770907e-06, "loss": 0.05600738525390625, "step": 126270 }, { "epoch": 1.0918625865751268, "grad_norm": 14.26228159245031, "learning_rate": 2.5686084579741285e-06, "loss": 0.07850799560546876, "step": 126275 }, { "epoch": 1.0919058200966703, "grad_norm": 2.897724411377956, "learning_rate": 2.5684068431607052e-06, "loss": 0.0734527587890625, "step": 126280 }, { "epoch": 1.0919490536182135, "grad_norm": 23.99750568502485, "learning_rate": 2.5682052303377526e-06, "loss": 0.05619125366210938, "step": 126285 }, { "epoch": 1.0919922871397567, "grad_norm": 1.2078125059344662, "learning_rate": 2.5680036195061995e-06, "loss": 0.04102325439453125, "step": 126290 }, { "epoch": 1.0920355206613, "grad_norm": 1.9899664002555473, "learning_rate": 2.5678020106669746e-06, "loss": 0.03185005187988281, "step": 126295 }, { "epoch": 1.0920787541828432, "grad_norm": 2.840523184968063, "learning_rate": 2.5676004038210096e-06, "loss": 0.019893646240234375, "step": 126300 }, { "epoch": 1.0921219877043864, "grad_norm": 7.316384431182565, "learning_rate": 2.567398798969233e-06, "loss": 0.15887680053710937, "step": 126305 }, { "epoch": 1.0921652212259296, "grad_norm": 0.24127297635856132, "learning_rate": 2.567197196112575e-06, "loss": 0.009359931945800782, "step": 126310 }, { "epoch": 1.092208454747473, "grad_norm": 8.784169694905444, "learning_rate": 2.5669955952519643e-06, "loss": 0.0589569091796875, "step": 126315 }, { "epoch": 1.0922516882690163, "grad_norm": 0.3454507591853524, "learning_rate": 2.566793996388333e-06, "loss": 0.03307838439941406, "step": 126320 }, { "epoch": 1.0922949217905595, "grad_norm": 1.9282878824925453, "learning_rate": 2.566592399522608e-06, "loss": 0.045682525634765624, "step": 126325 }, { "epoch": 1.0923381553121028, "grad_norm": 1.0000822091805748, "learning_rate": 2.566390804655722e-06, "loss": 0.0220855712890625, "step": 126330 }, { "epoch": 1.092381388833646, "grad_norm": 25.19761407080171, "learning_rate": 2.566189211788603e-06, "loss": 0.30867538452148435, "step": 126335 }, { "epoch": 1.0924246223551892, "grad_norm": 0.30727032413539856, "learning_rate": 2.565987620922181e-06, "loss": 0.061376190185546874, "step": 126340 }, { "epoch": 1.0924678558767327, "grad_norm": 0.2324572521919113, "learning_rate": 2.5657860320573846e-06, "loss": 0.04014892578125, "step": 126345 }, { "epoch": 1.092511089398276, "grad_norm": 2.5693511385848335, "learning_rate": 2.5655844451951457e-06, "loss": 0.12418975830078124, "step": 126350 }, { "epoch": 1.0925543229198191, "grad_norm": 9.354218385414379, "learning_rate": 2.5653828603363925e-06, "loss": 0.03421630859375, "step": 126355 }, { "epoch": 1.0925975564413624, "grad_norm": 3.6041712072474335, "learning_rate": 2.565181277482056e-06, "loss": 0.02120208740234375, "step": 126360 }, { "epoch": 1.0926407899629056, "grad_norm": 0.4572242371700367, "learning_rate": 2.564979696633065e-06, "loss": 0.05143804550170898, "step": 126365 }, { "epoch": 1.0926840234844488, "grad_norm": 13.672156915467742, "learning_rate": 2.5647781177903488e-06, "loss": 0.24105911254882811, "step": 126370 }, { "epoch": 1.0927272570059923, "grad_norm": 1.119920200479603, "learning_rate": 2.564576540954836e-06, "loss": 0.0209228515625, "step": 126375 }, { "epoch": 1.0927704905275355, "grad_norm": 10.74895438189953, "learning_rate": 2.56437496612746e-06, "loss": 0.04196014404296875, "step": 126380 }, { "epoch": 1.0928137240490787, "grad_norm": 19.448044879303286, "learning_rate": 2.5641733933091466e-06, "loss": 0.07385330200195313, "step": 126385 }, { "epoch": 1.092856957570622, "grad_norm": 1.6126930726678377, "learning_rate": 2.5639718225008277e-06, "loss": 0.13748130798339844, "step": 126390 }, { "epoch": 1.0929001910921652, "grad_norm": 4.074215168783285, "learning_rate": 2.563770253703433e-06, "loss": 0.044451141357421876, "step": 126395 }, { "epoch": 1.0929434246137084, "grad_norm": 11.885088662032194, "learning_rate": 2.5635686869178907e-06, "loss": 0.08440132141113281, "step": 126400 }, { "epoch": 1.0929866581352519, "grad_norm": 2.3473741543261197, "learning_rate": 2.56336712214513e-06, "loss": 0.11869735717773437, "step": 126405 }, { "epoch": 1.093029891656795, "grad_norm": 17.997632231527625, "learning_rate": 2.5631655593860823e-06, "loss": 0.18024444580078125, "step": 126410 }, { "epoch": 1.0930731251783383, "grad_norm": 1.1395235605141474, "learning_rate": 2.5629639986416765e-06, "loss": 0.05663986206054687, "step": 126415 }, { "epoch": 1.0931163586998816, "grad_norm": 0.7847284415612186, "learning_rate": 2.5627624399128424e-06, "loss": 0.08871841430664062, "step": 126420 }, { "epoch": 1.0931595922214248, "grad_norm": 0.052561135283007834, "learning_rate": 2.5625608832005096e-06, "loss": 0.13736696243286134, "step": 126425 }, { "epoch": 1.093202825742968, "grad_norm": 0.16367780573489427, "learning_rate": 2.5623593285056068e-06, "loss": 0.06711158752441407, "step": 126430 }, { "epoch": 1.0932460592645112, "grad_norm": 7.194212902059272, "learning_rate": 2.562157775829064e-06, "loss": 0.3201080322265625, "step": 126435 }, { "epoch": 1.0932892927860547, "grad_norm": 0.7264828274687393, "learning_rate": 2.5619562251718106e-06, "loss": 0.04585418701171875, "step": 126440 }, { "epoch": 1.093332526307598, "grad_norm": 21.545435161545125, "learning_rate": 2.561754676534776e-06, "loss": 0.07808189392089844, "step": 126445 }, { "epoch": 1.0933757598291411, "grad_norm": 0.4605062323123653, "learning_rate": 2.5615531299188913e-06, "loss": 0.02704925537109375, "step": 126450 }, { "epoch": 1.0934189933506844, "grad_norm": 0.12473429208707512, "learning_rate": 2.561351585325085e-06, "loss": 0.06950836181640625, "step": 126455 }, { "epoch": 1.0934622268722276, "grad_norm": 18.625925641943862, "learning_rate": 2.561150042754286e-06, "loss": 0.2655021667480469, "step": 126460 }, { "epoch": 1.0935054603937708, "grad_norm": 0.28136246569924717, "learning_rate": 2.560948502207424e-06, "loss": 0.024978828430175782, "step": 126465 }, { "epoch": 1.0935486939153143, "grad_norm": 33.315325345958826, "learning_rate": 2.5607469636854272e-06, "loss": 0.16916656494140625, "step": 126470 }, { "epoch": 1.0935919274368575, "grad_norm": 3.729800778038888, "learning_rate": 2.560545427189229e-06, "loss": 0.08625907897949218, "step": 126475 }, { "epoch": 1.0936351609584007, "grad_norm": 1.7049576685006544, "learning_rate": 2.5603438927197555e-06, "loss": 0.06757965087890624, "step": 126480 }, { "epoch": 1.093678394479944, "grad_norm": 8.667867525318389, "learning_rate": 2.560142360277938e-06, "loss": 0.0507171630859375, "step": 126485 }, { "epoch": 1.0937216280014872, "grad_norm": 60.98829834274525, "learning_rate": 2.5599408298647038e-06, "loss": 0.34991607666015623, "step": 126490 }, { "epoch": 1.0937648615230304, "grad_norm": 1.1473408309982986, "learning_rate": 2.5597393014809842e-06, "loss": 0.017258071899414064, "step": 126495 }, { "epoch": 1.0938080950445737, "grad_norm": 6.537091284415796, "learning_rate": 2.5595377751277086e-06, "loss": 0.0627227783203125, "step": 126500 }, { "epoch": 1.093851328566117, "grad_norm": 0.38122714870888824, "learning_rate": 2.559336250805804e-06, "loss": 0.032693099975585935, "step": 126505 }, { "epoch": 1.0938945620876603, "grad_norm": 2.0991299704885065, "learning_rate": 2.559134728516203e-06, "loss": 0.19425201416015625, "step": 126510 }, { "epoch": 1.0939377956092036, "grad_norm": 2.412054073759596, "learning_rate": 2.558933208259834e-06, "loss": 0.015478515625, "step": 126515 }, { "epoch": 1.0939810291307468, "grad_norm": 2.25281017258805, "learning_rate": 2.5587316900376253e-06, "loss": 0.047734832763671874, "step": 126520 }, { "epoch": 1.09402426265229, "grad_norm": 6.742652362681152, "learning_rate": 2.5585301738505077e-06, "loss": 0.05656890869140625, "step": 126525 }, { "epoch": 1.0940674961738333, "grad_norm": 18.29014274663834, "learning_rate": 2.55832865969941e-06, "loss": 0.34366912841796876, "step": 126530 }, { "epoch": 1.0941107296953767, "grad_norm": 3.361682514792818, "learning_rate": 2.55812714758526e-06, "loss": 0.08774948120117188, "step": 126535 }, { "epoch": 1.09415396321692, "grad_norm": 2.2263121471481555, "learning_rate": 2.55792563750899e-06, "loss": 0.08189697265625, "step": 126540 }, { "epoch": 1.0941971967384632, "grad_norm": 2.080306317524999, "learning_rate": 2.557724129471528e-06, "loss": 0.04792575836181641, "step": 126545 }, { "epoch": 1.0942404302600064, "grad_norm": 2.2662930817757556, "learning_rate": 2.5575226234738024e-06, "loss": 0.0551788330078125, "step": 126550 }, { "epoch": 1.0942836637815496, "grad_norm": 1.466694100514354, "learning_rate": 2.557321119516744e-06, "loss": 0.02351226806640625, "step": 126555 }, { "epoch": 1.0943268973030928, "grad_norm": 8.638118783539623, "learning_rate": 2.5571196176012815e-06, "loss": 0.053851318359375, "step": 126560 }, { "epoch": 1.094370130824636, "grad_norm": 14.806263228087952, "learning_rate": 2.556918117728343e-06, "loss": 0.12444381713867188, "step": 126565 }, { "epoch": 1.0944133643461795, "grad_norm": 6.141162915218446, "learning_rate": 2.55671661989886e-06, "loss": 0.18307266235351563, "step": 126570 }, { "epoch": 1.0944565978677228, "grad_norm": 10.532554565517904, "learning_rate": 2.556515124113761e-06, "loss": 0.04589080810546875, "step": 126575 }, { "epoch": 1.094499831389266, "grad_norm": 40.292439326662304, "learning_rate": 2.5563136303739745e-06, "loss": 0.35198211669921875, "step": 126580 }, { "epoch": 1.0945430649108092, "grad_norm": 0.7056036637151603, "learning_rate": 2.556112138680431e-06, "loss": 0.0799560546875, "step": 126585 }, { "epoch": 1.0945862984323524, "grad_norm": 0.08977093345090696, "learning_rate": 2.5559106490340587e-06, "loss": 0.04991970062255859, "step": 126590 }, { "epoch": 1.0946295319538957, "grad_norm": 34.906385774820734, "learning_rate": 2.555709161435788e-06, "loss": 0.3605934143066406, "step": 126595 }, { "epoch": 1.0946727654754391, "grad_norm": 0.6070032986170061, "learning_rate": 2.5555076758865453e-06, "loss": 0.018102455139160156, "step": 126600 }, { "epoch": 1.0947159989969824, "grad_norm": 5.856917782092961, "learning_rate": 2.555306192387264e-06, "loss": 0.2399932861328125, "step": 126605 }, { "epoch": 1.0947592325185256, "grad_norm": 1.836387500519282, "learning_rate": 2.5551047109388698e-06, "loss": 0.021047592163085938, "step": 126610 }, { "epoch": 1.0948024660400688, "grad_norm": 19.721078111263143, "learning_rate": 2.554903231542294e-06, "loss": 0.0570037841796875, "step": 126615 }, { "epoch": 1.094845699561612, "grad_norm": 8.986431842088738, "learning_rate": 2.554701754198466e-06, "loss": 0.3056549072265625, "step": 126620 }, { "epoch": 1.0948889330831553, "grad_norm": 1.1425641354302292, "learning_rate": 2.5545002789083135e-06, "loss": 0.016759490966796874, "step": 126625 }, { "epoch": 1.0949321666046987, "grad_norm": 3.939713642931747, "learning_rate": 2.5542988056727653e-06, "loss": 0.023193359375, "step": 126630 }, { "epoch": 1.094975400126242, "grad_norm": 5.443499104180097, "learning_rate": 2.554097334492753e-06, "loss": 0.03409576416015625, "step": 126635 }, { "epoch": 1.0950186336477852, "grad_norm": 11.286134548550054, "learning_rate": 2.5538958653692034e-06, "loss": 0.15680389404296874, "step": 126640 }, { "epoch": 1.0950618671693284, "grad_norm": 0.3900438149643544, "learning_rate": 2.5536943983030476e-06, "loss": 0.1418182373046875, "step": 126645 }, { "epoch": 1.0951051006908716, "grad_norm": 6.068884382599252, "learning_rate": 2.5534929332952137e-06, "loss": 0.018990325927734374, "step": 126650 }, { "epoch": 1.0951483342124149, "grad_norm": 31.36992346721951, "learning_rate": 2.553291470346631e-06, "loss": 0.16757984161376954, "step": 126655 }, { "epoch": 1.0951915677339583, "grad_norm": 8.9469866130204, "learning_rate": 2.5530900094582277e-06, "loss": 0.14795989990234376, "step": 126660 }, { "epoch": 1.0952348012555015, "grad_norm": 0.14406123130892629, "learning_rate": 2.552888550630933e-06, "loss": 0.07484474182128906, "step": 126665 }, { "epoch": 1.0952780347770448, "grad_norm": 12.331078126807181, "learning_rate": 2.552687093865679e-06, "loss": 0.09056396484375, "step": 126670 }, { "epoch": 1.095321268298588, "grad_norm": 3.4740145022727966, "learning_rate": 2.5524856391633916e-06, "loss": 0.016197967529296874, "step": 126675 }, { "epoch": 1.0953645018201312, "grad_norm": 0.24356557215727873, "learning_rate": 2.5522841865250013e-06, "loss": 0.07015914916992187, "step": 126680 }, { "epoch": 1.0954077353416745, "grad_norm": 64.76648339658006, "learning_rate": 2.5520827359514368e-06, "loss": 0.2548679351806641, "step": 126685 }, { "epoch": 1.0954509688632177, "grad_norm": 15.922622726720196, "learning_rate": 2.5518812874436264e-06, "loss": 0.15788841247558594, "step": 126690 }, { "epoch": 1.0954942023847611, "grad_norm": 8.401001062011964, "learning_rate": 2.5516798410024997e-06, "loss": 0.2500953674316406, "step": 126695 }, { "epoch": 1.0955374359063044, "grad_norm": 0.8627523685087327, "learning_rate": 2.5514783966289864e-06, "loss": 0.18740310668945312, "step": 126700 }, { "epoch": 1.0955806694278476, "grad_norm": 6.088925413440865, "learning_rate": 2.5512769543240156e-06, "loss": 0.04059524536132812, "step": 126705 }, { "epoch": 1.0956239029493908, "grad_norm": 0.5523465708730337, "learning_rate": 2.5510755140885157e-06, "loss": 0.13385238647460937, "step": 126710 }, { "epoch": 1.095667136470934, "grad_norm": 0.09893585434844326, "learning_rate": 2.5508740759234155e-06, "loss": 0.04182844161987305, "step": 126715 }, { "epoch": 1.0957103699924773, "grad_norm": 0.5268735465671579, "learning_rate": 2.550672639829645e-06, "loss": 0.08160934448242188, "step": 126720 }, { "epoch": 1.0957536035140207, "grad_norm": 1.3677328658365941, "learning_rate": 2.550471205808131e-06, "loss": 0.04275054931640625, "step": 126725 }, { "epoch": 1.095796837035564, "grad_norm": 0.030091003279083563, "learning_rate": 2.5502697738598057e-06, "loss": 0.04223194122314453, "step": 126730 }, { "epoch": 1.0958400705571072, "grad_norm": 1.2241821174475638, "learning_rate": 2.5500683439855957e-06, "loss": 0.05161895751953125, "step": 126735 }, { "epoch": 1.0958833040786504, "grad_norm": 1.3217811082071103, "learning_rate": 2.5498669161864317e-06, "loss": 0.013220977783203126, "step": 126740 }, { "epoch": 1.0959265376001937, "grad_norm": 6.679770273568701, "learning_rate": 2.5496654904632405e-06, "loss": 0.08609390258789062, "step": 126745 }, { "epoch": 1.0959697711217369, "grad_norm": 4.718646381356783, "learning_rate": 2.549464066816953e-06, "loss": 0.08561248779296875, "step": 126750 }, { "epoch": 1.09601300464328, "grad_norm": 27.871104511267216, "learning_rate": 2.5492626452484977e-06, "loss": 0.12368192672729492, "step": 126755 }, { "epoch": 1.0960562381648236, "grad_norm": 81.19678201549128, "learning_rate": 2.5490612257588015e-06, "loss": 0.16068649291992188, "step": 126760 }, { "epoch": 1.0960994716863668, "grad_norm": 5.604459896047229, "learning_rate": 2.5488598083487965e-06, "loss": 0.19350090026855468, "step": 126765 }, { "epoch": 1.09614270520791, "grad_norm": 0.39292200528788523, "learning_rate": 2.5486583930194106e-06, "loss": 0.019193267822265624, "step": 126770 }, { "epoch": 1.0961859387294532, "grad_norm": 1.0951244687848172, "learning_rate": 2.5484569797715714e-06, "loss": 0.011670303344726563, "step": 126775 }, { "epoch": 1.0962291722509965, "grad_norm": 1.0370774326335612, "learning_rate": 2.548255568606209e-06, "loss": 0.13425559997558595, "step": 126780 }, { "epoch": 1.0962724057725397, "grad_norm": 0.8520466089012455, "learning_rate": 2.5480541595242525e-06, "loss": 0.131414794921875, "step": 126785 }, { "epoch": 1.0963156392940832, "grad_norm": 2.419360534146402, "learning_rate": 2.5478527525266287e-06, "loss": 0.14532470703125, "step": 126790 }, { "epoch": 1.0963588728156264, "grad_norm": 11.07381339905763, "learning_rate": 2.54765134761427e-06, "loss": 0.07726516723632812, "step": 126795 }, { "epoch": 1.0964021063371696, "grad_norm": 0.7304237313024694, "learning_rate": 2.5474499447881026e-06, "loss": 0.06923942565917969, "step": 126800 }, { "epoch": 1.0964453398587128, "grad_norm": 0.12190465464920058, "learning_rate": 2.5472485440490553e-06, "loss": 0.13769950866699218, "step": 126805 }, { "epoch": 1.096488573380256, "grad_norm": 1.34773379564205, "learning_rate": 2.5470471453980586e-06, "loss": 0.10070037841796875, "step": 126810 }, { "epoch": 1.0965318069017993, "grad_norm": 0.6040983194530404, "learning_rate": 2.546845748836041e-06, "loss": 0.104193115234375, "step": 126815 }, { "epoch": 1.0965750404233425, "grad_norm": 23.398838708134665, "learning_rate": 2.5466443543639286e-06, "loss": 0.07343597412109375, "step": 126820 }, { "epoch": 1.096618273944886, "grad_norm": 6.613351776408978, "learning_rate": 2.546442961982654e-06, "loss": 0.04634552001953125, "step": 126825 }, { "epoch": 1.0966615074664292, "grad_norm": 0.6408161223438532, "learning_rate": 2.546241571693145e-06, "loss": 0.06701812744140626, "step": 126830 }, { "epoch": 1.0967047409879724, "grad_norm": 0.2629889655452992, "learning_rate": 2.5460401834963283e-06, "loss": 0.026383209228515624, "step": 126835 }, { "epoch": 1.0967479745095157, "grad_norm": 17.604375244514873, "learning_rate": 2.545838797393135e-06, "loss": 0.03127098083496094, "step": 126840 }, { "epoch": 1.096791208031059, "grad_norm": 0.1534096997344158, "learning_rate": 2.545637413384493e-06, "loss": 0.056676483154296874, "step": 126845 }, { "epoch": 1.0968344415526021, "grad_norm": 3.247342742084378, "learning_rate": 2.5454360314713312e-06, "loss": 0.16759872436523438, "step": 126850 }, { "epoch": 1.0968776750741456, "grad_norm": 11.824774599092004, "learning_rate": 2.545234651654577e-06, "loss": 0.022796630859375, "step": 126855 }, { "epoch": 1.0969209085956888, "grad_norm": 0.28055120848329695, "learning_rate": 2.5450332739351617e-06, "loss": 0.03705520629882812, "step": 126860 }, { "epoch": 1.096964142117232, "grad_norm": 1.158086457427208, "learning_rate": 2.544831898314012e-06, "loss": 0.13382225036621093, "step": 126865 }, { "epoch": 1.0970073756387753, "grad_norm": 4.985351263675938, "learning_rate": 2.5446305247920583e-06, "loss": 0.2891273498535156, "step": 126870 }, { "epoch": 1.0970506091603185, "grad_norm": 2.980231015866108, "learning_rate": 2.5444291533702283e-06, "loss": 0.07153244018554687, "step": 126875 }, { "epoch": 1.0970938426818617, "grad_norm": 2.0296084655327964, "learning_rate": 2.5442277840494503e-06, "loss": 0.034322357177734374, "step": 126880 }, { "epoch": 1.0971370762034052, "grad_norm": 1.4184805643940963, "learning_rate": 2.544026416830653e-06, "loss": 0.10059852600097656, "step": 126885 }, { "epoch": 1.0971803097249484, "grad_norm": 77.35719126744911, "learning_rate": 2.5438250517147654e-06, "loss": 0.14730873107910156, "step": 126890 }, { "epoch": 1.0972235432464916, "grad_norm": 32.05659430719519, "learning_rate": 2.5436236887027172e-06, "loss": 0.11796302795410156, "step": 126895 }, { "epoch": 1.0972667767680349, "grad_norm": 0.9039589987844138, "learning_rate": 2.5434223277954362e-06, "loss": 0.08088531494140624, "step": 126900 }, { "epoch": 1.097310010289578, "grad_norm": 0.6430904270773754, "learning_rate": 2.5432209689938516e-06, "loss": 0.02356109619140625, "step": 126905 }, { "epoch": 1.0973532438111213, "grad_norm": 0.3939967111630261, "learning_rate": 2.543019612298891e-06, "loss": 0.07890625, "step": 126910 }, { "epoch": 1.0973964773326648, "grad_norm": 0.20301258704998168, "learning_rate": 2.542818257711483e-06, "loss": 0.054046630859375, "step": 126915 }, { "epoch": 1.097439710854208, "grad_norm": 17.756027265196, "learning_rate": 2.542616905232556e-06, "loss": 0.092718505859375, "step": 126920 }, { "epoch": 1.0974829443757512, "grad_norm": 5.438964758672557, "learning_rate": 2.5424155548630413e-06, "loss": 0.09976043701171874, "step": 126925 }, { "epoch": 1.0975261778972945, "grad_norm": 2.856081681623022, "learning_rate": 2.5422142066038655e-06, "loss": 0.049483108520507815, "step": 126930 }, { "epoch": 1.0975694114188377, "grad_norm": 4.225729492168167, "learning_rate": 2.542012860455957e-06, "loss": 0.01273965835571289, "step": 126935 }, { "epoch": 1.097612644940381, "grad_norm": 2.854157701502331, "learning_rate": 2.541811516420244e-06, "loss": 0.013062286376953124, "step": 126940 }, { "epoch": 1.0976558784619241, "grad_norm": 22.4667649249973, "learning_rate": 2.541610174497657e-06, "loss": 0.05207500457763672, "step": 126945 }, { "epoch": 1.0976991119834676, "grad_norm": 30.025587363168686, "learning_rate": 2.5414088346891214e-06, "loss": 0.261962890625, "step": 126950 }, { "epoch": 1.0977423455050108, "grad_norm": 2.7383406006881583, "learning_rate": 2.5412074969955695e-06, "loss": 0.02628173828125, "step": 126955 }, { "epoch": 1.097785579026554, "grad_norm": 0.8614924520548833, "learning_rate": 2.5410061614179283e-06, "loss": 0.06175155639648437, "step": 126960 }, { "epoch": 1.0978288125480973, "grad_norm": 42.700124062911954, "learning_rate": 2.5408048279571253e-06, "loss": 0.10916175842285156, "step": 126965 }, { "epoch": 1.0978720460696405, "grad_norm": 3.312810976111082, "learning_rate": 2.54060349661409e-06, "loss": 0.0786651611328125, "step": 126970 }, { "epoch": 1.0979152795911837, "grad_norm": 14.462753786559622, "learning_rate": 2.540402167389751e-06, "loss": 0.05170516967773438, "step": 126975 }, { "epoch": 1.0979585131127272, "grad_norm": 4.737962011572079, "learning_rate": 2.540200840285036e-06, "loss": 0.0447479248046875, "step": 126980 }, { "epoch": 1.0980017466342704, "grad_norm": 3.02619735519031, "learning_rate": 2.5399995153008744e-06, "loss": 0.0532135009765625, "step": 126985 }, { "epoch": 1.0980449801558136, "grad_norm": 0.5115660274378127, "learning_rate": 2.5397981924381947e-06, "loss": 0.33786163330078123, "step": 126990 }, { "epoch": 1.0980882136773569, "grad_norm": 13.535833249438436, "learning_rate": 2.539596871697925e-06, "loss": 0.0755218505859375, "step": 126995 }, { "epoch": 1.0981314471989, "grad_norm": 0.08987802941370056, "learning_rate": 2.5393955530809933e-06, "loss": 0.05259361267089844, "step": 127000 }, { "epoch": 1.0981746807204433, "grad_norm": 0.8570812373826054, "learning_rate": 2.5391942365883296e-06, "loss": 0.0220184326171875, "step": 127005 }, { "epoch": 1.0982179142419866, "grad_norm": 13.197091864950073, "learning_rate": 2.5389929222208606e-06, "loss": 0.0779052734375, "step": 127010 }, { "epoch": 1.09826114776353, "grad_norm": 11.944991994470996, "learning_rate": 2.5387916099795154e-06, "loss": 0.06171207427978516, "step": 127015 }, { "epoch": 1.0983043812850732, "grad_norm": 0.8905445927811746, "learning_rate": 2.5385902998652228e-06, "loss": 0.1351837158203125, "step": 127020 }, { "epoch": 1.0983476148066165, "grad_norm": 22.308511878187236, "learning_rate": 2.538388991878911e-06, "loss": 0.09975032806396485, "step": 127025 }, { "epoch": 1.0983908483281597, "grad_norm": 0.6177895029401392, "learning_rate": 2.538187686021508e-06, "loss": 0.0383880615234375, "step": 127030 }, { "epoch": 1.098434081849703, "grad_norm": 1.1127040893520568, "learning_rate": 2.5379863822939433e-06, "loss": 0.15639801025390626, "step": 127035 }, { "epoch": 1.0984773153712462, "grad_norm": 4.701185742083848, "learning_rate": 2.5377850806971447e-06, "loss": 0.025128555297851563, "step": 127040 }, { "epoch": 1.0985205488927896, "grad_norm": 4.476387784247552, "learning_rate": 2.537583781232039e-06, "loss": 0.22574310302734374, "step": 127045 }, { "epoch": 1.0985637824143328, "grad_norm": 4.037542037015628, "learning_rate": 2.537382483899557e-06, "loss": 0.017414951324462892, "step": 127050 }, { "epoch": 1.098607015935876, "grad_norm": 2.9882962654023295, "learning_rate": 2.5371811887006267e-06, "loss": 0.02310791015625, "step": 127055 }, { "epoch": 1.0986502494574193, "grad_norm": 9.309444024931043, "learning_rate": 2.536979895636175e-06, "loss": 0.07610244750976562, "step": 127060 }, { "epoch": 1.0986934829789625, "grad_norm": 4.462942803992017, "learning_rate": 2.536778604707132e-06, "loss": 0.107452392578125, "step": 127065 }, { "epoch": 1.0987367165005058, "grad_norm": 0.6185248879198165, "learning_rate": 2.5365773159144246e-06, "loss": 0.09075469970703125, "step": 127070 }, { "epoch": 1.098779950022049, "grad_norm": 1.8682413085249556, "learning_rate": 2.536376029258981e-06, "loss": 0.07170867919921875, "step": 127075 }, { "epoch": 1.0988231835435924, "grad_norm": 2.561447058909358, "learning_rate": 2.536174744741731e-06, "loss": 0.1230743408203125, "step": 127080 }, { "epoch": 1.0988664170651357, "grad_norm": 3.853070814471175, "learning_rate": 2.535973462363602e-06, "loss": 0.2787639617919922, "step": 127085 }, { "epoch": 1.0989096505866789, "grad_norm": 5.985680296481991, "learning_rate": 2.535772182125522e-06, "loss": 0.06918487548828126, "step": 127090 }, { "epoch": 1.0989528841082221, "grad_norm": 49.01577461089804, "learning_rate": 2.5355709040284203e-06, "loss": 0.061589813232421874, "step": 127095 }, { "epoch": 1.0989961176297653, "grad_norm": 98.44345940934716, "learning_rate": 2.5353696280732246e-06, "loss": 0.22613525390625, "step": 127100 }, { "epoch": 1.0990393511513088, "grad_norm": 3.212147779588484, "learning_rate": 2.535168354260863e-06, "loss": 0.09549560546875, "step": 127105 }, { "epoch": 1.099082584672852, "grad_norm": 0.2983340037655175, "learning_rate": 2.534967082592263e-06, "loss": 0.09268875122070312, "step": 127110 }, { "epoch": 1.0991258181943953, "grad_norm": 4.895856104584654, "learning_rate": 2.5347658130683537e-06, "loss": 0.035732269287109375, "step": 127115 }, { "epoch": 1.0991690517159385, "grad_norm": 10.336709720782755, "learning_rate": 2.534564545690064e-06, "loss": 0.049893951416015624, "step": 127120 }, { "epoch": 1.0992122852374817, "grad_norm": 37.4389176844435, "learning_rate": 2.534363280458322e-06, "loss": 0.14273300170898437, "step": 127125 }, { "epoch": 1.099255518759025, "grad_norm": 2.9789967775119024, "learning_rate": 2.5341620173740552e-06, "loss": 0.08452033996582031, "step": 127130 }, { "epoch": 1.0992987522805682, "grad_norm": 13.036414041870545, "learning_rate": 2.5339607564381915e-06, "loss": 0.137860107421875, "step": 127135 }, { "epoch": 1.0993419858021116, "grad_norm": 18.165519530500042, "learning_rate": 2.5337594976516594e-06, "loss": 0.07625961303710938, "step": 127140 }, { "epoch": 1.0993852193236548, "grad_norm": 9.93925627845199, "learning_rate": 2.5335582410153874e-06, "loss": 0.13929595947265624, "step": 127145 }, { "epoch": 1.099428452845198, "grad_norm": 39.41355351792275, "learning_rate": 2.5333569865303042e-06, "loss": 0.19319000244140624, "step": 127150 }, { "epoch": 1.0994716863667413, "grad_norm": 4.531021963916031, "learning_rate": 2.5331557341973374e-06, "loss": 0.220587158203125, "step": 127155 }, { "epoch": 1.0995149198882845, "grad_norm": 0.2928553758357049, "learning_rate": 2.532954484017415e-06, "loss": 0.07268447875976562, "step": 127160 }, { "epoch": 1.0995581534098278, "grad_norm": 5.303036932808788, "learning_rate": 2.5327532359914648e-06, "loss": 0.18446502685546876, "step": 127165 }, { "epoch": 1.0996013869313712, "grad_norm": 0.913622723409364, "learning_rate": 2.532551990120415e-06, "loss": 0.167254638671875, "step": 127170 }, { "epoch": 1.0996446204529144, "grad_norm": 0.5256718351740639, "learning_rate": 2.5323507464051947e-06, "loss": 0.058623123168945315, "step": 127175 }, { "epoch": 1.0996878539744577, "grad_norm": 0.39925821397817673, "learning_rate": 2.5321495048467317e-06, "loss": 0.018684768676757814, "step": 127180 }, { "epoch": 1.099731087496001, "grad_norm": 0.07872581988405497, "learning_rate": 2.531948265445954e-06, "loss": 0.07454833984375, "step": 127185 }, { "epoch": 1.0997743210175441, "grad_norm": 0.5500944180927356, "learning_rate": 2.531747028203789e-06, "loss": 0.11940288543701172, "step": 127190 }, { "epoch": 1.0998175545390874, "grad_norm": 7.935557928020133, "learning_rate": 2.5315457931211653e-06, "loss": 0.03408946990966797, "step": 127195 }, { "epoch": 1.0998607880606306, "grad_norm": 0.6193155443992058, "learning_rate": 2.5313445601990117e-06, "loss": 0.012562942504882813, "step": 127200 }, { "epoch": 1.099904021582174, "grad_norm": 2.2775303882840947, "learning_rate": 2.5311433294382537e-06, "loss": 0.023836517333984376, "step": 127205 }, { "epoch": 1.0999472551037173, "grad_norm": 7.183234606413204, "learning_rate": 2.530942100839823e-06, "loss": 0.035407638549804686, "step": 127210 }, { "epoch": 1.0999904886252605, "grad_norm": 0.2630982744053344, "learning_rate": 2.5307408744046463e-06, "loss": 0.0434417724609375, "step": 127215 }, { "epoch": 1.1000337221468037, "grad_norm": 3.6245242538082407, "learning_rate": 2.53053965013365e-06, "loss": 0.05380439758300781, "step": 127220 }, { "epoch": 1.100076955668347, "grad_norm": 1.3842665756562431, "learning_rate": 2.530338428027764e-06, "loss": 0.0402862548828125, "step": 127225 }, { "epoch": 1.1001201891898902, "grad_norm": 1.2935671068231178, "learning_rate": 2.5301372080879153e-06, "loss": 0.034176063537597653, "step": 127230 }, { "epoch": 1.1001634227114336, "grad_norm": 1.6628812887070643, "learning_rate": 2.5299359903150315e-06, "loss": 0.020551300048828124, "step": 127235 }, { "epoch": 1.1002066562329769, "grad_norm": 21.63307657911172, "learning_rate": 2.529734774710043e-06, "loss": 0.060368824005126956, "step": 127240 }, { "epoch": 1.10024988975452, "grad_norm": 8.423018958299144, "learning_rate": 2.529533561273875e-06, "loss": 0.1809417724609375, "step": 127245 }, { "epoch": 1.1002931232760633, "grad_norm": 4.884167060895026, "learning_rate": 2.5293323500074576e-06, "loss": 0.02772998809814453, "step": 127250 }, { "epoch": 1.1003363567976066, "grad_norm": 0.4643350699533376, "learning_rate": 2.5291311409117174e-06, "loss": 0.058917236328125, "step": 127255 }, { "epoch": 1.1003795903191498, "grad_norm": 19.213102833380276, "learning_rate": 2.5289299339875825e-06, "loss": 0.121246337890625, "step": 127260 }, { "epoch": 1.100422823840693, "grad_norm": 0.241626623476212, "learning_rate": 2.528728729235982e-06, "loss": 0.05409164428710937, "step": 127265 }, { "epoch": 1.1004660573622365, "grad_norm": 4.25680438625649, "learning_rate": 2.528527526657841e-06, "loss": 0.037359619140625, "step": 127270 }, { "epoch": 1.1005092908837797, "grad_norm": 6.056306058660274, "learning_rate": 2.5283263262540912e-06, "loss": 0.05934600830078125, "step": 127275 }, { "epoch": 1.100552524405323, "grad_norm": 0.4068688616932093, "learning_rate": 2.5281251280256584e-06, "loss": 0.45074806213378904, "step": 127280 }, { "epoch": 1.1005957579268661, "grad_norm": 9.609628182061572, "learning_rate": 2.52792393197347e-06, "loss": 0.19665908813476562, "step": 127285 }, { "epoch": 1.1006389914484094, "grad_norm": 3.5971070823515308, "learning_rate": 2.5277227380984554e-06, "loss": 0.05870552062988281, "step": 127290 }, { "epoch": 1.1006822249699526, "grad_norm": 12.28128144242855, "learning_rate": 2.5275215464015423e-06, "loss": 0.10170097351074218, "step": 127295 }, { "epoch": 1.100725458491496, "grad_norm": 22.40515336252549, "learning_rate": 2.527320356883656e-06, "loss": 0.0956817626953125, "step": 127300 }, { "epoch": 1.1007686920130393, "grad_norm": 0.17191106410303392, "learning_rate": 2.527119169545729e-06, "loss": 0.25529632568359373, "step": 127305 }, { "epoch": 1.1008119255345825, "grad_norm": 26.282892108454867, "learning_rate": 2.526917984388685e-06, "loss": 0.10313568115234376, "step": 127310 }, { "epoch": 1.1008551590561257, "grad_norm": 3.1739099172527614, "learning_rate": 2.526716801413454e-06, "loss": 0.03709259033203125, "step": 127315 }, { "epoch": 1.100898392577669, "grad_norm": 4.04190188057112, "learning_rate": 2.5265156206209634e-06, "loss": 0.21924896240234376, "step": 127320 }, { "epoch": 1.1009416260992122, "grad_norm": 53.26258858533458, "learning_rate": 2.5263144420121413e-06, "loss": 0.2661285400390625, "step": 127325 }, { "epoch": 1.1009848596207557, "grad_norm": 0.44085267043924986, "learning_rate": 2.5261132655879133e-06, "loss": 0.041169357299804685, "step": 127330 }, { "epoch": 1.1010280931422989, "grad_norm": 3.833721590870117, "learning_rate": 2.5259120913492107e-06, "loss": 0.03784255981445313, "step": 127335 }, { "epoch": 1.101071326663842, "grad_norm": 66.21203252508826, "learning_rate": 2.525710919296959e-06, "loss": 0.382708740234375, "step": 127340 }, { "epoch": 1.1011145601853853, "grad_norm": 3.081373703115743, "learning_rate": 2.5255097494320875e-06, "loss": 0.12411956787109375, "step": 127345 }, { "epoch": 1.1011577937069286, "grad_norm": 8.668845797748453, "learning_rate": 2.5253085817555228e-06, "loss": 0.049615478515625, "step": 127350 }, { "epoch": 1.1012010272284718, "grad_norm": 2.4390814521540363, "learning_rate": 2.525107416268193e-06, "loss": 0.0713714599609375, "step": 127355 }, { "epoch": 1.1012442607500152, "grad_norm": 3.8350118757155784, "learning_rate": 2.524906252971025e-06, "loss": 0.015135383605957032, "step": 127360 }, { "epoch": 1.1012874942715585, "grad_norm": 2.403802405196802, "learning_rate": 2.5247050918649474e-06, "loss": 0.17098922729492189, "step": 127365 }, { "epoch": 1.1013307277931017, "grad_norm": 23.83473217583461, "learning_rate": 2.524503932950888e-06, "loss": 0.06944198608398437, "step": 127370 }, { "epoch": 1.101373961314645, "grad_norm": 12.864842135199353, "learning_rate": 2.5243027762297755e-06, "loss": 0.09786834716796874, "step": 127375 }, { "epoch": 1.1014171948361882, "grad_norm": 2.826932923460401, "learning_rate": 2.5241016217025363e-06, "loss": 0.010895538330078124, "step": 127380 }, { "epoch": 1.1014604283577314, "grad_norm": 26.65656847955935, "learning_rate": 2.5239004693700983e-06, "loss": 0.09330215454101562, "step": 127385 }, { "epoch": 1.1015036618792746, "grad_norm": 19.314627323423295, "learning_rate": 2.5236993192333887e-06, "loss": 0.23737640380859376, "step": 127390 }, { "epoch": 1.101546895400818, "grad_norm": 4.704578318108213, "learning_rate": 2.523498171293336e-06, "loss": 0.13384742736816407, "step": 127395 }, { "epoch": 1.1015901289223613, "grad_norm": 3.527864753668211, "learning_rate": 2.5232970255508668e-06, "loss": 0.04236335754394531, "step": 127400 }, { "epoch": 1.1016333624439045, "grad_norm": 0.30972714934186557, "learning_rate": 2.5230958820069106e-06, "loss": 0.014772796630859375, "step": 127405 }, { "epoch": 1.1016765959654478, "grad_norm": 1.562740599381434, "learning_rate": 2.5228947406623946e-06, "loss": 0.08317050933837891, "step": 127410 }, { "epoch": 1.101719829486991, "grad_norm": 29.567166944038725, "learning_rate": 2.5226936015182457e-06, "loss": 0.21581954956054689, "step": 127415 }, { "epoch": 1.1017630630085342, "grad_norm": 5.356129331053014, "learning_rate": 2.522492464575391e-06, "loss": 0.04698944091796875, "step": 127420 }, { "epoch": 1.1018062965300777, "grad_norm": 10.654825048606344, "learning_rate": 2.5222913298347583e-06, "loss": 0.022833633422851562, "step": 127425 }, { "epoch": 1.101849530051621, "grad_norm": 1.8776871134299293, "learning_rate": 2.5220901972972767e-06, "loss": 0.031855010986328126, "step": 127430 }, { "epoch": 1.1018927635731641, "grad_norm": 5.824149280355347, "learning_rate": 2.5218890669638726e-06, "loss": 0.11652336120605469, "step": 127435 }, { "epoch": 1.1019359970947074, "grad_norm": 2.940747688387108, "learning_rate": 2.5216879388354743e-06, "loss": 0.265325927734375, "step": 127440 }, { "epoch": 1.1019792306162506, "grad_norm": 2.63011747988559, "learning_rate": 2.521486812913009e-06, "loss": 0.019387054443359374, "step": 127445 }, { "epoch": 1.1020224641377938, "grad_norm": 30.050806669248747, "learning_rate": 2.521285689197404e-06, "loss": 0.13074283599853515, "step": 127450 }, { "epoch": 1.102065697659337, "grad_norm": 2.4926874408136923, "learning_rate": 2.5210845676895872e-06, "loss": 0.017548465728759767, "step": 127455 }, { "epoch": 1.1021089311808805, "grad_norm": 5.943786396687291, "learning_rate": 2.5208834483904847e-06, "loss": 0.027843284606933593, "step": 127460 }, { "epoch": 1.1021521647024237, "grad_norm": 2.459725929291512, "learning_rate": 2.520682331301027e-06, "loss": 0.28995094299316404, "step": 127465 }, { "epoch": 1.102195398223967, "grad_norm": 1.4119361730246713, "learning_rate": 2.52048121642214e-06, "loss": 0.033159637451171876, "step": 127470 }, { "epoch": 1.1022386317455102, "grad_norm": 10.797667142790221, "learning_rate": 2.5202801037547514e-06, "loss": 0.09208106994628906, "step": 127475 }, { "epoch": 1.1022818652670534, "grad_norm": 3.9527397695612785, "learning_rate": 2.5200789932997876e-06, "loss": 0.04429931640625, "step": 127480 }, { "epoch": 1.1023250987885966, "grad_norm": 8.14761830778086, "learning_rate": 2.519877885058178e-06, "loss": 0.04005889892578125, "step": 127485 }, { "epoch": 1.10236833231014, "grad_norm": 0.1958982998327337, "learning_rate": 2.5196767790308476e-06, "loss": 0.030185699462890625, "step": 127490 }, { "epoch": 1.1024115658316833, "grad_norm": 0.4476114371801397, "learning_rate": 2.519475675218727e-06, "loss": 0.067633056640625, "step": 127495 }, { "epoch": 1.1024547993532265, "grad_norm": 0.6135619840629819, "learning_rate": 2.5192745736227417e-06, "loss": 0.14574308395385743, "step": 127500 }, { "epoch": 1.1024980328747698, "grad_norm": 4.1967195431397135, "learning_rate": 2.51907347424382e-06, "loss": 0.0654541015625, "step": 127505 }, { "epoch": 1.102541266396313, "grad_norm": 11.94275933337, "learning_rate": 2.518872377082888e-06, "loss": 0.10273818969726563, "step": 127510 }, { "epoch": 1.1025844999178562, "grad_norm": 0.5310876873146597, "learning_rate": 2.518671282140875e-06, "loss": 0.055828857421875, "step": 127515 }, { "epoch": 1.1026277334393995, "grad_norm": 3.5841344323551394, "learning_rate": 2.5184701894187065e-06, "loss": 0.013318634033203125, "step": 127520 }, { "epoch": 1.102670966960943, "grad_norm": 55.669025571281814, "learning_rate": 2.5182690989173113e-06, "loss": 0.39524497985839846, "step": 127525 }, { "epoch": 1.1027142004824861, "grad_norm": 0.5238803877416146, "learning_rate": 2.5180680106376175e-06, "loss": 0.22019500732421876, "step": 127530 }, { "epoch": 1.1027574340040294, "grad_norm": 2.8017571326042887, "learning_rate": 2.51786692458055e-06, "loss": 0.11536407470703125, "step": 127535 }, { "epoch": 1.1028006675255726, "grad_norm": 0.6253171400812044, "learning_rate": 2.517665840747039e-06, "loss": 0.15098342895507813, "step": 127540 }, { "epoch": 1.1028439010471158, "grad_norm": 25.971998975793618, "learning_rate": 2.51746475913801e-06, "loss": 0.0815399169921875, "step": 127545 }, { "epoch": 1.102887134568659, "grad_norm": 4.165664144764669, "learning_rate": 2.517263679754391e-06, "loss": 0.028017616271972655, "step": 127550 }, { "epoch": 1.1029303680902025, "grad_norm": 0.8670725295010133, "learning_rate": 2.517062602597108e-06, "loss": 0.02489166259765625, "step": 127555 }, { "epoch": 1.1029736016117457, "grad_norm": 10.054094765239945, "learning_rate": 2.5168615276670904e-06, "loss": 0.11307296752929688, "step": 127560 }, { "epoch": 1.103016835133289, "grad_norm": 5.422807687039429, "learning_rate": 2.516660454965265e-06, "loss": 0.1229888916015625, "step": 127565 }, { "epoch": 1.1030600686548322, "grad_norm": 9.786229172547248, "learning_rate": 2.516459384492559e-06, "loss": 0.08174362182617187, "step": 127570 }, { "epoch": 1.1031033021763754, "grad_norm": 1.9069288856731665, "learning_rate": 2.5162583162498996e-06, "loss": 0.014740371704101562, "step": 127575 }, { "epoch": 1.1031465356979187, "grad_norm": 0.10984054961225495, "learning_rate": 2.516057250238214e-06, "loss": 0.014012908935546875, "step": 127580 }, { "epoch": 1.103189769219462, "grad_norm": 0.5210090075473915, "learning_rate": 2.5158561864584286e-06, "loss": 0.013081645965576172, "step": 127585 }, { "epoch": 1.1032330027410053, "grad_norm": 2.201760669687016, "learning_rate": 2.515655124911473e-06, "loss": 0.06486320495605469, "step": 127590 }, { "epoch": 1.1032762362625486, "grad_norm": 3.439107803575216, "learning_rate": 2.5154540655982725e-06, "loss": 0.07898635864257812, "step": 127595 }, { "epoch": 1.1033194697840918, "grad_norm": 3.7266121155529075, "learning_rate": 2.5152530085197556e-06, "loss": 0.02809600830078125, "step": 127600 }, { "epoch": 1.103362703305635, "grad_norm": 7.300988519671526, "learning_rate": 2.5150519536768487e-06, "loss": 0.030767440795898438, "step": 127605 }, { "epoch": 1.1034059368271782, "grad_norm": 55.09200741589161, "learning_rate": 2.51485090107048e-06, "loss": 0.2603607177734375, "step": 127610 }, { "epoch": 1.1034491703487217, "grad_norm": 2.104566599816924, "learning_rate": 2.5146498507015748e-06, "loss": 0.08379669189453125, "step": 127615 }, { "epoch": 1.103492403870265, "grad_norm": 0.5916444267718093, "learning_rate": 2.514448802571062e-06, "loss": 0.02451353073120117, "step": 127620 }, { "epoch": 1.1035356373918082, "grad_norm": 32.38449770712314, "learning_rate": 2.5142477566798685e-06, "loss": 0.13473129272460938, "step": 127625 }, { "epoch": 1.1035788709133514, "grad_norm": 0.36629942690420236, "learning_rate": 2.5140467130289214e-06, "loss": 0.0356658935546875, "step": 127630 }, { "epoch": 1.1036221044348946, "grad_norm": 1.9803449549742305, "learning_rate": 2.5138456716191486e-06, "loss": 0.171173095703125, "step": 127635 }, { "epoch": 1.1036653379564378, "grad_norm": 0.11132847150636456, "learning_rate": 2.5136446324514763e-06, "loss": 0.05691070556640625, "step": 127640 }, { "epoch": 1.103708571477981, "grad_norm": 0.41659447169957947, "learning_rate": 2.5134435955268314e-06, "loss": 0.02701416015625, "step": 127645 }, { "epoch": 1.1037518049995245, "grad_norm": 3.623254737037456, "learning_rate": 2.5132425608461416e-06, "loss": 0.06753578186035156, "step": 127650 }, { "epoch": 1.1037950385210678, "grad_norm": 0.2653771366466892, "learning_rate": 2.513041528410334e-06, "loss": 0.018187713623046876, "step": 127655 }, { "epoch": 1.103838272042611, "grad_norm": 2.7378576299558968, "learning_rate": 2.5128404982203366e-06, "loss": 0.19309463500976562, "step": 127660 }, { "epoch": 1.1038815055641542, "grad_norm": 4.231540474251472, "learning_rate": 2.512639470277076e-06, "loss": 0.006848716735839843, "step": 127665 }, { "epoch": 1.1039247390856974, "grad_norm": 51.29352853191653, "learning_rate": 2.5124384445814788e-06, "loss": 0.20087966918945313, "step": 127670 }, { "epoch": 1.1039679726072407, "grad_norm": 1.0686900474452143, "learning_rate": 2.512237421134472e-06, "loss": 0.0436004638671875, "step": 127675 }, { "epoch": 1.1040112061287841, "grad_norm": 39.574754287221836, "learning_rate": 2.5120363999369826e-06, "loss": 0.159136962890625, "step": 127680 }, { "epoch": 1.1040544396503273, "grad_norm": 17.94795401951669, "learning_rate": 2.511835380989939e-06, "loss": 0.123529052734375, "step": 127685 }, { "epoch": 1.1040976731718706, "grad_norm": 1.2477317335709832, "learning_rate": 2.5116343642942672e-06, "loss": 0.05896816253662109, "step": 127690 }, { "epoch": 1.1041409066934138, "grad_norm": 1.3008453783960283, "learning_rate": 2.5114333498508954e-06, "loss": 0.014672088623046874, "step": 127695 }, { "epoch": 1.104184140214957, "grad_norm": 0.29095336042373154, "learning_rate": 2.5112323376607493e-06, "loss": 0.06581268310546876, "step": 127700 }, { "epoch": 1.1042273737365003, "grad_norm": 1.2715594134485577, "learning_rate": 2.511031327724756e-06, "loss": 0.04583930969238281, "step": 127705 }, { "epoch": 1.1042706072580435, "grad_norm": 106.78335553736885, "learning_rate": 2.5108303200438437e-06, "loss": 0.5045120239257812, "step": 127710 }, { "epoch": 1.104313840779587, "grad_norm": 0.10174593876454101, "learning_rate": 2.5106293146189375e-06, "loss": 0.04908943176269531, "step": 127715 }, { "epoch": 1.1043570743011302, "grad_norm": 1.1558239526383867, "learning_rate": 2.510428311450967e-06, "loss": 0.12300033569335937, "step": 127720 }, { "epoch": 1.1044003078226734, "grad_norm": 0.39089074347640396, "learning_rate": 2.510227310540858e-06, "loss": 0.04902706146240234, "step": 127725 }, { "epoch": 1.1044435413442166, "grad_norm": 0.4574547439908715, "learning_rate": 2.510026311889537e-06, "loss": 0.19370670318603517, "step": 127730 }, { "epoch": 1.1044867748657599, "grad_norm": 1.247871427905514, "learning_rate": 2.5098253154979306e-06, "loss": 0.0340118408203125, "step": 127735 }, { "epoch": 1.104530008387303, "grad_norm": 9.632280898118989, "learning_rate": 2.5096243213669674e-06, "loss": 0.0788818359375, "step": 127740 }, { "epoch": 1.1045732419088465, "grad_norm": 0.7269339365908695, "learning_rate": 2.5094233294975724e-06, "loss": 0.013411712646484376, "step": 127745 }, { "epoch": 1.1046164754303898, "grad_norm": 0.06931187208570032, "learning_rate": 2.509222339890675e-06, "loss": 0.012879562377929688, "step": 127750 }, { "epoch": 1.104659708951933, "grad_norm": 1.173896016382211, "learning_rate": 2.5090213525472e-06, "loss": 0.017243194580078124, "step": 127755 }, { "epoch": 1.1047029424734762, "grad_norm": 20.844891399309546, "learning_rate": 2.5088203674680754e-06, "loss": 0.12577438354492188, "step": 127760 }, { "epoch": 1.1047461759950195, "grad_norm": 0.38025692641221237, "learning_rate": 2.508619384654228e-06, "loss": 0.08081207275390626, "step": 127765 }, { "epoch": 1.1047894095165627, "grad_norm": 19.866241095600568, "learning_rate": 2.508418404106585e-06, "loss": 0.057042694091796874, "step": 127770 }, { "epoch": 1.104832643038106, "grad_norm": 4.548785603331432, "learning_rate": 2.508217425826071e-06, "loss": 0.019091796875, "step": 127775 }, { "epoch": 1.1048758765596494, "grad_norm": 5.2029954067924775, "learning_rate": 2.5080164498136165e-06, "loss": 0.056451416015625, "step": 127780 }, { "epoch": 1.1049191100811926, "grad_norm": 1.8586908193823741, "learning_rate": 2.507815476070147e-06, "loss": 0.008142662048339844, "step": 127785 }, { "epoch": 1.1049623436027358, "grad_norm": 19.869838164972908, "learning_rate": 2.5076145045965874e-06, "loss": 0.27341346740722655, "step": 127790 }, { "epoch": 1.105005577124279, "grad_norm": 0.26701813259210605, "learning_rate": 2.5074135353938673e-06, "loss": 0.07656936645507813, "step": 127795 }, { "epoch": 1.1050488106458223, "grad_norm": 11.957059319666602, "learning_rate": 2.507212568462913e-06, "loss": 0.0537628173828125, "step": 127800 }, { "epoch": 1.1050920441673655, "grad_norm": 0.4506671586937861, "learning_rate": 2.50701160380465e-06, "loss": 0.17034225463867186, "step": 127805 }, { "epoch": 1.105135277688909, "grad_norm": 4.738937122183093, "learning_rate": 2.506810641420005e-06, "loss": 0.05400238037109375, "step": 127810 }, { "epoch": 1.1051785112104522, "grad_norm": 5.297874308626703, "learning_rate": 2.506609681309907e-06, "loss": 0.0417388916015625, "step": 127815 }, { "epoch": 1.1052217447319954, "grad_norm": 9.746476632029681, "learning_rate": 2.506408723475281e-06, "loss": 0.023974609375, "step": 127820 }, { "epoch": 1.1052649782535386, "grad_norm": 13.412944860831297, "learning_rate": 2.5062077679170548e-06, "loss": 0.06099853515625, "step": 127825 }, { "epoch": 1.1053082117750819, "grad_norm": 30.573875745793256, "learning_rate": 2.506006814636155e-06, "loss": 0.0848052978515625, "step": 127830 }, { "epoch": 1.105351445296625, "grad_norm": 8.849108746111447, "learning_rate": 2.505805863633508e-06, "loss": 0.06045875549316406, "step": 127835 }, { "epoch": 1.1053946788181686, "grad_norm": 8.193278706490862, "learning_rate": 2.5056049149100395e-06, "loss": 0.07678260803222656, "step": 127840 }, { "epoch": 1.1054379123397118, "grad_norm": 7.295432321796844, "learning_rate": 2.5054039684666785e-06, "loss": 0.12874279022216797, "step": 127845 }, { "epoch": 1.105481145861255, "grad_norm": 4.745013924817945, "learning_rate": 2.50520302430435e-06, "loss": 0.048328399658203125, "step": 127850 }, { "epoch": 1.1055243793827982, "grad_norm": 0.12750734181486387, "learning_rate": 2.5050020824239822e-06, "loss": 0.03651371002197266, "step": 127855 }, { "epoch": 1.1055676129043415, "grad_norm": 0.5740353081164992, "learning_rate": 2.5048011428265016e-06, "loss": 0.21185150146484374, "step": 127860 }, { "epoch": 1.1056108464258847, "grad_norm": 3.1360637843106813, "learning_rate": 2.5046002055128335e-06, "loss": 0.0760162353515625, "step": 127865 }, { "epoch": 1.1056540799474281, "grad_norm": 0.4814401022936033, "learning_rate": 2.5043992704839046e-06, "loss": 0.021514129638671876, "step": 127870 }, { "epoch": 1.1056973134689714, "grad_norm": 2.984155360358486, "learning_rate": 2.5041983377406434e-06, "loss": 0.04073905944824219, "step": 127875 }, { "epoch": 1.1057405469905146, "grad_norm": 7.71063003443004, "learning_rate": 2.5039974072839755e-06, "loss": 0.1240203857421875, "step": 127880 }, { "epoch": 1.1057837805120578, "grad_norm": 7.043157516087351, "learning_rate": 2.5037964791148286e-06, "loss": 0.0416107177734375, "step": 127885 }, { "epoch": 1.105827014033601, "grad_norm": 39.94061067177454, "learning_rate": 2.503595553234128e-06, "loss": 0.07963409423828124, "step": 127890 }, { "epoch": 1.1058702475551443, "grad_norm": 4.775607763752272, "learning_rate": 2.5033946296428005e-06, "loss": 0.13946113586425782, "step": 127895 }, { "epoch": 1.1059134810766875, "grad_norm": 3.709272312695761, "learning_rate": 2.503193708341773e-06, "loss": 0.03159408569335938, "step": 127900 }, { "epoch": 1.105956714598231, "grad_norm": 0.08244208874549204, "learning_rate": 2.502992789331972e-06, "loss": 0.018611907958984375, "step": 127905 }, { "epoch": 1.1059999481197742, "grad_norm": 35.35650990020282, "learning_rate": 2.5027918726143244e-06, "loss": 0.1156585693359375, "step": 127910 }, { "epoch": 1.1060431816413174, "grad_norm": 12.633638689608082, "learning_rate": 2.502590958189758e-06, "loss": 0.03255882263183594, "step": 127915 }, { "epoch": 1.1060864151628607, "grad_norm": 6.823880892680755, "learning_rate": 2.5023900460591975e-06, "loss": 0.42181930541992185, "step": 127920 }, { "epoch": 1.1061296486844039, "grad_norm": 12.225429727933747, "learning_rate": 2.5021891362235696e-06, "loss": 0.05786895751953125, "step": 127925 }, { "epoch": 1.1061728822059471, "grad_norm": 3.7630285966460684, "learning_rate": 2.5019882286838017e-06, "loss": 0.015865516662597657, "step": 127930 }, { "epoch": 1.1062161157274906, "grad_norm": 11.913594967742187, "learning_rate": 2.5017873234408194e-06, "loss": 0.07052803039550781, "step": 127935 }, { "epoch": 1.1062593492490338, "grad_norm": 0.21514030409804977, "learning_rate": 2.501586420495551e-06, "loss": 0.14121856689453124, "step": 127940 }, { "epoch": 1.106302582770577, "grad_norm": 0.4616162030006324, "learning_rate": 2.5013855198489214e-06, "loss": 0.003179168701171875, "step": 127945 }, { "epoch": 1.1063458162921203, "grad_norm": 6.635984965834555, "learning_rate": 2.5011846215018584e-06, "loss": 0.1706817626953125, "step": 127950 }, { "epoch": 1.1063890498136635, "grad_norm": 9.375005983696923, "learning_rate": 2.500983725455288e-06, "loss": 0.034784698486328126, "step": 127955 }, { "epoch": 1.1064322833352067, "grad_norm": 8.231637615024088, "learning_rate": 2.5007828317101354e-06, "loss": 0.21204032897949218, "step": 127960 }, { "epoch": 1.10647551685675, "grad_norm": 1.376182018470664, "learning_rate": 2.500581940267329e-06, "loss": 0.14305191040039061, "step": 127965 }, { "epoch": 1.1065187503782934, "grad_norm": 2.984544764477735, "learning_rate": 2.5003810511277938e-06, "loss": 0.05528564453125, "step": 127970 }, { "epoch": 1.1065619838998366, "grad_norm": 5.89155128168492, "learning_rate": 2.500180164292458e-06, "loss": 0.2806816101074219, "step": 127975 }, { "epoch": 1.1066052174213798, "grad_norm": 0.6649520942220347, "learning_rate": 2.4999792797622468e-06, "loss": 0.18996543884277345, "step": 127980 }, { "epoch": 1.106648450942923, "grad_norm": 0.7956841027456066, "learning_rate": 2.4997783975380866e-06, "loss": 0.061235809326171876, "step": 127985 }, { "epoch": 1.1066916844644663, "grad_norm": 8.811974961315759, "learning_rate": 2.499577517620905e-06, "loss": 0.2232452392578125, "step": 127990 }, { "epoch": 1.1067349179860095, "grad_norm": 0.4356622815458921, "learning_rate": 2.4993766400116278e-06, "loss": 0.07556304931640626, "step": 127995 }, { "epoch": 1.106778151507553, "grad_norm": 2.801565821142107, "learning_rate": 2.4991757647111793e-06, "loss": 0.21451797485351562, "step": 128000 }, { "epoch": 1.1068213850290962, "grad_norm": 0.35269704633601406, "learning_rate": 2.49897489172049e-06, "loss": 0.090966796875, "step": 128005 }, { "epoch": 1.1068646185506394, "grad_norm": 2.08445050139646, "learning_rate": 2.4987740210404837e-06, "loss": 0.06862335205078125, "step": 128010 }, { "epoch": 1.1069078520721827, "grad_norm": 1.0173495224350961, "learning_rate": 2.4985731526720873e-06, "loss": 0.04008541107177734, "step": 128015 }, { "epoch": 1.106951085593726, "grad_norm": 8.099138102911207, "learning_rate": 2.498372286616227e-06, "loss": 0.19085102081298827, "step": 128020 }, { "epoch": 1.1069943191152691, "grad_norm": 0.18230089960472357, "learning_rate": 2.49817142287383e-06, "loss": 0.062202739715576175, "step": 128025 }, { "epoch": 1.1070375526368126, "grad_norm": 45.02324408883329, "learning_rate": 2.497970561445821e-06, "loss": 0.24711761474609376, "step": 128030 }, { "epoch": 1.1070807861583558, "grad_norm": 5.410042593639285, "learning_rate": 2.497769702333128e-06, "loss": 0.05330963134765625, "step": 128035 }, { "epoch": 1.107124019679899, "grad_norm": 0.8307919560063542, "learning_rate": 2.497568845536677e-06, "loss": 0.030096435546875, "step": 128040 }, { "epoch": 1.1071672532014423, "grad_norm": 0.8156261269447451, "learning_rate": 2.4973679910573937e-06, "loss": 0.02305145263671875, "step": 128045 }, { "epoch": 1.1072104867229855, "grad_norm": 0.20385977952959955, "learning_rate": 2.497167138896205e-06, "loss": 0.040570831298828124, "step": 128050 }, { "epoch": 1.1072537202445287, "grad_norm": 47.36295921819866, "learning_rate": 2.4969662890540373e-06, "loss": 0.12126007080078124, "step": 128055 }, { "epoch": 1.1072969537660722, "grad_norm": 0.9179090145068984, "learning_rate": 2.4967654415318167e-06, "loss": 0.1287445068359375, "step": 128060 }, { "epoch": 1.1073401872876154, "grad_norm": 15.945983744844593, "learning_rate": 2.496564596330468e-06, "loss": 0.1381500244140625, "step": 128065 }, { "epoch": 1.1073834208091586, "grad_norm": 18.851230205181576, "learning_rate": 2.49636375345092e-06, "loss": 0.132855224609375, "step": 128070 }, { "epoch": 1.1074266543307019, "grad_norm": 2.335743451306148, "learning_rate": 2.4961629128940975e-06, "loss": 0.023119735717773437, "step": 128075 }, { "epoch": 1.107469887852245, "grad_norm": 10.10085957198504, "learning_rate": 2.4959620746609277e-06, "loss": 0.06400890350341797, "step": 128080 }, { "epoch": 1.1075131213737883, "grad_norm": 0.26709598206875507, "learning_rate": 2.4957612387523365e-06, "loss": 0.04861793518066406, "step": 128085 }, { "epoch": 1.1075563548953316, "grad_norm": 11.845330386658596, "learning_rate": 2.4955604051692496e-06, "loss": 0.16818618774414062, "step": 128090 }, { "epoch": 1.107599588416875, "grad_norm": 3.8519798353125942, "learning_rate": 2.4953595739125925e-06, "loss": 0.14674072265625, "step": 128095 }, { "epoch": 1.1076428219384182, "grad_norm": 0.18326399138721278, "learning_rate": 2.4951587449832936e-06, "loss": 0.024198150634765624, "step": 128100 }, { "epoch": 1.1076860554599615, "grad_norm": 0.6896870272988314, "learning_rate": 2.4949579183822773e-06, "loss": 0.018868064880371092, "step": 128105 }, { "epoch": 1.1077292889815047, "grad_norm": 3.252762157118422, "learning_rate": 2.494757094110471e-06, "loss": 0.03197784423828125, "step": 128110 }, { "epoch": 1.107772522503048, "grad_norm": 1.279634832721045, "learning_rate": 2.4945562721688005e-06, "loss": 0.11417179107666016, "step": 128115 }, { "epoch": 1.1078157560245911, "grad_norm": 0.55849258769185, "learning_rate": 2.4943554525581917e-06, "loss": 0.00832672119140625, "step": 128120 }, { "epoch": 1.1078589895461346, "grad_norm": 12.29986573754067, "learning_rate": 2.4941546352795696e-06, "loss": 0.025952529907226563, "step": 128125 }, { "epoch": 1.1079022230676778, "grad_norm": 0.31571948416682855, "learning_rate": 2.493953820333863e-06, "loss": 0.011966514587402343, "step": 128130 }, { "epoch": 1.107945456589221, "grad_norm": 1.7020195797257685, "learning_rate": 2.493753007721996e-06, "loss": 0.02056427001953125, "step": 128135 }, { "epoch": 1.1079886901107643, "grad_norm": 10.970155587637601, "learning_rate": 2.493552197444896e-06, "loss": 0.02491912841796875, "step": 128140 }, { "epoch": 1.1080319236323075, "grad_norm": 37.45479108766299, "learning_rate": 2.4933513895034885e-06, "loss": 0.051218414306640626, "step": 128145 }, { "epoch": 1.1080751571538507, "grad_norm": 4.438215414558965, "learning_rate": 2.4931505838986995e-06, "loss": 0.033466148376464847, "step": 128150 }, { "epoch": 1.108118390675394, "grad_norm": 2.841982459086268, "learning_rate": 2.492949780631455e-06, "loss": 0.06522064208984375, "step": 128155 }, { "epoch": 1.1081616241969374, "grad_norm": 3.709823310449401, "learning_rate": 2.4927489797026805e-06, "loss": 0.264959716796875, "step": 128160 }, { "epoch": 1.1082048577184807, "grad_norm": 2.1321671316207285, "learning_rate": 2.492548181113304e-06, "loss": 0.1204193115234375, "step": 128165 }, { "epoch": 1.1082480912400239, "grad_norm": 15.41959183073856, "learning_rate": 2.4923473848642504e-06, "loss": 0.04283447265625, "step": 128170 }, { "epoch": 1.108291324761567, "grad_norm": 0.1522178062806543, "learning_rate": 2.492146590956446e-06, "loss": 0.022121238708496093, "step": 128175 }, { "epoch": 1.1083345582831103, "grad_norm": 2.174507286649399, "learning_rate": 2.4919457993908164e-06, "loss": 0.04073333740234375, "step": 128180 }, { "epoch": 1.1083777918046536, "grad_norm": 18.526301627686312, "learning_rate": 2.4917450101682875e-06, "loss": 0.11039133071899414, "step": 128185 }, { "epoch": 1.108421025326197, "grad_norm": 13.02502892588257, "learning_rate": 2.491544223289785e-06, "loss": 0.06599273681640624, "step": 128190 }, { "epoch": 1.1084642588477402, "grad_norm": 0.5171021061890388, "learning_rate": 2.491343438756237e-06, "loss": 0.034906768798828126, "step": 128195 }, { "epoch": 1.1085074923692835, "grad_norm": 26.181222371811796, "learning_rate": 2.491142656568568e-06, "loss": 0.2290283203125, "step": 128200 }, { "epoch": 1.1085507258908267, "grad_norm": 0.13911727669180388, "learning_rate": 2.490941876727704e-06, "loss": 0.017912673950195312, "step": 128205 }, { "epoch": 1.10859395941237, "grad_norm": 0.3912580497219467, "learning_rate": 2.4907410992345704e-06, "loss": 0.029862213134765624, "step": 128210 }, { "epoch": 1.1086371929339132, "grad_norm": 0.6041438574064841, "learning_rate": 2.4905403240900945e-06, "loss": 0.1489990234375, "step": 128215 }, { "epoch": 1.1086804264554564, "grad_norm": 0.34349134897639794, "learning_rate": 2.4903395512952004e-06, "loss": 0.09855499267578124, "step": 128220 }, { "epoch": 1.1087236599769998, "grad_norm": 1.1379871849744185, "learning_rate": 2.490138780850817e-06, "loss": 0.013252639770507812, "step": 128225 }, { "epoch": 1.108766893498543, "grad_norm": 1.056249620983224, "learning_rate": 2.4899380127578682e-06, "loss": 0.029641342163085938, "step": 128230 }, { "epoch": 1.1088101270200863, "grad_norm": 0.7180671638837529, "learning_rate": 2.4897372470172797e-06, "loss": 0.06623764038085937, "step": 128235 }, { "epoch": 1.1088533605416295, "grad_norm": 26.027738116510076, "learning_rate": 2.4895364836299777e-06, "loss": 0.1487457275390625, "step": 128240 }, { "epoch": 1.1088965940631728, "grad_norm": 0.30278170715374303, "learning_rate": 2.489335722596889e-06, "loss": 0.046355819702148436, "step": 128245 }, { "epoch": 1.108939827584716, "grad_norm": 7.53027989784622, "learning_rate": 2.4891349639189387e-06, "loss": 0.07585334777832031, "step": 128250 }, { "epoch": 1.1089830611062594, "grad_norm": 15.668282613896706, "learning_rate": 2.4889342075970516e-06, "loss": 0.09761314392089844, "step": 128255 }, { "epoch": 1.1090262946278027, "grad_norm": 9.315210240140242, "learning_rate": 2.488733453632156e-06, "loss": 0.041839218139648436, "step": 128260 }, { "epoch": 1.109069528149346, "grad_norm": 1.1846167441661, "learning_rate": 2.488532702025177e-06, "loss": 0.05684375762939453, "step": 128265 }, { "epoch": 1.1091127616708891, "grad_norm": 8.913019632932889, "learning_rate": 2.4883319527770386e-06, "loss": 0.16549224853515626, "step": 128270 }, { "epoch": 1.1091559951924324, "grad_norm": 16.47047697720694, "learning_rate": 2.4881312058886692e-06, "loss": 0.09337539672851562, "step": 128275 }, { "epoch": 1.1091992287139756, "grad_norm": 2.1615282607640975, "learning_rate": 2.487930461360993e-06, "loss": 0.09237213134765625, "step": 128280 }, { "epoch": 1.109242462235519, "grad_norm": 9.984032594688456, "learning_rate": 2.4877297191949356e-06, "loss": 0.09886245727539063, "step": 128285 }, { "epoch": 1.1092856957570623, "grad_norm": 0.7788406161473383, "learning_rate": 2.487528979391424e-06, "loss": 0.04998855590820313, "step": 128290 }, { "epoch": 1.1093289292786055, "grad_norm": 0.08263298292028046, "learning_rate": 2.487328241951384e-06, "loss": 0.019959259033203124, "step": 128295 }, { "epoch": 1.1093721628001487, "grad_norm": 1.0931286082379992, "learning_rate": 2.48712750687574e-06, "loss": 0.3080482482910156, "step": 128300 }, { "epoch": 1.109415396321692, "grad_norm": 19.27019494366795, "learning_rate": 2.4869267741654194e-06, "loss": 0.07622833251953125, "step": 128305 }, { "epoch": 1.1094586298432352, "grad_norm": 6.244209732645498, "learning_rate": 2.486726043821347e-06, "loss": 0.04474029541015625, "step": 128310 }, { "epoch": 1.1095018633647786, "grad_norm": 1.4609095879283887, "learning_rate": 2.486525315844449e-06, "loss": 0.03409423828125, "step": 128315 }, { "epoch": 1.1095450968863219, "grad_norm": 8.827039316033531, "learning_rate": 2.4863245902356495e-06, "loss": 0.08356380462646484, "step": 128320 }, { "epoch": 1.109588330407865, "grad_norm": 5.433522873170884, "learning_rate": 2.4861238669958767e-06, "loss": 0.152020263671875, "step": 128325 }, { "epoch": 1.1096315639294083, "grad_norm": 4.03380528197156, "learning_rate": 2.4859231461260547e-06, "loss": 0.04441642761230469, "step": 128330 }, { "epoch": 1.1096747974509515, "grad_norm": 0.34022743788079407, "learning_rate": 2.48572242762711e-06, "loss": 0.01196136474609375, "step": 128335 }, { "epoch": 1.1097180309724948, "grad_norm": 1.1283479384271293, "learning_rate": 2.4855217114999687e-06, "loss": 0.1275327682495117, "step": 128340 }, { "epoch": 1.109761264494038, "grad_norm": 8.904483673680215, "learning_rate": 2.4853209977455554e-06, "loss": 0.10310955047607422, "step": 128345 }, { "epoch": 1.1098044980155815, "grad_norm": 8.04234066810237, "learning_rate": 2.4851202863647953e-06, "loss": 0.087060546875, "step": 128350 }, { "epoch": 1.1098477315371247, "grad_norm": 0.30613007889317795, "learning_rate": 2.484919577358616e-06, "loss": 0.044788360595703125, "step": 128355 }, { "epoch": 1.109890965058668, "grad_norm": 0.8566429345262514, "learning_rate": 2.484718870727941e-06, "loss": 0.034540557861328126, "step": 128360 }, { "epoch": 1.1099341985802111, "grad_norm": 0.1484691522013297, "learning_rate": 2.4845181664736982e-06, "loss": 0.46805572509765625, "step": 128365 }, { "epoch": 1.1099774321017544, "grad_norm": 11.956198364726879, "learning_rate": 2.4843174645968117e-06, "loss": 0.053824996948242186, "step": 128370 }, { "epoch": 1.1100206656232976, "grad_norm": 2.123283122297173, "learning_rate": 2.4841167650982078e-06, "loss": 0.05436248779296875, "step": 128375 }, { "epoch": 1.110063899144841, "grad_norm": 2.8382529495803457, "learning_rate": 2.4839160679788105e-06, "loss": 0.03891754150390625, "step": 128380 }, { "epoch": 1.1101071326663843, "grad_norm": 2.3208658972460983, "learning_rate": 2.483715373239547e-06, "loss": 0.024231719970703124, "step": 128385 }, { "epoch": 1.1101503661879275, "grad_norm": 8.14705296385232, "learning_rate": 2.4835146808813436e-06, "loss": 0.0927154541015625, "step": 128390 }, { "epoch": 1.1101935997094707, "grad_norm": 0.5313361834436635, "learning_rate": 2.4833139909051247e-06, "loss": 0.03260955810546875, "step": 128395 }, { "epoch": 1.110236833231014, "grad_norm": 6.1296506832514135, "learning_rate": 2.4831133033118158e-06, "loss": 0.04607200622558594, "step": 128400 }, { "epoch": 1.1102800667525572, "grad_norm": 0.4823645197769045, "learning_rate": 2.4829126181023426e-06, "loss": 0.07298736572265625, "step": 128405 }, { "epoch": 1.1103233002741004, "grad_norm": 11.656777382746773, "learning_rate": 2.482711935277631e-06, "loss": 0.11403465270996094, "step": 128410 }, { "epoch": 1.1103665337956439, "grad_norm": 5.906433316493705, "learning_rate": 2.482511254838605e-06, "loss": 0.03730316162109375, "step": 128415 }, { "epoch": 1.110409767317187, "grad_norm": 7.665839813524601, "learning_rate": 2.482310576786193e-06, "loss": 0.06486129760742188, "step": 128420 }, { "epoch": 1.1104530008387303, "grad_norm": 5.805346550955966, "learning_rate": 2.482109901121318e-06, "loss": 0.03571662902832031, "step": 128425 }, { "epoch": 1.1104962343602736, "grad_norm": 3.1557951072806594, "learning_rate": 2.4819092278449065e-06, "loss": 0.052973175048828126, "step": 128430 }, { "epoch": 1.1105394678818168, "grad_norm": 0.4952458649210248, "learning_rate": 2.481708556957884e-06, "loss": 0.028166580200195312, "step": 128435 }, { "epoch": 1.11058270140336, "grad_norm": 1.7733403389931064, "learning_rate": 2.481507888461176e-06, "loss": 0.03940582275390625, "step": 128440 }, { "epoch": 1.1106259349249035, "grad_norm": 3.480627439713972, "learning_rate": 2.481307222355707e-06, "loss": 0.047483396530151364, "step": 128445 }, { "epoch": 1.1106691684464467, "grad_norm": 3.628319319553592, "learning_rate": 2.481106558642404e-06, "loss": 0.0436370849609375, "step": 128450 }, { "epoch": 1.11071240196799, "grad_norm": 10.225576512243242, "learning_rate": 2.4809058973221914e-06, "loss": 0.06779708862304687, "step": 128455 }, { "epoch": 1.1107556354895332, "grad_norm": 0.9976935167487453, "learning_rate": 2.4807052383959955e-06, "loss": 0.01554107666015625, "step": 128460 }, { "epoch": 1.1107988690110764, "grad_norm": 21.513997001901963, "learning_rate": 2.480504581864741e-06, "loss": 0.33359832763671876, "step": 128465 }, { "epoch": 1.1108421025326196, "grad_norm": 1.280999793223249, "learning_rate": 2.480303927729353e-06, "loss": 0.06932373046875, "step": 128470 }, { "epoch": 1.1108853360541628, "grad_norm": 5.269493875555309, "learning_rate": 2.4801032759907565e-06, "loss": 0.11346282958984374, "step": 128475 }, { "epoch": 1.1109285695757063, "grad_norm": 26.083833588460482, "learning_rate": 2.479902626649879e-06, "loss": 0.06863212585449219, "step": 128480 }, { "epoch": 1.1109718030972495, "grad_norm": 0.7943265806141693, "learning_rate": 2.479701979707645e-06, "loss": 0.0416351318359375, "step": 128485 }, { "epoch": 1.1110150366187928, "grad_norm": 37.62764323341035, "learning_rate": 2.479501335164979e-06, "loss": 0.1715087890625, "step": 128490 }, { "epoch": 1.111058270140336, "grad_norm": 2.5369072996839495, "learning_rate": 2.4793006930228068e-06, "loss": 0.021750259399414062, "step": 128495 }, { "epoch": 1.1111015036618792, "grad_norm": 3.515959276050768, "learning_rate": 2.4791000532820537e-06, "loss": 0.035166168212890626, "step": 128500 }, { "epoch": 1.1111447371834224, "grad_norm": 19.24703077254366, "learning_rate": 2.4788994159436457e-06, "loss": 0.1340728759765625, "step": 128505 }, { "epoch": 1.1111879707049659, "grad_norm": 0.2918043683723793, "learning_rate": 2.478698781008506e-06, "loss": 0.059566497802734375, "step": 128510 }, { "epoch": 1.1112312042265091, "grad_norm": 0.2777469842383504, "learning_rate": 2.478498148477563e-06, "loss": 0.062499618530273436, "step": 128515 }, { "epoch": 1.1112744377480523, "grad_norm": 1.1525382808425488, "learning_rate": 2.47829751835174e-06, "loss": 0.09638595581054688, "step": 128520 }, { "epoch": 1.1113176712695956, "grad_norm": 5.397667656422858, "learning_rate": 2.4780968906319624e-06, "loss": 0.29540481567382815, "step": 128525 }, { "epoch": 1.1113609047911388, "grad_norm": 26.579651057841005, "learning_rate": 2.477896265319157e-06, "loss": 0.047620201110839845, "step": 128530 }, { "epoch": 1.111404138312682, "grad_norm": 2.3685090117720877, "learning_rate": 2.477695642414247e-06, "loss": 0.010355377197265625, "step": 128535 }, { "epoch": 1.1114473718342255, "grad_norm": 79.74472359172312, "learning_rate": 2.4774950219181575e-06, "loss": 0.30062255859375, "step": 128540 }, { "epoch": 1.1114906053557687, "grad_norm": 88.72007270655982, "learning_rate": 2.477294403831816e-06, "loss": 0.19116287231445311, "step": 128545 }, { "epoch": 1.111533838877312, "grad_norm": 11.78832860002922, "learning_rate": 2.477093788156147e-06, "loss": 0.05665283203125, "step": 128550 }, { "epoch": 1.1115770723988552, "grad_norm": 5.153620098853675, "learning_rate": 2.4768931748920743e-06, "loss": 0.03179779052734375, "step": 128555 }, { "epoch": 1.1116203059203984, "grad_norm": 29.419433575473192, "learning_rate": 2.4766925640405247e-06, "loss": 0.09038658142089843, "step": 128560 }, { "epoch": 1.1116635394419416, "grad_norm": 10.611559833732626, "learning_rate": 2.4764919556024226e-06, "loss": 0.07852325439453126, "step": 128565 }, { "epoch": 1.111706772963485, "grad_norm": 11.029222554806841, "learning_rate": 2.4762913495786935e-06, "loss": 0.11558742523193359, "step": 128570 }, { "epoch": 1.1117500064850283, "grad_norm": 4.618354102756301, "learning_rate": 2.4760907459702615e-06, "loss": 0.03607254028320313, "step": 128575 }, { "epoch": 1.1117932400065715, "grad_norm": 6.3780029067211474, "learning_rate": 2.4758901447780532e-06, "loss": 0.0487945556640625, "step": 128580 }, { "epoch": 1.1118364735281148, "grad_norm": 17.351940345143895, "learning_rate": 2.4756895460029933e-06, "loss": 0.10655593872070312, "step": 128585 }, { "epoch": 1.111879707049658, "grad_norm": 9.004492212432334, "learning_rate": 2.475488949646007e-06, "loss": 0.09480705261230468, "step": 128590 }, { "epoch": 1.1119229405712012, "grad_norm": 0.37612681711217333, "learning_rate": 2.4752883557080196e-06, "loss": 0.014239883422851563, "step": 128595 }, { "epoch": 1.1119661740927445, "grad_norm": 25.08736920760967, "learning_rate": 2.475087764189956e-06, "loss": 0.20391769409179689, "step": 128600 }, { "epoch": 1.112009407614288, "grad_norm": 0.11039961671412989, "learning_rate": 2.47488717509274e-06, "loss": 0.10171737670898437, "step": 128605 }, { "epoch": 1.1120526411358311, "grad_norm": 1.3695130653548924, "learning_rate": 2.4746865884172982e-06, "loss": 0.0358489990234375, "step": 128610 }, { "epoch": 1.1120958746573744, "grad_norm": 0.46777935688340344, "learning_rate": 2.474486004164556e-06, "loss": 0.20883941650390625, "step": 128615 }, { "epoch": 1.1121391081789176, "grad_norm": 2.9415148698051174, "learning_rate": 2.4742854223354386e-06, "loss": 0.1318897247314453, "step": 128620 }, { "epoch": 1.1121823417004608, "grad_norm": 65.01441724447392, "learning_rate": 2.4740848429308698e-06, "loss": 0.4215518951416016, "step": 128625 }, { "epoch": 1.112225575222004, "grad_norm": 6.8564297406545585, "learning_rate": 2.473884265951775e-06, "loss": 0.0390625, "step": 128630 }, { "epoch": 1.1122688087435475, "grad_norm": 24.566685999531746, "learning_rate": 2.473683691399079e-06, "loss": 0.10760574340820313, "step": 128635 }, { "epoch": 1.1123120422650907, "grad_norm": 1.6690061305274502, "learning_rate": 2.473483119273708e-06, "loss": 0.037908172607421874, "step": 128640 }, { "epoch": 1.112355275786634, "grad_norm": 13.545329712656685, "learning_rate": 2.4732825495765866e-06, "loss": 0.09880523681640625, "step": 128645 }, { "epoch": 1.1123985093081772, "grad_norm": 0.9468397488081716, "learning_rate": 2.473081982308639e-06, "loss": 0.019762611389160155, "step": 128650 }, { "epoch": 1.1124417428297204, "grad_norm": 2.6853534854554546, "learning_rate": 2.4728814174707914e-06, "loss": 0.0269195556640625, "step": 128655 }, { "epoch": 1.1124849763512636, "grad_norm": 4.93339150221075, "learning_rate": 2.4726808550639673e-06, "loss": 0.10246238708496094, "step": 128660 }, { "epoch": 1.1125282098728069, "grad_norm": 5.131122891178282, "learning_rate": 2.4724802950890933e-06, "loss": 0.08380661010742188, "step": 128665 }, { "epoch": 1.1125714433943503, "grad_norm": 9.69318518655052, "learning_rate": 2.472279737547092e-06, "loss": 0.03834457397460937, "step": 128670 }, { "epoch": 1.1126146769158936, "grad_norm": 6.302064993427666, "learning_rate": 2.4720791824388915e-06, "loss": 0.0340667724609375, "step": 128675 }, { "epoch": 1.1126579104374368, "grad_norm": 0.9355113306223594, "learning_rate": 2.471878629765415e-06, "loss": 0.10605583190917969, "step": 128680 }, { "epoch": 1.11270114395898, "grad_norm": 0.12658934111666556, "learning_rate": 2.471678079527588e-06, "loss": 0.05934906005859375, "step": 128685 }, { "epoch": 1.1127443774805232, "grad_norm": 0.16221819098024176, "learning_rate": 2.4714775317263336e-06, "loss": 0.06027030944824219, "step": 128690 }, { "epoch": 1.1127876110020665, "grad_norm": 1.0419200024015933, "learning_rate": 2.4712769863625793e-06, "loss": 0.041191482543945314, "step": 128695 }, { "epoch": 1.11283084452361, "grad_norm": 6.778049874156387, "learning_rate": 2.4710764434372474e-06, "loss": 0.02994232177734375, "step": 128700 }, { "epoch": 1.1128740780451531, "grad_norm": 4.294197555105622, "learning_rate": 2.4708759029512654e-06, "loss": 0.018209075927734374, "step": 128705 }, { "epoch": 1.1129173115666964, "grad_norm": 0.2056530013939974, "learning_rate": 2.470675364905557e-06, "loss": 0.1766387939453125, "step": 128710 }, { "epoch": 1.1129605450882396, "grad_norm": 1.94911740075105, "learning_rate": 2.470474829301047e-06, "loss": 0.06995849609375, "step": 128715 }, { "epoch": 1.1130037786097828, "grad_norm": 0.20845075814399686, "learning_rate": 2.4702742961386595e-06, "loss": 0.03762664794921875, "step": 128720 }, { "epoch": 1.113047012131326, "grad_norm": 3.8653704157491426, "learning_rate": 2.4700737654193214e-06, "loss": 0.016852569580078126, "step": 128725 }, { "epoch": 1.1130902456528693, "grad_norm": 5.113215477087886, "learning_rate": 2.4698732371439546e-06, "loss": 0.027898597717285156, "step": 128730 }, { "epoch": 1.1131334791744127, "grad_norm": 0.07521851991998214, "learning_rate": 2.4696727113134865e-06, "loss": 0.022142410278320312, "step": 128735 }, { "epoch": 1.113176712695956, "grad_norm": 4.0458776821260205, "learning_rate": 2.4694721879288418e-06, "loss": 0.319244384765625, "step": 128740 }, { "epoch": 1.1132199462174992, "grad_norm": 5.802268677278056, "learning_rate": 2.469271666990944e-06, "loss": 0.02206573486328125, "step": 128745 }, { "epoch": 1.1132631797390424, "grad_norm": 9.800074236012902, "learning_rate": 2.4690711485007178e-06, "loss": 0.126639461517334, "step": 128750 }, { "epoch": 1.1133064132605857, "grad_norm": 1.18454960432322, "learning_rate": 2.4688706324590893e-06, "loss": 0.010637664794921875, "step": 128755 }, { "epoch": 1.1133496467821289, "grad_norm": 0.47289167994715275, "learning_rate": 2.4686701188669823e-06, "loss": 0.07029190063476562, "step": 128760 }, { "epoch": 1.1133928803036723, "grad_norm": 5.517949052857015, "learning_rate": 2.468469607725321e-06, "loss": 0.09764785766601562, "step": 128765 }, { "epoch": 1.1134361138252156, "grad_norm": 0.16880436800042084, "learning_rate": 2.4682690990350316e-06, "loss": 0.08714828491210938, "step": 128770 }, { "epoch": 1.1134793473467588, "grad_norm": 0.19910502390091758, "learning_rate": 2.4680685927970382e-06, "loss": 0.11855459213256836, "step": 128775 }, { "epoch": 1.113522580868302, "grad_norm": 17.07725078853877, "learning_rate": 2.467868089012265e-06, "loss": 0.0954132080078125, "step": 128780 }, { "epoch": 1.1135658143898453, "grad_norm": 6.407707165465006, "learning_rate": 2.4676675876816374e-06, "loss": 0.09162673950195313, "step": 128785 }, { "epoch": 1.1136090479113885, "grad_norm": 0.5024148016414017, "learning_rate": 2.4674670888060803e-06, "loss": 0.024100542068481445, "step": 128790 }, { "epoch": 1.113652281432932, "grad_norm": 19.268722627630204, "learning_rate": 2.4672665923865164e-06, "loss": 0.134942626953125, "step": 128795 }, { "epoch": 1.1136955149544752, "grad_norm": 2.902876059752807, "learning_rate": 2.4670660984238736e-06, "loss": 0.126873779296875, "step": 128800 }, { "epoch": 1.1137387484760184, "grad_norm": 7.00480563980368, "learning_rate": 2.4668656069190735e-06, "loss": 0.04051647186279297, "step": 128805 }, { "epoch": 1.1137819819975616, "grad_norm": 0.6168238356761221, "learning_rate": 2.4666651178730432e-06, "loss": 0.02293853759765625, "step": 128810 }, { "epoch": 1.1138252155191048, "grad_norm": 21.849950144359518, "learning_rate": 2.4664646312867064e-06, "loss": 0.12610321044921874, "step": 128815 }, { "epoch": 1.113868449040648, "grad_norm": 1.1320397476419055, "learning_rate": 2.4662641471609876e-06, "loss": 0.09395923614501953, "step": 128820 }, { "epoch": 1.1139116825621915, "grad_norm": 11.726657628660467, "learning_rate": 2.4660636654968096e-06, "loss": 0.058496856689453126, "step": 128825 }, { "epoch": 1.1139549160837348, "grad_norm": 17.05948534834884, "learning_rate": 2.4658631862951004e-06, "loss": 0.09270172119140625, "step": 128830 }, { "epoch": 1.113998149605278, "grad_norm": 4.592787626338808, "learning_rate": 2.4656627095567824e-06, "loss": 0.023855209350585938, "step": 128835 }, { "epoch": 1.1140413831268212, "grad_norm": 2.8672629204313176, "learning_rate": 2.4654622352827814e-06, "loss": 0.052405166625976565, "step": 128840 }, { "epoch": 1.1140846166483644, "grad_norm": 0.11622629005859907, "learning_rate": 2.465261763474021e-06, "loss": 0.09842300415039062, "step": 128845 }, { "epoch": 1.1141278501699077, "grad_norm": 1.6282329164397618, "learning_rate": 2.465061294131427e-06, "loss": 0.136492919921875, "step": 128850 }, { "epoch": 1.114171083691451, "grad_norm": 9.450757489964547, "learning_rate": 2.4648608272559215e-06, "loss": 0.11243476867675781, "step": 128855 }, { "epoch": 1.1142143172129944, "grad_norm": 12.904370025324507, "learning_rate": 2.464660362848431e-06, "loss": 0.0309539794921875, "step": 128860 }, { "epoch": 1.1142575507345376, "grad_norm": 0.0750652038359846, "learning_rate": 2.4644599009098795e-06, "loss": 0.0033756256103515624, "step": 128865 }, { "epoch": 1.1143007842560808, "grad_norm": 5.291413497580685, "learning_rate": 2.464259441441192e-06, "loss": 0.27081146240234377, "step": 128870 }, { "epoch": 1.114344017777624, "grad_norm": 0.31276442759029405, "learning_rate": 2.4640589844432927e-06, "loss": 0.0459136962890625, "step": 128875 }, { "epoch": 1.1143872512991673, "grad_norm": 0.695019964098629, "learning_rate": 2.4638585299171063e-06, "loss": 0.01963958740234375, "step": 128880 }, { "epoch": 1.1144304848207105, "grad_norm": 0.05106156313926565, "learning_rate": 2.463658077863556e-06, "loss": 0.01900625228881836, "step": 128885 }, { "epoch": 1.114473718342254, "grad_norm": 0.12660486630043546, "learning_rate": 2.463457628283568e-06, "loss": 0.023853492736816407, "step": 128890 }, { "epoch": 1.1145169518637972, "grad_norm": 5.817511648155389, "learning_rate": 2.463257181178065e-06, "loss": 0.12703857421875, "step": 128895 }, { "epoch": 1.1145601853853404, "grad_norm": 0.6359467064885671, "learning_rate": 2.4630567365479735e-06, "loss": 0.13144378662109374, "step": 128900 }, { "epoch": 1.1146034189068836, "grad_norm": 0.9484244639154906, "learning_rate": 2.462856294394217e-06, "loss": 0.06836700439453125, "step": 128905 }, { "epoch": 1.1146466524284269, "grad_norm": 6.901328891146763, "learning_rate": 2.4626558547177197e-06, "loss": 0.0341796875, "step": 128910 }, { "epoch": 1.11468988594997, "grad_norm": 109.03328183675386, "learning_rate": 2.4624554175194054e-06, "loss": 0.13748016357421874, "step": 128915 }, { "epoch": 1.1147331194715133, "grad_norm": 1.2862813269629378, "learning_rate": 2.4622549828002e-06, "loss": 0.2707611083984375, "step": 128920 }, { "epoch": 1.1147763529930568, "grad_norm": 1.935896224246226, "learning_rate": 2.462054550561026e-06, "loss": 0.05865345001220703, "step": 128925 }, { "epoch": 1.1148195865146, "grad_norm": 1.7545835583855354, "learning_rate": 2.46185412080281e-06, "loss": 0.1125579833984375, "step": 128930 }, { "epoch": 1.1148628200361432, "grad_norm": 0.17706436391630023, "learning_rate": 2.461653693526475e-06, "loss": 0.019864654541015624, "step": 128935 }, { "epoch": 1.1149060535576865, "grad_norm": 6.242268168725739, "learning_rate": 2.461453268732946e-06, "loss": 0.06516456604003906, "step": 128940 }, { "epoch": 1.1149492870792297, "grad_norm": 58.31104433517442, "learning_rate": 2.4612528464231458e-06, "loss": 0.1902141571044922, "step": 128945 }, { "epoch": 1.114992520600773, "grad_norm": 7.176947246293251, "learning_rate": 2.461052426598001e-06, "loss": 0.074053955078125, "step": 128950 }, { "epoch": 1.1150357541223164, "grad_norm": 0.839034886781232, "learning_rate": 2.4608520092584335e-06, "loss": 0.05518035888671875, "step": 128955 }, { "epoch": 1.1150789876438596, "grad_norm": 23.351960791851244, "learning_rate": 2.4606515944053696e-06, "loss": 0.19757041931152344, "step": 128960 }, { "epoch": 1.1151222211654028, "grad_norm": 1.4454271979010138, "learning_rate": 2.460451182039733e-06, "loss": 0.10550003051757813, "step": 128965 }, { "epoch": 1.115165454686946, "grad_norm": 5.028378418183841, "learning_rate": 2.4602507721624483e-06, "loss": 0.1897125244140625, "step": 128970 }, { "epoch": 1.1152086882084893, "grad_norm": 5.167092437434355, "learning_rate": 2.4600503647744387e-06, "loss": 0.09083709716796876, "step": 128975 }, { "epoch": 1.1152519217300325, "grad_norm": 0.27755966139602284, "learning_rate": 2.4598499598766298e-06, "loss": 0.08263778686523438, "step": 128980 }, { "epoch": 1.115295155251576, "grad_norm": 6.468421757447942, "learning_rate": 2.459649557469944e-06, "loss": 0.06519699096679688, "step": 128985 }, { "epoch": 1.1153383887731192, "grad_norm": 9.51826738429706, "learning_rate": 2.459449157555307e-06, "loss": 0.168994140625, "step": 128990 }, { "epoch": 1.1153816222946624, "grad_norm": 23.048559089109105, "learning_rate": 2.4592487601336435e-06, "loss": 0.20736541748046874, "step": 128995 }, { "epoch": 1.1154248558162057, "grad_norm": 16.222813532571394, "learning_rate": 2.459048365205877e-06, "loss": 0.11198768615722657, "step": 129000 }, { "epoch": 1.1154680893377489, "grad_norm": 16.35885809254722, "learning_rate": 2.4588479727729307e-06, "loss": 0.06026802062988281, "step": 129005 }, { "epoch": 1.115511322859292, "grad_norm": 8.566860098552745, "learning_rate": 2.4586475828357306e-06, "loss": 0.033329010009765625, "step": 129010 }, { "epoch": 1.1155545563808356, "grad_norm": 7.558647000792401, "learning_rate": 2.4584471953951996e-06, "loss": 0.08492813110351563, "step": 129015 }, { "epoch": 1.1155977899023788, "grad_norm": 3.0782187430751664, "learning_rate": 2.4582468104522616e-06, "loss": 0.0167694091796875, "step": 129020 }, { "epoch": 1.115641023423922, "grad_norm": 12.414176071547748, "learning_rate": 2.4580464280078425e-06, "loss": 0.6591766357421875, "step": 129025 }, { "epoch": 1.1156842569454652, "grad_norm": 4.113946255695669, "learning_rate": 2.4578460480628644e-06, "loss": 0.026751708984375, "step": 129030 }, { "epoch": 1.1157274904670085, "grad_norm": 9.194795422092787, "learning_rate": 2.4576456706182534e-06, "loss": 0.02647819519042969, "step": 129035 }, { "epoch": 1.1157707239885517, "grad_norm": 107.33451867045729, "learning_rate": 2.457445295674933e-06, "loss": 0.21303482055664064, "step": 129040 }, { "epoch": 1.115813957510095, "grad_norm": 3.609034520276888, "learning_rate": 2.457244923233826e-06, "loss": 0.239019775390625, "step": 129045 }, { "epoch": 1.1158571910316384, "grad_norm": 6.868452021535879, "learning_rate": 2.4570445532958564e-06, "loss": 0.4442718505859375, "step": 129050 }, { "epoch": 1.1159004245531816, "grad_norm": 2.2723306452734313, "learning_rate": 2.4568441858619513e-06, "loss": 0.06168785095214844, "step": 129055 }, { "epoch": 1.1159436580747248, "grad_norm": 2.5145222908176597, "learning_rate": 2.456643820933032e-06, "loss": 0.02373199462890625, "step": 129060 }, { "epoch": 1.115986891596268, "grad_norm": 0.12373028508780196, "learning_rate": 2.4564434585100237e-06, "loss": 0.013599967956542969, "step": 129065 }, { "epoch": 1.1160301251178113, "grad_norm": 0.4426312813362358, "learning_rate": 2.4562430985938503e-06, "loss": 0.05526580810546875, "step": 129070 }, { "epoch": 1.1160733586393545, "grad_norm": 2.4832179335332, "learning_rate": 2.4560427411854356e-06, "loss": 0.0371185302734375, "step": 129075 }, { "epoch": 1.116116592160898, "grad_norm": 4.935807043030666, "learning_rate": 2.455842386285702e-06, "loss": 0.20384368896484376, "step": 129080 }, { "epoch": 1.1161598256824412, "grad_norm": 4.139821313908692, "learning_rate": 2.4556420338955774e-06, "loss": 0.008806228637695312, "step": 129085 }, { "epoch": 1.1162030592039844, "grad_norm": 18.093266521961727, "learning_rate": 2.4554416840159825e-06, "loss": 0.22956695556640624, "step": 129090 }, { "epoch": 1.1162462927255277, "grad_norm": 3.526497396100708, "learning_rate": 2.4552413366478432e-06, "loss": 0.07874298095703125, "step": 129095 }, { "epoch": 1.116289526247071, "grad_norm": 10.348001001704285, "learning_rate": 2.455040991792083e-06, "loss": 0.1040740966796875, "step": 129100 }, { "epoch": 1.1163327597686141, "grad_norm": 39.356075518889234, "learning_rate": 2.454840649449625e-06, "loss": 0.3778387069702148, "step": 129105 }, { "epoch": 1.1163759932901574, "grad_norm": 1.6908054770861192, "learning_rate": 2.4546403096213933e-06, "loss": 0.02889862060546875, "step": 129110 }, { "epoch": 1.1164192268117008, "grad_norm": 1.0099127173535467, "learning_rate": 2.4544399723083127e-06, "loss": 0.046095848083496094, "step": 129115 }, { "epoch": 1.116462460333244, "grad_norm": 0.8140034140301695, "learning_rate": 2.454239637511306e-06, "loss": 0.1416778564453125, "step": 129120 }, { "epoch": 1.1165056938547873, "grad_norm": 4.499911425576928, "learning_rate": 2.454039305231299e-06, "loss": 0.06763992309570313, "step": 129125 }, { "epoch": 1.1165489273763305, "grad_norm": 18.820733752384186, "learning_rate": 2.4538389754692145e-06, "loss": 0.0863433837890625, "step": 129130 }, { "epoch": 1.1165921608978737, "grad_norm": 16.500947503453375, "learning_rate": 2.4536386482259766e-06, "loss": 0.12577285766601562, "step": 129135 }, { "epoch": 1.116635394419417, "grad_norm": 1.4415404731265387, "learning_rate": 2.4534383235025077e-06, "loss": 0.04596786499023438, "step": 129140 }, { "epoch": 1.1166786279409604, "grad_norm": 31.8109056647859, "learning_rate": 2.4532380012997334e-06, "loss": 0.09944496154785157, "step": 129145 }, { "epoch": 1.1167218614625036, "grad_norm": 4.387398371005212, "learning_rate": 2.453037681618577e-06, "loss": 0.021966552734375, "step": 129150 }, { "epoch": 1.1167650949840469, "grad_norm": 8.90360756495817, "learning_rate": 2.452837364459963e-06, "loss": 0.087939453125, "step": 129155 }, { "epoch": 1.11680832850559, "grad_norm": 0.8897386885111613, "learning_rate": 2.4526370498248153e-06, "loss": 0.016481399536132812, "step": 129160 }, { "epoch": 1.1168515620271333, "grad_norm": 0.6755651690333191, "learning_rate": 2.4524367377140566e-06, "loss": 0.13145751953125, "step": 129165 }, { "epoch": 1.1168947955486765, "grad_norm": 1.069944486895794, "learning_rate": 2.4522364281286105e-06, "loss": 0.029886627197265626, "step": 129170 }, { "epoch": 1.1169380290702198, "grad_norm": 10.517615196400142, "learning_rate": 2.452036121069402e-06, "loss": 0.05557594299316406, "step": 129175 }, { "epoch": 1.1169812625917632, "grad_norm": 2.253329249267648, "learning_rate": 2.4518358165373546e-06, "loss": 0.08336639404296875, "step": 129180 }, { "epoch": 1.1170244961133065, "grad_norm": 0.4820098694308349, "learning_rate": 2.451635514533392e-06, "loss": 0.009965133666992188, "step": 129185 }, { "epoch": 1.1170677296348497, "grad_norm": 8.669611490234749, "learning_rate": 2.4514352150584384e-06, "loss": 0.1832763671875, "step": 129190 }, { "epoch": 1.117110963156393, "grad_norm": 0.2868082749692815, "learning_rate": 2.4512349181134175e-06, "loss": 0.03393096923828125, "step": 129195 }, { "epoch": 1.1171541966779361, "grad_norm": 5.476730545036447, "learning_rate": 2.4510346236992518e-06, "loss": 0.15752029418945312, "step": 129200 }, { "epoch": 1.1171974301994794, "grad_norm": 3.6336509495525973, "learning_rate": 2.450834331816866e-06, "loss": 0.09536361694335938, "step": 129205 }, { "epoch": 1.1172406637210228, "grad_norm": 1.3672197784100368, "learning_rate": 2.4506340424671826e-06, "loss": 0.03809967041015625, "step": 129210 }, { "epoch": 1.117283897242566, "grad_norm": 7.106344025993316, "learning_rate": 2.450433755651128e-06, "loss": 0.10278396606445313, "step": 129215 }, { "epoch": 1.1173271307641093, "grad_norm": 7.485958134810211, "learning_rate": 2.4502334713696242e-06, "loss": 0.11156120300292968, "step": 129220 }, { "epoch": 1.1173703642856525, "grad_norm": 6.666525150807128, "learning_rate": 2.4500331896235952e-06, "loss": 0.042597198486328126, "step": 129225 }, { "epoch": 1.1174135978071957, "grad_norm": 0.7547005296772529, "learning_rate": 2.4498329104139636e-06, "loss": 0.1266754150390625, "step": 129230 }, { "epoch": 1.117456831328739, "grad_norm": 4.68266706235304, "learning_rate": 2.449632633741655e-06, "loss": 0.07550277709960937, "step": 129235 }, { "epoch": 1.1175000648502824, "grad_norm": 0.08320650251815176, "learning_rate": 2.449432359607591e-06, "loss": 0.013329315185546874, "step": 129240 }, { "epoch": 1.1175432983718256, "grad_norm": 1.8881077528799624, "learning_rate": 2.449232088012697e-06, "loss": 0.048366546630859375, "step": 129245 }, { "epoch": 1.1175865318933689, "grad_norm": 0.35561758731201504, "learning_rate": 2.449031818957896e-06, "loss": 0.023885726928710938, "step": 129250 }, { "epoch": 1.117629765414912, "grad_norm": 0.3478401736052178, "learning_rate": 2.448831552444111e-06, "loss": 0.1354595184326172, "step": 129255 }, { "epoch": 1.1176729989364553, "grad_norm": 10.4768519487916, "learning_rate": 2.448631288472267e-06, "loss": 0.05406036376953125, "step": 129260 }, { "epoch": 1.1177162324579986, "grad_norm": 0.16140925794615507, "learning_rate": 2.448431027043287e-06, "loss": 0.13077850341796876, "step": 129265 }, { "epoch": 1.117759465979542, "grad_norm": 3.6030191547081536, "learning_rate": 2.448230768158094e-06, "loss": 0.04654388427734375, "step": 129270 }, { "epoch": 1.1178026995010852, "grad_norm": 9.31523562439022, "learning_rate": 2.4480305118176108e-06, "loss": 0.05986976623535156, "step": 129275 }, { "epoch": 1.1178459330226285, "grad_norm": 4.133533815277936, "learning_rate": 2.447830258022763e-06, "loss": 0.046661376953125, "step": 129280 }, { "epoch": 1.1178891665441717, "grad_norm": 0.12752318631146436, "learning_rate": 2.447630006774473e-06, "loss": 0.032160186767578126, "step": 129285 }, { "epoch": 1.117932400065715, "grad_norm": 0.6643042331104896, "learning_rate": 2.4474297580736654e-06, "loss": 0.01519012451171875, "step": 129290 }, { "epoch": 1.1179756335872582, "grad_norm": 4.124062422922054, "learning_rate": 2.4472295119212627e-06, "loss": 0.11646575927734375, "step": 129295 }, { "epoch": 1.1180188671088014, "grad_norm": 28.687639414007045, "learning_rate": 2.4470292683181887e-06, "loss": 0.2136455535888672, "step": 129300 }, { "epoch": 1.1180621006303448, "grad_norm": 3.281613961581954, "learning_rate": 2.4468290272653652e-06, "loss": 0.053156280517578126, "step": 129305 }, { "epoch": 1.118105334151888, "grad_norm": 1.558469249964307, "learning_rate": 2.4466287887637195e-06, "loss": 0.08493804931640625, "step": 129310 }, { "epoch": 1.1181485676734313, "grad_norm": 0.41007191742602705, "learning_rate": 2.4464285528141713e-06, "loss": 0.06744537353515626, "step": 129315 }, { "epoch": 1.1181918011949745, "grad_norm": 22.118788770948875, "learning_rate": 2.446228319417647e-06, "loss": 0.07305755615234374, "step": 129320 }, { "epoch": 1.1182350347165178, "grad_norm": 6.180464252053541, "learning_rate": 2.4460280885750682e-06, "loss": 0.1985177993774414, "step": 129325 }, { "epoch": 1.118278268238061, "grad_norm": 3.116553828639024, "learning_rate": 2.4458278602873595e-06, "loss": 0.062404727935791014, "step": 129330 }, { "epoch": 1.1183215017596044, "grad_norm": 1.094958522851053, "learning_rate": 2.4456276345554423e-06, "loss": 0.11815185546875, "step": 129335 }, { "epoch": 1.1183647352811477, "grad_norm": 1.1849626251480596, "learning_rate": 2.4454274113802423e-06, "loss": 0.01590099334716797, "step": 129340 }, { "epoch": 1.1184079688026909, "grad_norm": 0.5659556833102706, "learning_rate": 2.445227190762682e-06, "loss": 0.058530235290527345, "step": 129345 }, { "epoch": 1.1184512023242341, "grad_norm": 4.058453233028733, "learning_rate": 2.4450269727036856e-06, "loss": 0.0762237548828125, "step": 129350 }, { "epoch": 1.1184944358457773, "grad_norm": 3.022250608489265, "learning_rate": 2.4448267572041754e-06, "loss": 0.05557174682617187, "step": 129355 }, { "epoch": 1.1185376693673206, "grad_norm": 3.413543695878069, "learning_rate": 2.444626544265075e-06, "loss": 0.012615966796875, "step": 129360 }, { "epoch": 1.1185809028888638, "grad_norm": 5.053285679387739, "learning_rate": 2.444426333887307e-06, "loss": 0.09356842041015626, "step": 129365 }, { "epoch": 1.1186241364104073, "grad_norm": 0.04086089351238834, "learning_rate": 2.4442261260717966e-06, "loss": 0.08013343811035156, "step": 129370 }, { "epoch": 1.1186673699319505, "grad_norm": 3.6528846480396218, "learning_rate": 2.4440259208194656e-06, "loss": 0.03523101806640625, "step": 129375 }, { "epoch": 1.1187106034534937, "grad_norm": 3.388086416405599, "learning_rate": 2.443825718131239e-06, "loss": 0.09580860137939454, "step": 129380 }, { "epoch": 1.118753836975037, "grad_norm": 1.092958255956287, "learning_rate": 2.4436255180080386e-06, "loss": 0.0196044921875, "step": 129385 }, { "epoch": 1.1187970704965802, "grad_norm": 35.797733393107734, "learning_rate": 2.4434253204507885e-06, "loss": 0.02130126953125, "step": 129390 }, { "epoch": 1.1188403040181234, "grad_norm": 42.04616623725835, "learning_rate": 2.443225125460411e-06, "loss": 0.22657470703125, "step": 129395 }, { "epoch": 1.1188835375396669, "grad_norm": 2.052792785954826, "learning_rate": 2.4430249330378296e-06, "loss": 0.08375625610351563, "step": 129400 }, { "epoch": 1.11892677106121, "grad_norm": 0.5011105530035483, "learning_rate": 2.4428247431839677e-06, "loss": 0.04423942565917969, "step": 129405 }, { "epoch": 1.1189700045827533, "grad_norm": 0.5963910219797406, "learning_rate": 2.4426245558997505e-06, "loss": 0.03286819458007813, "step": 129410 }, { "epoch": 1.1190132381042965, "grad_norm": 8.2656008131628, "learning_rate": 2.442424371186099e-06, "loss": 0.07642440795898438, "step": 129415 }, { "epoch": 1.1190564716258398, "grad_norm": 5.7501606330326505, "learning_rate": 2.442224189043937e-06, "loss": 0.05054931640625, "step": 129420 }, { "epoch": 1.119099705147383, "grad_norm": 5.988016792522696, "learning_rate": 2.4420240094741875e-06, "loss": 0.10507888793945312, "step": 129425 }, { "epoch": 1.1191429386689262, "grad_norm": 2.697509495005077, "learning_rate": 2.441823832477773e-06, "loss": 0.11071243286132812, "step": 129430 }, { "epoch": 1.1191861721904697, "grad_norm": 4.2308443547202135, "learning_rate": 2.441623658055619e-06, "loss": 0.0299163818359375, "step": 129435 }, { "epoch": 1.119229405712013, "grad_norm": 16.399222210422455, "learning_rate": 2.4414234862086474e-06, "loss": 0.10907783508300781, "step": 129440 }, { "epoch": 1.1192726392335561, "grad_norm": 39.758290976557554, "learning_rate": 2.4412233169377814e-06, "loss": 0.15641021728515625, "step": 129445 }, { "epoch": 1.1193158727550994, "grad_norm": 23.93100962693997, "learning_rate": 2.4410231502439437e-06, "loss": 0.11891450881958007, "step": 129450 }, { "epoch": 1.1193591062766426, "grad_norm": 0.246108005405333, "learning_rate": 2.4408229861280576e-06, "loss": 0.02971649169921875, "step": 129455 }, { "epoch": 1.1194023397981858, "grad_norm": 4.118743161175308, "learning_rate": 2.440622824591047e-06, "loss": 0.027733230590820314, "step": 129460 }, { "epoch": 1.1194455733197293, "grad_norm": 0.6256948549258573, "learning_rate": 2.4404226656338334e-06, "loss": 0.013328170776367188, "step": 129465 }, { "epoch": 1.1194888068412725, "grad_norm": 0.7964346040594993, "learning_rate": 2.440222509257342e-06, "loss": 0.0277435302734375, "step": 129470 }, { "epoch": 1.1195320403628157, "grad_norm": 58.752534676793104, "learning_rate": 2.440022355462495e-06, "loss": 0.1400165557861328, "step": 129475 }, { "epoch": 1.119575273884359, "grad_norm": 21.27392460891537, "learning_rate": 2.439822204250215e-06, "loss": 0.057872772216796875, "step": 129480 }, { "epoch": 1.1196185074059022, "grad_norm": 6.84742071545106, "learning_rate": 2.4396220556214257e-06, "loss": 0.032133865356445315, "step": 129485 }, { "epoch": 1.1196617409274454, "grad_norm": 7.752985752934132, "learning_rate": 2.43942190957705e-06, "loss": 0.08486328125, "step": 129490 }, { "epoch": 1.1197049744489889, "grad_norm": 1.03356297376939, "learning_rate": 2.43922176611801e-06, "loss": 0.008939552307128906, "step": 129495 }, { "epoch": 1.119748207970532, "grad_norm": 0.2546149473499946, "learning_rate": 2.439021625245231e-06, "loss": 0.04080390930175781, "step": 129500 }, { "epoch": 1.1197914414920753, "grad_norm": 0.4757219524292941, "learning_rate": 2.4388214869596346e-06, "loss": 0.08426666259765625, "step": 129505 }, { "epoch": 1.1198346750136186, "grad_norm": 4.194725186007996, "learning_rate": 2.438621351262143e-06, "loss": 0.021083831787109375, "step": 129510 }, { "epoch": 1.1198779085351618, "grad_norm": 20.805247215487828, "learning_rate": 2.438421218153681e-06, "loss": 0.05172882080078125, "step": 129515 }, { "epoch": 1.119921142056705, "grad_norm": 13.872291510803887, "learning_rate": 2.4382210876351707e-06, "loss": 0.1251251220703125, "step": 129520 }, { "epoch": 1.1199643755782485, "grad_norm": 6.339675136275614, "learning_rate": 2.4380209597075335e-06, "loss": 0.04058818817138672, "step": 129525 }, { "epoch": 1.1200076090997917, "grad_norm": 2.458122998987788, "learning_rate": 2.437820834371696e-06, "loss": 0.21901893615722656, "step": 129530 }, { "epoch": 1.120050842621335, "grad_norm": 5.598718157760477, "learning_rate": 2.437620711628578e-06, "loss": 0.044928359985351565, "step": 129535 }, { "epoch": 1.1200940761428781, "grad_norm": 16.091141201804408, "learning_rate": 2.437420591479104e-06, "loss": 0.04638595581054687, "step": 129540 }, { "epoch": 1.1201373096644214, "grad_norm": 1.436261938218011, "learning_rate": 2.4372204739241966e-06, "loss": 0.1976837158203125, "step": 129545 }, { "epoch": 1.1201805431859646, "grad_norm": 15.096216280375657, "learning_rate": 2.4370203589647786e-06, "loss": 0.14300918579101562, "step": 129550 }, { "epoch": 1.1202237767075078, "grad_norm": 0.4349432207733599, "learning_rate": 2.4368202466017738e-06, "loss": 0.03554534912109375, "step": 129555 }, { "epoch": 1.1202670102290513, "grad_norm": 2.1419531935677885, "learning_rate": 2.4366201368361023e-06, "loss": 0.215087890625, "step": 129560 }, { "epoch": 1.1203102437505945, "grad_norm": 7.984575830048299, "learning_rate": 2.43642002966869e-06, "loss": 0.03888721466064453, "step": 129565 }, { "epoch": 1.1203534772721377, "grad_norm": 1.6477726633234882, "learning_rate": 2.4362199251004584e-06, "loss": 0.00708770751953125, "step": 129570 }, { "epoch": 1.120396710793681, "grad_norm": 26.60016392990625, "learning_rate": 2.4360198231323316e-06, "loss": 0.045040130615234375, "step": 129575 }, { "epoch": 1.1204399443152242, "grad_norm": 39.571773928977066, "learning_rate": 2.4358197237652313e-06, "loss": 0.18645896911621093, "step": 129580 }, { "epoch": 1.1204831778367674, "grad_norm": 36.91934777501643, "learning_rate": 2.4356196270000806e-06, "loss": 0.09421577453613281, "step": 129585 }, { "epoch": 1.1205264113583109, "grad_norm": 17.836100663016165, "learning_rate": 2.435419532837801e-06, "loss": 0.05631179809570312, "step": 129590 }, { "epoch": 1.120569644879854, "grad_norm": 0.0746087771419101, "learning_rate": 2.4352194412793173e-06, "loss": 0.1833059310913086, "step": 129595 }, { "epoch": 1.1206128784013973, "grad_norm": 15.828818758074387, "learning_rate": 2.435019352325552e-06, "loss": 0.05340118408203125, "step": 129600 }, { "epoch": 1.1206561119229406, "grad_norm": 2.101689534207482, "learning_rate": 2.4348192659774276e-06, "loss": 0.01017608642578125, "step": 129605 }, { "epoch": 1.1206993454444838, "grad_norm": 1.959512920520942, "learning_rate": 2.4346191822358668e-06, "loss": 0.030478668212890626, "step": 129610 }, { "epoch": 1.120742578966027, "grad_norm": 14.116752292624199, "learning_rate": 2.4344191011017927e-06, "loss": 0.08555374145507813, "step": 129615 }, { "epoch": 1.1207858124875703, "grad_norm": 28.289993117716396, "learning_rate": 2.434219022576127e-06, "loss": 0.2812961578369141, "step": 129620 }, { "epoch": 1.1208290460091137, "grad_norm": 4.895680152196263, "learning_rate": 2.4340189466597927e-06, "loss": 0.09074525833129883, "step": 129625 }, { "epoch": 1.120872279530657, "grad_norm": 0.8033454669604191, "learning_rate": 2.433818873353714e-06, "loss": 0.015899658203125, "step": 129630 }, { "epoch": 1.1209155130522002, "grad_norm": 1.2934473075658985, "learning_rate": 2.4336188026588126e-06, "loss": 0.028504180908203124, "step": 129635 }, { "epoch": 1.1209587465737434, "grad_norm": 0.8362283863491842, "learning_rate": 2.433418734576011e-06, "loss": 0.021947479248046874, "step": 129640 }, { "epoch": 1.1210019800952866, "grad_norm": 1.0102292175293384, "learning_rate": 2.433218669106232e-06, "loss": 0.13198699951171874, "step": 129645 }, { "epoch": 1.1210452136168298, "grad_norm": 0.2333617568071232, "learning_rate": 2.433018606250399e-06, "loss": 0.026702022552490233, "step": 129650 }, { "epoch": 1.1210884471383733, "grad_norm": 8.517484662105508, "learning_rate": 2.432818546009432e-06, "loss": 0.0743377685546875, "step": 129655 }, { "epoch": 1.1211316806599165, "grad_norm": 3.392555751123456, "learning_rate": 2.4326184883842577e-06, "loss": 0.0771240234375, "step": 129660 }, { "epoch": 1.1211749141814598, "grad_norm": 0.9193406730674825, "learning_rate": 2.4324184333757972e-06, "loss": 0.028565216064453124, "step": 129665 }, { "epoch": 1.121218147703003, "grad_norm": 0.20125857950409745, "learning_rate": 2.4322183809849722e-06, "loss": 0.100897216796875, "step": 129670 }, { "epoch": 1.1212613812245462, "grad_norm": 3.1722487843111247, "learning_rate": 2.432018331212705e-06, "loss": 0.05390167236328125, "step": 129675 }, { "epoch": 1.1213046147460894, "grad_norm": 25.980714737916003, "learning_rate": 2.43181828405992e-06, "loss": 0.17925796508789063, "step": 129680 }, { "epoch": 1.1213478482676327, "grad_norm": 26.653026478251437, "learning_rate": 2.4316182395275375e-06, "loss": 0.06979827880859375, "step": 129685 }, { "epoch": 1.1213910817891761, "grad_norm": 4.813060820705038, "learning_rate": 2.4314181976164833e-06, "loss": 0.0214263916015625, "step": 129690 }, { "epoch": 1.1214343153107194, "grad_norm": 4.035252494806043, "learning_rate": 2.4312181583276774e-06, "loss": 0.03651351928710937, "step": 129695 }, { "epoch": 1.1214775488322626, "grad_norm": 15.50069051288528, "learning_rate": 2.4310181216620436e-06, "loss": 0.098638916015625, "step": 129700 }, { "epoch": 1.1215207823538058, "grad_norm": 24.04083417143351, "learning_rate": 2.430818087620503e-06, "loss": 0.12762069702148438, "step": 129705 }, { "epoch": 1.121564015875349, "grad_norm": 0.1442259566003493, "learning_rate": 2.43061805620398e-06, "loss": 0.17641983032226563, "step": 129710 }, { "epoch": 1.1216072493968925, "grad_norm": 6.67903977104618, "learning_rate": 2.4304180274133956e-06, "loss": 0.2449462890625, "step": 129715 }, { "epoch": 1.1216504829184357, "grad_norm": 2.2186287590823435, "learning_rate": 2.4302180012496723e-06, "loss": 0.021591567993164064, "step": 129720 }, { "epoch": 1.121693716439979, "grad_norm": 3.078201918541156, "learning_rate": 2.4300179777137344e-06, "loss": 0.02807273864746094, "step": 129725 }, { "epoch": 1.1217369499615222, "grad_norm": 28.442712809045585, "learning_rate": 2.429817956806503e-06, "loss": 0.16834449768066406, "step": 129730 }, { "epoch": 1.1217801834830654, "grad_norm": 0.2683904737392309, "learning_rate": 2.4296179385289e-06, "loss": 0.019685745239257812, "step": 129735 }, { "epoch": 1.1218234170046086, "grad_norm": 1.134823643910375, "learning_rate": 2.42941792288185e-06, "loss": 0.018853378295898438, "step": 129740 }, { "epoch": 1.1218666505261519, "grad_norm": 2.389589816287315, "learning_rate": 2.4292179098662738e-06, "loss": 0.194659423828125, "step": 129745 }, { "epoch": 1.1219098840476953, "grad_norm": 0.582761923367258, "learning_rate": 2.4290178994830923e-06, "loss": 0.011437797546386718, "step": 129750 }, { "epoch": 1.1219531175692385, "grad_norm": 3.73410843083112, "learning_rate": 2.4288178917332317e-06, "loss": 0.2260406494140625, "step": 129755 }, { "epoch": 1.1219963510907818, "grad_norm": 0.2189597684905988, "learning_rate": 2.4286178866176126e-06, "loss": 0.024475860595703124, "step": 129760 }, { "epoch": 1.122039584612325, "grad_norm": 7.190229159024774, "learning_rate": 2.4284178841371562e-06, "loss": 0.0779541015625, "step": 129765 }, { "epoch": 1.1220828181338682, "grad_norm": 0.43826381507673645, "learning_rate": 2.428217884292787e-06, "loss": 0.06812515258789062, "step": 129770 }, { "epoch": 1.1221260516554115, "grad_norm": 6.666376144408245, "learning_rate": 2.4280178870854263e-06, "loss": 0.1099395751953125, "step": 129775 }, { "epoch": 1.122169285176955, "grad_norm": 1.1785768877276763, "learning_rate": 2.427817892515995e-06, "loss": 0.043233489990234374, "step": 129780 }, { "epoch": 1.1222125186984981, "grad_norm": 12.015339076295046, "learning_rate": 2.4276179005854188e-06, "loss": 0.03632774353027344, "step": 129785 }, { "epoch": 1.1222557522200414, "grad_norm": 26.491373925881458, "learning_rate": 2.427417911294618e-06, "loss": 0.129595947265625, "step": 129790 }, { "epoch": 1.1222989857415846, "grad_norm": 1.2821376183535755, "learning_rate": 2.4272179246445146e-06, "loss": 0.0536041259765625, "step": 129795 }, { "epoch": 1.1223422192631278, "grad_norm": 1.2905227133331278, "learning_rate": 2.4270179406360325e-06, "loss": 0.0154541015625, "step": 129800 }, { "epoch": 1.122385452784671, "grad_norm": 0.5214401150670556, "learning_rate": 2.4268179592700924e-06, "loss": 0.026103973388671875, "step": 129805 }, { "epoch": 1.1224286863062143, "grad_norm": 14.568070886437113, "learning_rate": 2.4266179805476176e-06, "loss": 0.06459083557128906, "step": 129810 }, { "epoch": 1.1224719198277577, "grad_norm": 21.083297492578684, "learning_rate": 2.4264180044695284e-06, "loss": 0.11588592529296875, "step": 129815 }, { "epoch": 1.122515153349301, "grad_norm": 5.098225861670774, "learning_rate": 2.4262180310367503e-06, "loss": 0.06291885375976562, "step": 129820 }, { "epoch": 1.1225583868708442, "grad_norm": 0.19850888763086194, "learning_rate": 2.4260180602502034e-06, "loss": 0.07981491088867188, "step": 129825 }, { "epoch": 1.1226016203923874, "grad_norm": 4.0008582289927865, "learning_rate": 2.4258180921108107e-06, "loss": 0.028519439697265624, "step": 129830 }, { "epoch": 1.1226448539139307, "grad_norm": 4.314487177676377, "learning_rate": 2.425618126619495e-06, "loss": 0.0189971923828125, "step": 129835 }, { "epoch": 1.1226880874354739, "grad_norm": 0.09933296757614568, "learning_rate": 2.425418163777177e-06, "loss": 0.00412750244140625, "step": 129840 }, { "epoch": 1.1227313209570173, "grad_norm": 1.2865966343940325, "learning_rate": 2.425218203584778e-06, "loss": 0.051439666748046876, "step": 129845 }, { "epoch": 1.1227745544785606, "grad_norm": 19.72750206691372, "learning_rate": 2.425018246043224e-06, "loss": 0.08943462371826172, "step": 129850 }, { "epoch": 1.1228177880001038, "grad_norm": 4.863109489997008, "learning_rate": 2.424818291153434e-06, "loss": 0.03917388916015625, "step": 129855 }, { "epoch": 1.122861021521647, "grad_norm": 4.347311819367541, "learning_rate": 2.4246183389163324e-06, "loss": 0.03983001708984375, "step": 129860 }, { "epoch": 1.1229042550431902, "grad_norm": 13.603750433809994, "learning_rate": 2.4244183893328396e-06, "loss": 0.14839324951171876, "step": 129865 }, { "epoch": 1.1229474885647335, "grad_norm": 2.223797128600227, "learning_rate": 2.4242184424038786e-06, "loss": 0.0722747802734375, "step": 129870 }, { "epoch": 1.1229907220862767, "grad_norm": 2.6023436637101622, "learning_rate": 2.42401849813037e-06, "loss": 0.019399261474609374, "step": 129875 }, { "epoch": 1.1230339556078202, "grad_norm": 21.130087141883518, "learning_rate": 2.4238185565132375e-06, "loss": 0.14051895141601561, "step": 129880 }, { "epoch": 1.1230771891293634, "grad_norm": 0.17328602828812933, "learning_rate": 2.423618617553404e-06, "loss": 0.11939926147460937, "step": 129885 }, { "epoch": 1.1231204226509066, "grad_norm": 1.1449084012110229, "learning_rate": 2.4234186812517897e-06, "loss": 0.01788330078125, "step": 129890 }, { "epoch": 1.1231636561724498, "grad_norm": 2.043521986634794, "learning_rate": 2.423218747609318e-06, "loss": 0.023251724243164063, "step": 129895 }, { "epoch": 1.123206889693993, "grad_norm": 20.843394584855673, "learning_rate": 2.4230188166269103e-06, "loss": 0.035076904296875, "step": 129900 }, { "epoch": 1.1232501232155363, "grad_norm": 14.49170281700355, "learning_rate": 2.422818888305489e-06, "loss": 0.04379539489746094, "step": 129905 }, { "epoch": 1.1232933567370798, "grad_norm": 23.222635685568303, "learning_rate": 2.4226189626459747e-06, "loss": 0.07717971801757813, "step": 129910 }, { "epoch": 1.123336590258623, "grad_norm": 0.8538125415180645, "learning_rate": 2.4224190396492923e-06, "loss": 0.03067779541015625, "step": 129915 }, { "epoch": 1.1233798237801662, "grad_norm": 3.7141128391538185, "learning_rate": 2.422219119316362e-06, "loss": 0.05117321014404297, "step": 129920 }, { "epoch": 1.1234230573017094, "grad_norm": 28.666159576393444, "learning_rate": 2.422019201648106e-06, "loss": 0.06418380737304688, "step": 129925 }, { "epoch": 1.1234662908232527, "grad_norm": 0.7903319858724523, "learning_rate": 2.421819286645446e-06, "loss": 0.09811515808105468, "step": 129930 }, { "epoch": 1.123509524344796, "grad_norm": 1.7153314516505036, "learning_rate": 2.4216193743093047e-06, "loss": 0.037934494018554685, "step": 129935 }, { "epoch": 1.1235527578663393, "grad_norm": 1.5472718285085518, "learning_rate": 2.4214194646406027e-06, "loss": 0.0308349609375, "step": 129940 }, { "epoch": 1.1235959913878826, "grad_norm": 1.948036033743227, "learning_rate": 2.4212195576402636e-06, "loss": 0.029730224609375, "step": 129945 }, { "epoch": 1.1236392249094258, "grad_norm": 0.3440359181655275, "learning_rate": 2.4210196533092096e-06, "loss": 0.02484588623046875, "step": 129950 }, { "epoch": 1.123682458430969, "grad_norm": 3.7948437921799085, "learning_rate": 2.4208197516483613e-06, "loss": 0.047052001953125, "step": 129955 }, { "epoch": 1.1237256919525123, "grad_norm": 12.296186438194454, "learning_rate": 2.420619852658641e-06, "loss": 0.07321701049804688, "step": 129960 }, { "epoch": 1.1237689254740555, "grad_norm": 1.6539774270533687, "learning_rate": 2.420419956340971e-06, "loss": 0.11542205810546875, "step": 129965 }, { "epoch": 1.123812158995599, "grad_norm": 0.29055284920252455, "learning_rate": 2.4202200626962732e-06, "loss": 0.17341690063476561, "step": 129970 }, { "epoch": 1.1238553925171422, "grad_norm": 9.784549095261385, "learning_rate": 2.4200201717254678e-06, "loss": 0.16091766357421874, "step": 129975 }, { "epoch": 1.1238986260386854, "grad_norm": 1.6167475855674172, "learning_rate": 2.4198202834294793e-06, "loss": 0.063262939453125, "step": 129980 }, { "epoch": 1.1239418595602286, "grad_norm": 1.3705422079264644, "learning_rate": 2.4196203978092285e-06, "loss": 0.11477928161621094, "step": 129985 }, { "epoch": 1.1239850930817719, "grad_norm": 0.5469401180343632, "learning_rate": 2.4194205148656365e-06, "loss": 0.14423446655273436, "step": 129990 }, { "epoch": 1.124028326603315, "grad_norm": 5.381559150797281, "learning_rate": 2.4192206345996262e-06, "loss": 0.04717597961425781, "step": 129995 }, { "epoch": 1.1240715601248583, "grad_norm": 1.0867822491342127, "learning_rate": 2.4190207570121196e-06, "loss": 0.01404876708984375, "step": 130000 }, { "epoch": 1.1241147936464018, "grad_norm": 24.105745626300507, "learning_rate": 2.418820882104036e-06, "loss": 0.10480518341064453, "step": 130005 }, { "epoch": 1.124158027167945, "grad_norm": 1.969151629474722, "learning_rate": 2.418621009876301e-06, "loss": 0.021805572509765624, "step": 130010 }, { "epoch": 1.1242012606894882, "grad_norm": 14.421677199258081, "learning_rate": 2.4184211403298342e-06, "loss": 0.03281688690185547, "step": 130015 }, { "epoch": 1.1242444942110315, "grad_norm": 9.687045171178545, "learning_rate": 2.418221273465557e-06, "loss": 0.024824142456054688, "step": 130020 }, { "epoch": 1.1242877277325747, "grad_norm": 0.6532212739083526, "learning_rate": 2.418021409284393e-06, "loss": 0.01830120086669922, "step": 130025 }, { "epoch": 1.124330961254118, "grad_norm": 2.8712918562596608, "learning_rate": 2.417821547787262e-06, "loss": 0.019451141357421875, "step": 130030 }, { "epoch": 1.1243741947756614, "grad_norm": 8.8276354057482, "learning_rate": 2.417621688975086e-06, "loss": 0.02166461944580078, "step": 130035 }, { "epoch": 1.1244174282972046, "grad_norm": 5.000482079642082, "learning_rate": 2.4174218328487883e-06, "loss": 0.03842620849609375, "step": 130040 }, { "epoch": 1.1244606618187478, "grad_norm": 3.333950134912475, "learning_rate": 2.4172219794092894e-06, "loss": 0.19676132202148439, "step": 130045 }, { "epoch": 1.124503895340291, "grad_norm": 0.578304495571765, "learning_rate": 2.4170221286575116e-06, "loss": 0.044217681884765624, "step": 130050 }, { "epoch": 1.1245471288618343, "grad_norm": 1.183549421591273, "learning_rate": 2.416822280594376e-06, "loss": 0.03266448974609375, "step": 130055 }, { "epoch": 1.1245903623833775, "grad_norm": 0.31629381576886006, "learning_rate": 2.4166224352208046e-06, "loss": 0.035915374755859375, "step": 130060 }, { "epoch": 1.1246335959049207, "grad_norm": 19.86234626475194, "learning_rate": 2.416422592537719e-06, "loss": 0.06229743957519531, "step": 130065 }, { "epoch": 1.1246768294264642, "grad_norm": 5.02767564187529, "learning_rate": 2.4162227525460396e-06, "loss": 0.07434234619140626, "step": 130070 }, { "epoch": 1.1247200629480074, "grad_norm": 1.4976483782686287, "learning_rate": 2.41602291524669e-06, "loss": 0.0525177001953125, "step": 130075 }, { "epoch": 1.1247632964695506, "grad_norm": 27.122285676348817, "learning_rate": 2.4158230806405916e-06, "loss": 0.05092620849609375, "step": 130080 }, { "epoch": 1.1248065299910939, "grad_norm": 0.3340537780724422, "learning_rate": 2.4156232487286654e-06, "loss": 0.05706024169921875, "step": 130085 }, { "epoch": 1.124849763512637, "grad_norm": 5.730812307596468, "learning_rate": 2.4154234195118338e-06, "loss": 0.07664337158203124, "step": 130090 }, { "epoch": 1.1248929970341803, "grad_norm": 0.3387074263348743, "learning_rate": 2.415223592991017e-06, "loss": 0.04565010070800781, "step": 130095 }, { "epoch": 1.1249362305557238, "grad_norm": 19.98365530689773, "learning_rate": 2.4150237691671364e-06, "loss": 0.2855854034423828, "step": 130100 }, { "epoch": 1.124979464077267, "grad_norm": 1.2982385587482173, "learning_rate": 2.4148239480411148e-06, "loss": 0.06724071502685547, "step": 130105 }, { "epoch": 1.1250226975988102, "grad_norm": 16.20112431623182, "learning_rate": 2.4146241296138747e-06, "loss": 0.09181270599365235, "step": 130110 }, { "epoch": 1.1250659311203535, "grad_norm": 0.36238685993188696, "learning_rate": 2.414424313886336e-06, "loss": 0.01257781982421875, "step": 130115 }, { "epoch": 1.1251091646418967, "grad_norm": 0.22633783845293326, "learning_rate": 2.4142245008594204e-06, "loss": 0.1639190673828125, "step": 130120 }, { "epoch": 1.12515239816344, "grad_norm": 4.125399781884792, "learning_rate": 2.4140246905340493e-06, "loss": 0.0485687255859375, "step": 130125 }, { "epoch": 1.1251956316849832, "grad_norm": 19.44771134393238, "learning_rate": 2.4138248829111444e-06, "loss": 0.17191619873046876, "step": 130130 }, { "epoch": 1.1252388652065266, "grad_norm": 2.1878607244454837, "learning_rate": 2.4136250779916273e-06, "loss": 0.02235565185546875, "step": 130135 }, { "epoch": 1.1252820987280698, "grad_norm": 3.329899529705424, "learning_rate": 2.4134252757764204e-06, "loss": 0.15943603515625, "step": 130140 }, { "epoch": 1.125325332249613, "grad_norm": 15.664039652310846, "learning_rate": 2.4132254762664445e-06, "loss": 0.03887100219726562, "step": 130145 }, { "epoch": 1.1253685657711563, "grad_norm": 0.2537432175815919, "learning_rate": 2.4130256794626203e-06, "loss": 0.06259384155273437, "step": 130150 }, { "epoch": 1.1254117992926995, "grad_norm": 44.34633443038543, "learning_rate": 2.4128258853658697e-06, "loss": 0.23897705078125, "step": 130155 }, { "epoch": 1.125455032814243, "grad_norm": 0.8499610015026176, "learning_rate": 2.4126260939771144e-06, "loss": 0.025521469116210938, "step": 130160 }, { "epoch": 1.1254982663357862, "grad_norm": 0.07830659472528445, "learning_rate": 2.4124263052972747e-06, "loss": 0.010802841186523438, "step": 130165 }, { "epoch": 1.1255414998573294, "grad_norm": 1.6426983536573267, "learning_rate": 2.412226519327274e-06, "loss": 0.028675079345703125, "step": 130170 }, { "epoch": 1.1255847333788727, "grad_norm": 0.19927631113184954, "learning_rate": 2.4120267360680328e-06, "loss": 0.2770587921142578, "step": 130175 }, { "epoch": 1.1256279669004159, "grad_norm": 5.567512026011445, "learning_rate": 2.411826955520472e-06, "loss": 0.06480979919433594, "step": 130180 }, { "epoch": 1.1256712004219591, "grad_norm": 1.3154703566197692, "learning_rate": 2.4116271776855132e-06, "loss": 0.22843017578125, "step": 130185 }, { "epoch": 1.1257144339435023, "grad_norm": 32.41698220758414, "learning_rate": 2.4114274025640786e-06, "loss": 0.13696670532226562, "step": 130190 }, { "epoch": 1.1257576674650456, "grad_norm": 6.193773267090808, "learning_rate": 2.411227630157087e-06, "loss": 0.061224365234375, "step": 130195 }, { "epoch": 1.125800900986589, "grad_norm": 5.015784714101083, "learning_rate": 2.411027860465463e-06, "loss": 0.06276321411132812, "step": 130200 }, { "epoch": 1.1258441345081323, "grad_norm": 1.4448836597053767, "learning_rate": 2.4108280934901267e-06, "loss": 0.14979782104492187, "step": 130205 }, { "epoch": 1.1258873680296755, "grad_norm": 0.30672795226264077, "learning_rate": 2.410628329231999e-06, "loss": 0.0608551025390625, "step": 130210 }, { "epoch": 1.1259306015512187, "grad_norm": 3.8475008876005243, "learning_rate": 2.4104285676920003e-06, "loss": 0.03739013671875, "step": 130215 }, { "epoch": 1.125973835072762, "grad_norm": 1.5154772452025025, "learning_rate": 2.4102288088710545e-06, "loss": 0.01399688720703125, "step": 130220 }, { "epoch": 1.1260170685943054, "grad_norm": 0.4934967246273204, "learning_rate": 2.4100290527700806e-06, "loss": 0.09933547973632813, "step": 130225 }, { "epoch": 1.1260603021158486, "grad_norm": 0.33958116303489216, "learning_rate": 2.4098292993899992e-06, "loss": 0.052036285400390625, "step": 130230 }, { "epoch": 1.1261035356373919, "grad_norm": 1.3249907775530163, "learning_rate": 2.409629548731735e-06, "loss": 0.36257190704345704, "step": 130235 }, { "epoch": 1.126146769158935, "grad_norm": 0.08650849693018096, "learning_rate": 2.4094298007962066e-06, "loss": 0.03408660888671875, "step": 130240 }, { "epoch": 1.1261900026804783, "grad_norm": 55.03872389734682, "learning_rate": 2.409230055584335e-06, "loss": 0.13957633972167968, "step": 130245 }, { "epoch": 1.1262332362020215, "grad_norm": 3.4071373206235185, "learning_rate": 2.409030313097043e-06, "loss": 0.022745513916015626, "step": 130250 }, { "epoch": 1.1262764697235648, "grad_norm": 1.2337424396989318, "learning_rate": 2.4088305733352514e-06, "loss": 0.0808563232421875, "step": 130255 }, { "epoch": 1.1263197032451082, "grad_norm": 90.14426591665985, "learning_rate": 2.408630836299879e-06, "loss": 0.20479278564453124, "step": 130260 }, { "epoch": 1.1263629367666514, "grad_norm": 6.131007136385726, "learning_rate": 2.4084311019918508e-06, "loss": 0.023312759399414063, "step": 130265 }, { "epoch": 1.1264061702881947, "grad_norm": 12.087698143295768, "learning_rate": 2.4082313704120855e-06, "loss": 0.1130615234375, "step": 130270 }, { "epoch": 1.126449403809738, "grad_norm": 42.151902721687, "learning_rate": 2.4080316415615045e-06, "loss": 0.1490558624267578, "step": 130275 }, { "epoch": 1.1264926373312811, "grad_norm": 3.321899586156715, "learning_rate": 2.40783191544103e-06, "loss": 0.0179656982421875, "step": 130280 }, { "epoch": 1.1265358708528244, "grad_norm": 1.1213947233393333, "learning_rate": 2.407632192051582e-06, "loss": 0.023278045654296874, "step": 130285 }, { "epoch": 1.1265791043743678, "grad_norm": 17.356231539586688, "learning_rate": 2.407432471394081e-06, "loss": 0.046785736083984376, "step": 130290 }, { "epoch": 1.126622337895911, "grad_norm": 8.638595918823354, "learning_rate": 2.4072327534694504e-06, "loss": 0.02599945068359375, "step": 130295 }, { "epoch": 1.1266655714174543, "grad_norm": 0.8903719686479115, "learning_rate": 2.4070330382786092e-06, "loss": 0.018367767333984375, "step": 130300 }, { "epoch": 1.1267088049389975, "grad_norm": 47.38543659852822, "learning_rate": 2.40683332582248e-06, "loss": 0.22424659729003907, "step": 130305 }, { "epoch": 1.1267520384605407, "grad_norm": 25.44204648560008, "learning_rate": 2.4066336161019827e-06, "loss": 0.09042739868164062, "step": 130310 }, { "epoch": 1.126795271982084, "grad_norm": 3.699088528907577, "learning_rate": 2.406433909118039e-06, "loss": 0.05389251708984375, "step": 130315 }, { "epoch": 1.1268385055036272, "grad_norm": 3.4866347982266315, "learning_rate": 2.40623420487157e-06, "loss": 0.029985427856445312, "step": 130320 }, { "epoch": 1.1268817390251706, "grad_norm": 3.432791287365315, "learning_rate": 2.4060345033634947e-06, "loss": 0.04243927001953125, "step": 130325 }, { "epoch": 1.1269249725467139, "grad_norm": 1.8607362906813405, "learning_rate": 2.405834804594737e-06, "loss": 0.14273395538330078, "step": 130330 }, { "epoch": 1.126968206068257, "grad_norm": 4.905986357887532, "learning_rate": 2.4056351085662163e-06, "loss": 0.04957160949707031, "step": 130335 }, { "epoch": 1.1270114395898003, "grad_norm": 2.3239442245663224, "learning_rate": 2.405435415278855e-06, "loss": 0.1741161346435547, "step": 130340 }, { "epoch": 1.1270546731113436, "grad_norm": 2.825524958263059, "learning_rate": 2.4052357247335723e-06, "loss": 0.015578460693359376, "step": 130345 }, { "epoch": 1.1270979066328868, "grad_norm": 4.081144409859378, "learning_rate": 2.4050360369312896e-06, "loss": 0.016614532470703124, "step": 130350 }, { "epoch": 1.1271411401544302, "grad_norm": 4.349673723246907, "learning_rate": 2.404836351872928e-06, "loss": 0.13524017333984376, "step": 130355 }, { "epoch": 1.1271843736759735, "grad_norm": 0.7553751451898975, "learning_rate": 2.4046366695594087e-06, "loss": 0.04749565124511719, "step": 130360 }, { "epoch": 1.1272276071975167, "grad_norm": 2.1208318961290993, "learning_rate": 2.404436989991653e-06, "loss": 0.01803245544433594, "step": 130365 }, { "epoch": 1.12727084071906, "grad_norm": 23.269326298389498, "learning_rate": 2.404237313170581e-06, "loss": 0.1262725830078125, "step": 130370 }, { "epoch": 1.1273140742406031, "grad_norm": 9.449425618101715, "learning_rate": 2.404037639097114e-06, "loss": 0.04236106872558594, "step": 130375 }, { "epoch": 1.1273573077621464, "grad_norm": 0.28216038114517683, "learning_rate": 2.4038379677721725e-06, "loss": 0.23225059509277343, "step": 130380 }, { "epoch": 1.1274005412836896, "grad_norm": 2.5526085886926366, "learning_rate": 2.403638299196677e-06, "loss": 0.0345184326171875, "step": 130385 }, { "epoch": 1.127443774805233, "grad_norm": 32.70928733135999, "learning_rate": 2.4034386333715497e-06, "loss": 0.07659025192260742, "step": 130390 }, { "epoch": 1.1274870083267763, "grad_norm": 8.45091167412519, "learning_rate": 2.4032389702977107e-06, "loss": 0.091998291015625, "step": 130395 }, { "epoch": 1.1275302418483195, "grad_norm": 5.294757099156754, "learning_rate": 2.4030393099760813e-06, "loss": 0.14073944091796875, "step": 130400 }, { "epoch": 1.1275734753698627, "grad_norm": 21.418266122279018, "learning_rate": 2.4028396524075816e-06, "loss": 0.08676719665527344, "step": 130405 }, { "epoch": 1.127616708891406, "grad_norm": 29.78562332680584, "learning_rate": 2.402639997593132e-06, "loss": 0.16210289001464845, "step": 130410 }, { "epoch": 1.1276599424129494, "grad_norm": 1.106257305995257, "learning_rate": 2.4024403455336547e-06, "loss": 0.06763687133789062, "step": 130415 }, { "epoch": 1.1277031759344927, "grad_norm": 1.1214738774791322, "learning_rate": 2.4022406962300684e-06, "loss": 0.09829330444335938, "step": 130420 }, { "epoch": 1.1277464094560359, "grad_norm": 32.666849554857365, "learning_rate": 2.4020410496832966e-06, "loss": 0.15104751586914061, "step": 130425 }, { "epoch": 1.127789642977579, "grad_norm": 3.6937755711753186, "learning_rate": 2.4018414058942586e-06, "loss": 0.06689929962158203, "step": 130430 }, { "epoch": 1.1278328764991223, "grad_norm": 4.001091052468305, "learning_rate": 2.401641764863875e-06, "loss": 0.09403305053710938, "step": 130435 }, { "epoch": 1.1278761100206656, "grad_norm": 5.980216026405885, "learning_rate": 2.4014421265930663e-06, "loss": 0.17611236572265626, "step": 130440 }, { "epoch": 1.1279193435422088, "grad_norm": 0.30115816211090396, "learning_rate": 2.401242491082754e-06, "loss": 0.00945587158203125, "step": 130445 }, { "epoch": 1.1279625770637522, "grad_norm": 0.7167089268383885, "learning_rate": 2.4010428583338574e-06, "loss": 0.049932861328125, "step": 130450 }, { "epoch": 1.1280058105852955, "grad_norm": 8.659829026999311, "learning_rate": 2.4008432283472992e-06, "loss": 0.034334754943847655, "step": 130455 }, { "epoch": 1.1280490441068387, "grad_norm": 0.5046145441077828, "learning_rate": 2.4006436011239993e-06, "loss": 0.017794036865234376, "step": 130460 }, { "epoch": 1.128092277628382, "grad_norm": 5.449856944842705, "learning_rate": 2.4004439766648783e-06, "loss": 0.03371734619140625, "step": 130465 }, { "epoch": 1.1281355111499252, "grad_norm": 2.807741371304039, "learning_rate": 2.4002443549708557e-06, "loss": 0.03457221984863281, "step": 130470 }, { "epoch": 1.1281787446714684, "grad_norm": 0.6780923005764238, "learning_rate": 2.400044736042854e-06, "loss": 0.029914093017578126, "step": 130475 }, { "epoch": 1.1282219781930118, "grad_norm": 7.265038216692382, "learning_rate": 2.3998451198817915e-06, "loss": 0.1776031494140625, "step": 130480 }, { "epoch": 1.128265211714555, "grad_norm": 3.7830521812761466, "learning_rate": 2.399645506488592e-06, "loss": 0.073095703125, "step": 130485 }, { "epoch": 1.1283084452360983, "grad_norm": 4.1853913856534435, "learning_rate": 2.399445895864174e-06, "loss": 0.02955322265625, "step": 130490 }, { "epoch": 1.1283516787576415, "grad_norm": 1.1215049983800967, "learning_rate": 2.3992462880094584e-06, "loss": 0.0096923828125, "step": 130495 }, { "epoch": 1.1283949122791848, "grad_norm": 4.962525584174519, "learning_rate": 2.3990466829253652e-06, "loss": 0.048828125, "step": 130500 }, { "epoch": 1.128438145800728, "grad_norm": 68.34790741608663, "learning_rate": 2.398847080612817e-06, "loss": 0.09713516235351563, "step": 130505 }, { "epoch": 1.1284813793222712, "grad_norm": 4.596172238021557, "learning_rate": 2.3986474810727317e-06, "loss": 0.010513114929199218, "step": 130510 }, { "epoch": 1.1285246128438147, "grad_norm": 1.1698324684553212, "learning_rate": 2.3984478843060307e-06, "loss": 0.044847869873046876, "step": 130515 }, { "epoch": 1.128567846365358, "grad_norm": 14.267178750495415, "learning_rate": 2.3982482903136358e-06, "loss": 0.04775238037109375, "step": 130520 }, { "epoch": 1.1286110798869011, "grad_norm": 3.0888974613010687, "learning_rate": 2.3980486990964664e-06, "loss": 0.02202911376953125, "step": 130525 }, { "epoch": 1.1286543134084444, "grad_norm": 2.516431759760648, "learning_rate": 2.397849110655443e-06, "loss": 0.03640289306640625, "step": 130530 }, { "epoch": 1.1286975469299876, "grad_norm": 6.276250293444347, "learning_rate": 2.397649524991487e-06, "loss": 0.06855697631835937, "step": 130535 }, { "epoch": 1.1287407804515308, "grad_norm": 0.21324907678518445, "learning_rate": 2.397449942105518e-06, "loss": 0.05607795715332031, "step": 130540 }, { "epoch": 1.1287840139730743, "grad_norm": 1.706487591127589, "learning_rate": 2.3972503619984555e-06, "loss": 0.04973869323730469, "step": 130545 }, { "epoch": 1.1288272474946175, "grad_norm": 40.21627502590464, "learning_rate": 2.397050784671222e-06, "loss": 0.08361968994140626, "step": 130550 }, { "epoch": 1.1288704810161607, "grad_norm": 1.5127972702926604, "learning_rate": 2.3968512101247367e-06, "loss": 0.049530029296875, "step": 130555 }, { "epoch": 1.128913714537704, "grad_norm": 0.17518038214215312, "learning_rate": 2.3966516383599208e-06, "loss": 0.11936874389648437, "step": 130560 }, { "epoch": 1.1289569480592472, "grad_norm": 24.776814457010612, "learning_rate": 2.396452069377694e-06, "loss": 0.31107635498046876, "step": 130565 }, { "epoch": 1.1290001815807904, "grad_norm": 3.298573577429537, "learning_rate": 2.3962525031789772e-06, "loss": 0.14271926879882812, "step": 130570 }, { "epoch": 1.1290434151023336, "grad_norm": 0.04714515809178726, "learning_rate": 2.39605293976469e-06, "loss": 0.064288330078125, "step": 130575 }, { "epoch": 1.129086648623877, "grad_norm": 2.4366023280519538, "learning_rate": 2.395853379135753e-06, "loss": 0.15999908447265626, "step": 130580 }, { "epoch": 1.1291298821454203, "grad_norm": 3.7105689693501103, "learning_rate": 2.395653821293087e-06, "loss": 0.09331893920898438, "step": 130585 }, { "epoch": 1.1291731156669635, "grad_norm": 0.5997908450941423, "learning_rate": 2.3954542662376127e-06, "loss": 0.17507171630859375, "step": 130590 }, { "epoch": 1.1292163491885068, "grad_norm": 1.2480810422573363, "learning_rate": 2.3952547139702497e-06, "loss": 0.012926101684570312, "step": 130595 }, { "epoch": 1.12925958271005, "grad_norm": 5.854697338871371, "learning_rate": 2.395055164491919e-06, "loss": 0.013048362731933594, "step": 130600 }, { "epoch": 1.1293028162315932, "grad_norm": 19.185083118365682, "learning_rate": 2.3948556178035395e-06, "loss": 0.065545654296875, "step": 130605 }, { "epoch": 1.1293460497531367, "grad_norm": 6.090475667649019, "learning_rate": 2.3946560739060325e-06, "loss": 0.08683958053588867, "step": 130610 }, { "epoch": 1.12938928327468, "grad_norm": 9.839412829249843, "learning_rate": 2.3944565328003182e-06, "loss": 0.12168121337890625, "step": 130615 }, { "epoch": 1.1294325167962231, "grad_norm": 0.11959893054036173, "learning_rate": 2.3942569944873177e-06, "loss": 0.04172592163085938, "step": 130620 }, { "epoch": 1.1294757503177664, "grad_norm": 5.238494635812987, "learning_rate": 2.39405745896795e-06, "loss": 0.11026725769042969, "step": 130625 }, { "epoch": 1.1295189838393096, "grad_norm": 0.768690883687102, "learning_rate": 2.393857926243136e-06, "loss": 0.04550991058349609, "step": 130630 }, { "epoch": 1.1295622173608528, "grad_norm": 9.74787686488396, "learning_rate": 2.393658396313795e-06, "loss": 0.06011505126953125, "step": 130635 }, { "epoch": 1.129605450882396, "grad_norm": 11.133656903970442, "learning_rate": 2.393458869180848e-06, "loss": 0.05550689697265625, "step": 130640 }, { "epoch": 1.1296486844039395, "grad_norm": 4.700646205916001, "learning_rate": 2.3932593448452147e-06, "loss": 0.02383003234863281, "step": 130645 }, { "epoch": 1.1296919179254827, "grad_norm": 0.9131948197482903, "learning_rate": 2.3930598233078173e-06, "loss": 0.10628852844238282, "step": 130650 }, { "epoch": 1.129735151447026, "grad_norm": 0.7846072069875861, "learning_rate": 2.3928603045695733e-06, "loss": 0.32866058349609373, "step": 130655 }, { "epoch": 1.1297783849685692, "grad_norm": 0.5443731934182863, "learning_rate": 2.3926607886314042e-06, "loss": 0.06565513610839843, "step": 130660 }, { "epoch": 1.1298216184901124, "grad_norm": 11.328769763238359, "learning_rate": 2.3924612754942295e-06, "loss": 0.030529022216796875, "step": 130665 }, { "epoch": 1.1298648520116559, "grad_norm": 5.393569624735401, "learning_rate": 2.3922617651589704e-06, "loss": 0.08470039367675782, "step": 130670 }, { "epoch": 1.129908085533199, "grad_norm": 9.51137410736586, "learning_rate": 2.392062257626545e-06, "loss": 0.03761749267578125, "step": 130675 }, { "epoch": 1.1299513190547423, "grad_norm": 22.518822507773073, "learning_rate": 2.391862752897876e-06, "loss": 0.05949859619140625, "step": 130680 }, { "epoch": 1.1299945525762856, "grad_norm": 5.877176055919506, "learning_rate": 2.391663250973882e-06, "loss": 0.03220062255859375, "step": 130685 }, { "epoch": 1.1300377860978288, "grad_norm": 0.2084260574431441, "learning_rate": 2.391463751855483e-06, "loss": 0.200634765625, "step": 130690 }, { "epoch": 1.130081019619372, "grad_norm": 5.098561747657495, "learning_rate": 2.3912642555436e-06, "loss": 0.21053385734558105, "step": 130695 }, { "epoch": 1.1301242531409152, "grad_norm": 3.5864101704039477, "learning_rate": 2.391064762039152e-06, "loss": 0.17829971313476561, "step": 130700 }, { "epoch": 1.1301674866624587, "grad_norm": 24.464507114516294, "learning_rate": 2.3908652713430587e-06, "loss": 0.061370849609375, "step": 130705 }, { "epoch": 1.130210720184002, "grad_norm": 5.90651905912604, "learning_rate": 2.390665783456242e-06, "loss": 0.26253204345703124, "step": 130710 }, { "epoch": 1.1302539537055452, "grad_norm": 2.1006864143828454, "learning_rate": 2.390466298379621e-06, "loss": 0.018061065673828126, "step": 130715 }, { "epoch": 1.1302971872270884, "grad_norm": 18.25444338692442, "learning_rate": 2.3902668161141154e-06, "loss": 0.08713760375976562, "step": 130720 }, { "epoch": 1.1303404207486316, "grad_norm": 3.0614345784548753, "learning_rate": 2.390067336660645e-06, "loss": 0.011758804321289062, "step": 130725 }, { "epoch": 1.1303836542701748, "grad_norm": 13.854664514349992, "learning_rate": 2.3898678600201307e-06, "loss": 0.06876029968261718, "step": 130730 }, { "epoch": 1.1304268877917183, "grad_norm": 74.01520502983873, "learning_rate": 2.3896683861934908e-06, "loss": 0.22469329833984375, "step": 130735 }, { "epoch": 1.1304701213132615, "grad_norm": 0.49343220524110687, "learning_rate": 2.3894689151816472e-06, "loss": 0.043209075927734375, "step": 130740 }, { "epoch": 1.1305133548348048, "grad_norm": 1.6460603398812295, "learning_rate": 2.3892694469855195e-06, "loss": 0.05319690704345703, "step": 130745 }, { "epoch": 1.130556588356348, "grad_norm": 2.222216792158799, "learning_rate": 2.3890699816060262e-06, "loss": 0.0122589111328125, "step": 130750 }, { "epoch": 1.1305998218778912, "grad_norm": 2.165067498337972, "learning_rate": 2.388870519044089e-06, "loss": 0.08023948669433593, "step": 130755 }, { "epoch": 1.1306430553994344, "grad_norm": 5.086883364204721, "learning_rate": 2.3886710593006266e-06, "loss": 0.049919509887695314, "step": 130760 }, { "epoch": 1.1306862889209777, "grad_norm": 5.000471866826411, "learning_rate": 2.3884716023765597e-06, "loss": 0.0843130111694336, "step": 130765 }, { "epoch": 1.1307295224425211, "grad_norm": 0.4617486834343443, "learning_rate": 2.3882721482728063e-06, "loss": 0.05847244262695313, "step": 130770 }, { "epoch": 1.1307727559640643, "grad_norm": 1.3378081528479444, "learning_rate": 2.3880726969902888e-06, "loss": 0.042031478881835935, "step": 130775 }, { "epoch": 1.1308159894856076, "grad_norm": 0.3034582885973815, "learning_rate": 2.3878732485299254e-06, "loss": 0.03419647216796875, "step": 130780 }, { "epoch": 1.1308592230071508, "grad_norm": 5.2992038623731155, "learning_rate": 2.3876738028926373e-06, "loss": 0.10980072021484374, "step": 130785 }, { "epoch": 1.130902456528694, "grad_norm": 0.838389224486613, "learning_rate": 2.3874743600793434e-06, "loss": 0.046254730224609374, "step": 130790 }, { "epoch": 1.1309456900502373, "grad_norm": 2.1152789502716107, "learning_rate": 2.387274920090964e-06, "loss": 0.06644439697265625, "step": 130795 }, { "epoch": 1.1309889235717807, "grad_norm": 1.6101081329480045, "learning_rate": 2.3870754829284165e-06, "loss": 0.05599212646484375, "step": 130800 }, { "epoch": 1.131032157093324, "grad_norm": 1.1528027250978345, "learning_rate": 2.3868760485926244e-06, "loss": 0.1907745361328125, "step": 130805 }, { "epoch": 1.1310753906148672, "grad_norm": 0.7913861652434896, "learning_rate": 2.3866766170845052e-06, "loss": 0.08598709106445312, "step": 130810 }, { "epoch": 1.1311186241364104, "grad_norm": 0.08355758834802576, "learning_rate": 2.3864771884049798e-06, "loss": 0.08393096923828125, "step": 130815 }, { "epoch": 1.1311618576579536, "grad_norm": 0.10306931140776397, "learning_rate": 2.3862777625549674e-06, "loss": 0.010329437255859376, "step": 130820 }, { "epoch": 1.1312050911794969, "grad_norm": 1.7181101535368015, "learning_rate": 2.3860783395353873e-06, "loss": 0.0368896484375, "step": 130825 }, { "epoch": 1.13124832470104, "grad_norm": 0.33907782435431366, "learning_rate": 2.385878919347159e-06, "loss": 0.1844097137451172, "step": 130830 }, { "epoch": 1.1312915582225835, "grad_norm": 7.687126930776388, "learning_rate": 2.3856795019912043e-06, "loss": 0.10375823974609374, "step": 130835 }, { "epoch": 1.1313347917441268, "grad_norm": 0.04270085971363794, "learning_rate": 2.3854800874684403e-06, "loss": 0.027728271484375, "step": 130840 }, { "epoch": 1.13137802526567, "grad_norm": 0.5652381058060988, "learning_rate": 2.3852806757797884e-06, "loss": 0.017796707153320313, "step": 130845 }, { "epoch": 1.1314212587872132, "grad_norm": 0.2501107766166256, "learning_rate": 2.385081266926168e-06, "loss": 0.016051483154296876, "step": 130850 }, { "epoch": 1.1314644923087565, "grad_norm": 0.0750808686557652, "learning_rate": 2.3848818609084984e-06, "loss": 0.028424072265625, "step": 130855 }, { "epoch": 1.1315077258303, "grad_norm": 0.7036415047973932, "learning_rate": 2.3846824577276987e-06, "loss": 0.02358722686767578, "step": 130860 }, { "epoch": 1.1315509593518431, "grad_norm": 2.65990237335832, "learning_rate": 2.3844830573846894e-06, "loss": 0.015920066833496095, "step": 130865 }, { "epoch": 1.1315941928733864, "grad_norm": 0.33754616574821633, "learning_rate": 2.3842836598803894e-06, "loss": 0.017026519775390624, "step": 130870 }, { "epoch": 1.1316374263949296, "grad_norm": 0.10043498435788283, "learning_rate": 2.3840842652157196e-06, "loss": 0.03178348541259766, "step": 130875 }, { "epoch": 1.1316806599164728, "grad_norm": 6.588075177592265, "learning_rate": 2.3838848733915984e-06, "loss": 0.07910614013671875, "step": 130880 }, { "epoch": 1.131723893438016, "grad_norm": 36.98311394806454, "learning_rate": 2.3836854844089462e-06, "loss": 0.14513282775878905, "step": 130885 }, { "epoch": 1.1317671269595593, "grad_norm": 1.0094973637069402, "learning_rate": 2.3834860982686813e-06, "loss": 0.09891510009765625, "step": 130890 }, { "epoch": 1.1318103604811025, "grad_norm": 23.958262318180378, "learning_rate": 2.383286714971724e-06, "loss": 0.17240371704101562, "step": 130895 }, { "epoch": 1.131853594002646, "grad_norm": 6.255912141666483, "learning_rate": 2.3830873345189937e-06, "loss": 0.102691650390625, "step": 130900 }, { "epoch": 1.1318968275241892, "grad_norm": 12.829885994612232, "learning_rate": 2.382887956911411e-06, "loss": 0.04127655029296875, "step": 130905 }, { "epoch": 1.1319400610457324, "grad_norm": 0.5076964615602746, "learning_rate": 2.3826885821498943e-06, "loss": 0.01739921569824219, "step": 130910 }, { "epoch": 1.1319832945672756, "grad_norm": 3.6125295967112114, "learning_rate": 2.382489210235364e-06, "loss": 0.04216842651367188, "step": 130915 }, { "epoch": 1.1320265280888189, "grad_norm": 12.30054672777113, "learning_rate": 2.3822898411687374e-06, "loss": 0.11297111511230469, "step": 130920 }, { "epoch": 1.1320697616103623, "grad_norm": 0.23579132277147644, "learning_rate": 2.3820904749509364e-06, "loss": 0.173333740234375, "step": 130925 }, { "epoch": 1.1321129951319056, "grad_norm": 2.401260350570301, "learning_rate": 2.381891111582878e-06, "loss": 0.0415313720703125, "step": 130930 }, { "epoch": 1.1321562286534488, "grad_norm": 0.392394214762265, "learning_rate": 2.381691751065485e-06, "loss": 0.005471038818359375, "step": 130935 }, { "epoch": 1.132199462174992, "grad_norm": 5.81102908826477, "learning_rate": 2.3814923933996744e-06, "loss": 0.10786895751953125, "step": 130940 }, { "epoch": 1.1322426956965352, "grad_norm": 9.299856940109771, "learning_rate": 2.381293038586366e-06, "loss": 0.02996978759765625, "step": 130945 }, { "epoch": 1.1322859292180785, "grad_norm": 6.414184337074598, "learning_rate": 2.38109368662648e-06, "loss": 0.23330459594726563, "step": 130950 }, { "epoch": 1.1323291627396217, "grad_norm": 0.8601515933718639, "learning_rate": 2.3808943375209355e-06, "loss": 0.10400543212890626, "step": 130955 }, { "epoch": 1.1323723962611651, "grad_norm": 1.9710786346172506, "learning_rate": 2.3806949912706495e-06, "loss": 0.07421417236328125, "step": 130960 }, { "epoch": 1.1324156297827084, "grad_norm": 2.1477744150933207, "learning_rate": 2.3804956478765456e-06, "loss": 0.09634475708007813, "step": 130965 }, { "epoch": 1.1324588633042516, "grad_norm": 0.28797426786141783, "learning_rate": 2.380296307339541e-06, "loss": 0.15162811279296876, "step": 130970 }, { "epoch": 1.1325020968257948, "grad_norm": 6.247897929547166, "learning_rate": 2.380096969660554e-06, "loss": 0.1563507080078125, "step": 130975 }, { "epoch": 1.132545330347338, "grad_norm": 3.394130657293977, "learning_rate": 2.3798976348405057e-06, "loss": 0.04962005615234375, "step": 130980 }, { "epoch": 1.1325885638688813, "grad_norm": 1.2350548068227105, "learning_rate": 2.379698302880315e-06, "loss": 0.09965133666992188, "step": 130985 }, { "epoch": 1.1326317973904247, "grad_norm": 0.47219471860268886, "learning_rate": 2.3794989737808994e-06, "loss": 0.5746139526367188, "step": 130990 }, { "epoch": 1.132675030911968, "grad_norm": 4.16366432956136, "learning_rate": 2.379299647543181e-06, "loss": 0.11861896514892578, "step": 130995 }, { "epoch": 1.1327182644335112, "grad_norm": 3.269762994757199, "learning_rate": 2.3791003241680777e-06, "loss": 0.0750091552734375, "step": 131000 }, { "epoch": 1.1327614979550544, "grad_norm": 0.15143873333657693, "learning_rate": 2.3789010036565083e-06, "loss": 0.09971046447753906, "step": 131005 }, { "epoch": 1.1328047314765977, "grad_norm": 1.3688816678969453, "learning_rate": 2.3787016860093933e-06, "loss": 0.1671703815460205, "step": 131010 }, { "epoch": 1.1328479649981409, "grad_norm": 4.759134550454212, "learning_rate": 2.3785023712276515e-06, "loss": 0.17004776000976562, "step": 131015 }, { "epoch": 1.1328911985196841, "grad_norm": 9.777096952650904, "learning_rate": 2.3783030593122015e-06, "loss": 0.058612060546875, "step": 131020 }, { "epoch": 1.1329344320412276, "grad_norm": 4.42849844650717, "learning_rate": 2.3781037502639617e-06, "loss": 0.06453914642333984, "step": 131025 }, { "epoch": 1.1329776655627708, "grad_norm": 1.2903761251530566, "learning_rate": 2.3779044440838535e-06, "loss": 0.038385009765625, "step": 131030 }, { "epoch": 1.133020899084314, "grad_norm": 15.295721912903277, "learning_rate": 2.3777051407727945e-06, "loss": 0.0842437744140625, "step": 131035 }, { "epoch": 1.1330641326058573, "grad_norm": 23.216413416265276, "learning_rate": 2.377505840331705e-06, "loss": 0.06247100830078125, "step": 131040 }, { "epoch": 1.1331073661274005, "grad_norm": 2.4119796730533745, "learning_rate": 2.377306542761504e-06, "loss": 0.10854339599609375, "step": 131045 }, { "epoch": 1.1331505996489437, "grad_norm": 6.897443557974375, "learning_rate": 2.3771072480631096e-06, "loss": 0.1571868896484375, "step": 131050 }, { "epoch": 1.1331938331704872, "grad_norm": 1.0698333133595554, "learning_rate": 2.376907956237441e-06, "loss": 0.017888641357421874, "step": 131055 }, { "epoch": 1.1332370666920304, "grad_norm": 48.09815475029834, "learning_rate": 2.376708667285418e-06, "loss": 0.11748199462890625, "step": 131060 }, { "epoch": 1.1332803002135736, "grad_norm": 0.4008759197025411, "learning_rate": 2.3765093812079594e-06, "loss": 0.08431396484375, "step": 131065 }, { "epoch": 1.1333235337351169, "grad_norm": 3.5074040378149074, "learning_rate": 2.3763100980059856e-06, "loss": 0.042078399658203126, "step": 131070 }, { "epoch": 1.13336676725666, "grad_norm": 12.555245942905595, "learning_rate": 2.376110817680414e-06, "loss": 0.05492715835571289, "step": 131075 }, { "epoch": 1.1334100007782033, "grad_norm": 28.388983740790923, "learning_rate": 2.3759115402321645e-06, "loss": 0.12063751220703126, "step": 131080 }, { "epoch": 1.1334532342997465, "grad_norm": 44.47593855772626, "learning_rate": 2.3757122656621543e-06, "loss": 0.17005386352539062, "step": 131085 }, { "epoch": 1.13349646782129, "grad_norm": 3.5558175257818947, "learning_rate": 2.3755129939713054e-06, "loss": 0.02641448974609375, "step": 131090 }, { "epoch": 1.1335397013428332, "grad_norm": 20.253562821762788, "learning_rate": 2.3753137251605344e-06, "loss": 0.10867767333984375, "step": 131095 }, { "epoch": 1.1335829348643764, "grad_norm": 34.614774538470456, "learning_rate": 2.3751144592307622e-06, "loss": 0.07886314392089844, "step": 131100 }, { "epoch": 1.1336261683859197, "grad_norm": 20.723802423649936, "learning_rate": 2.374915196182907e-06, "loss": 0.08101959228515625, "step": 131105 }, { "epoch": 1.133669401907463, "grad_norm": 6.62136150782422, "learning_rate": 2.3747159360178877e-06, "loss": 0.18260345458984376, "step": 131110 }, { "epoch": 1.1337126354290064, "grad_norm": 4.26929008940407, "learning_rate": 2.3745166787366227e-06, "loss": 0.04099578857421875, "step": 131115 }, { "epoch": 1.1337558689505496, "grad_norm": 6.1300124765810295, "learning_rate": 2.3743174243400303e-06, "loss": 0.042376708984375, "step": 131120 }, { "epoch": 1.1337991024720928, "grad_norm": 0.4584457458663842, "learning_rate": 2.3741181728290324e-06, "loss": 0.046835136413574216, "step": 131125 }, { "epoch": 1.133842335993636, "grad_norm": 2.970569509584825, "learning_rate": 2.373918924204546e-06, "loss": 0.06495018005371093, "step": 131130 }, { "epoch": 1.1338855695151793, "grad_norm": 1.0293287057941745, "learning_rate": 2.3737196784674905e-06, "loss": 0.10255279541015624, "step": 131135 }, { "epoch": 1.1339288030367225, "grad_norm": 0.1744907868569138, "learning_rate": 2.373520435618784e-06, "loss": 0.005064964294433594, "step": 131140 }, { "epoch": 1.1339720365582657, "grad_norm": 0.8355107674243837, "learning_rate": 2.373321195659346e-06, "loss": 0.07483673095703125, "step": 131145 }, { "epoch": 1.134015270079809, "grad_norm": 5.458460366058709, "learning_rate": 2.373121958590094e-06, "loss": 0.131304931640625, "step": 131150 }, { "epoch": 1.1340585036013524, "grad_norm": 2.786650477809914, "learning_rate": 2.3729227244119494e-06, "loss": 0.20649185180664062, "step": 131155 }, { "epoch": 1.1341017371228956, "grad_norm": 86.89610568162468, "learning_rate": 2.37272349312583e-06, "loss": 0.3582599639892578, "step": 131160 }, { "epoch": 1.1341449706444389, "grad_norm": 2.8520376435613097, "learning_rate": 2.3725242647326544e-06, "loss": 0.024849700927734374, "step": 131165 }, { "epoch": 1.134188204165982, "grad_norm": 5.834206755403312, "learning_rate": 2.372325039233341e-06, "loss": 0.1740743637084961, "step": 131170 }, { "epoch": 1.1342314376875253, "grad_norm": 0.9705390569299069, "learning_rate": 2.3721258166288095e-06, "loss": 0.03651351928710937, "step": 131175 }, { "epoch": 1.1342746712090688, "grad_norm": 1.7480778881712746, "learning_rate": 2.371926596919977e-06, "loss": 0.06610870361328125, "step": 131180 }, { "epoch": 1.134317904730612, "grad_norm": 0.9692228469788184, "learning_rate": 2.3717273801077645e-06, "loss": 0.06375160217285156, "step": 131185 }, { "epoch": 1.1343611382521552, "grad_norm": 3.2488612488704103, "learning_rate": 2.37152816619309e-06, "loss": 0.04710884094238281, "step": 131190 }, { "epoch": 1.1344043717736985, "grad_norm": 0.33686729040619245, "learning_rate": 2.3713289551768725e-06, "loss": 0.04938011169433594, "step": 131195 }, { "epoch": 1.1344476052952417, "grad_norm": 5.825266243746381, "learning_rate": 2.3711297470600293e-06, "loss": 0.035364532470703126, "step": 131200 }, { "epoch": 1.134490838816785, "grad_norm": 0.9834080773242712, "learning_rate": 2.370930541843481e-06, "loss": 0.18348388671875, "step": 131205 }, { "epoch": 1.1345340723383281, "grad_norm": 15.503396098811423, "learning_rate": 2.370731339528145e-06, "loss": 0.05804786682128906, "step": 131210 }, { "epoch": 1.1345773058598716, "grad_norm": 9.697037141771077, "learning_rate": 2.3705321401149395e-06, "loss": 0.108892822265625, "step": 131215 }, { "epoch": 1.1346205393814148, "grad_norm": 1.3890120726639275, "learning_rate": 2.3703329436047855e-06, "loss": 0.011087989807128907, "step": 131220 }, { "epoch": 1.134663772902958, "grad_norm": 1.154916357906128, "learning_rate": 2.3701337499986e-06, "loss": 0.009391212463378906, "step": 131225 }, { "epoch": 1.1347070064245013, "grad_norm": 2.470681557131281, "learning_rate": 2.369934559297302e-06, "loss": 0.06676597595214843, "step": 131230 }, { "epoch": 1.1347502399460445, "grad_norm": 37.43617848723914, "learning_rate": 2.36973537150181e-06, "loss": 0.07153587341308594, "step": 131235 }, { "epoch": 1.1347934734675877, "grad_norm": 13.812216139852412, "learning_rate": 2.369536186613043e-06, "loss": 0.08155946731567383, "step": 131240 }, { "epoch": 1.1348367069891312, "grad_norm": 7.138103417192575, "learning_rate": 2.3693370046319182e-06, "loss": 0.11747589111328124, "step": 131245 }, { "epoch": 1.1348799405106744, "grad_norm": 3.375415713979491, "learning_rate": 2.369137825559357e-06, "loss": 0.01619415283203125, "step": 131250 }, { "epoch": 1.1349231740322177, "grad_norm": 3.7036340078137884, "learning_rate": 2.368938649396276e-06, "loss": 0.13564071655273438, "step": 131255 }, { "epoch": 1.1349664075537609, "grad_norm": 31.47255301410065, "learning_rate": 2.3687394761435936e-06, "loss": 0.09851951599121093, "step": 131260 }, { "epoch": 1.135009641075304, "grad_norm": 3.266847456197476, "learning_rate": 2.36854030580223e-06, "loss": 0.027984619140625, "step": 131265 }, { "epoch": 1.1350528745968473, "grad_norm": 45.0967150103001, "learning_rate": 2.368341138373102e-06, "loss": 0.1514739990234375, "step": 131270 }, { "epoch": 1.1350961081183906, "grad_norm": 0.45808197458525873, "learning_rate": 2.3681419738571295e-06, "loss": 0.0529052734375, "step": 131275 }, { "epoch": 1.135139341639934, "grad_norm": 6.956351984766862, "learning_rate": 2.3679428122552285e-06, "loss": 0.0158416748046875, "step": 131280 }, { "epoch": 1.1351825751614772, "grad_norm": 0.16635220899819286, "learning_rate": 2.367743653568321e-06, "loss": 0.013567352294921875, "step": 131285 }, { "epoch": 1.1352258086830205, "grad_norm": 5.414640423023082, "learning_rate": 2.3675444977973237e-06, "loss": 0.029825210571289062, "step": 131290 }, { "epoch": 1.1352690422045637, "grad_norm": 1.9440353389151175, "learning_rate": 2.3673453449431554e-06, "loss": 0.1902679443359375, "step": 131295 }, { "epoch": 1.135312275726107, "grad_norm": 44.26614474084407, "learning_rate": 2.3671461950067345e-06, "loss": 0.3874786376953125, "step": 131300 }, { "epoch": 1.1353555092476502, "grad_norm": 2.5624703916659404, "learning_rate": 2.3669470479889794e-06, "loss": 0.043536376953125, "step": 131305 }, { "epoch": 1.1353987427691936, "grad_norm": 0.3696114019802846, "learning_rate": 2.3667479038908076e-06, "loss": 0.016654205322265626, "step": 131310 }, { "epoch": 1.1354419762907368, "grad_norm": 1.0311376967920072, "learning_rate": 2.3665487627131397e-06, "loss": 0.04490509033203125, "step": 131315 }, { "epoch": 1.13548520981228, "grad_norm": 15.511742015667267, "learning_rate": 2.366349624456892e-06, "loss": 0.26392478942871095, "step": 131320 }, { "epoch": 1.1355284433338233, "grad_norm": 2.2957375614648043, "learning_rate": 2.3661504891229847e-06, "loss": 0.1367584228515625, "step": 131325 }, { "epoch": 1.1355716768553665, "grad_norm": 7.113626233982482, "learning_rate": 2.3659513567123353e-06, "loss": 0.030694580078125, "step": 131330 }, { "epoch": 1.1356149103769098, "grad_norm": 1.4458495519537395, "learning_rate": 2.3657522272258625e-06, "loss": 0.010990524291992187, "step": 131335 }, { "epoch": 1.135658143898453, "grad_norm": 0.3389245187051944, "learning_rate": 2.3655531006644825e-06, "loss": 0.03451814651489258, "step": 131340 }, { "epoch": 1.1357013774199964, "grad_norm": 3.753271035125928, "learning_rate": 2.365353977029117e-06, "loss": 0.009270858764648438, "step": 131345 }, { "epoch": 1.1357446109415397, "grad_norm": 2.8958087359617988, "learning_rate": 2.365154856320683e-06, "loss": 0.0334716796875, "step": 131350 }, { "epoch": 1.135787844463083, "grad_norm": 33.69095229499008, "learning_rate": 2.3649557385400982e-06, "loss": 0.14059829711914062, "step": 131355 }, { "epoch": 1.1358310779846261, "grad_norm": 2.8351196452900247, "learning_rate": 2.364756623688282e-06, "loss": 0.03652496337890625, "step": 131360 }, { "epoch": 1.1358743115061694, "grad_norm": 20.692794242217598, "learning_rate": 2.3645575117661522e-06, "loss": 0.06052093505859375, "step": 131365 }, { "epoch": 1.1359175450277128, "grad_norm": 1.5257203523497838, "learning_rate": 2.3643584027746263e-06, "loss": 0.015696334838867187, "step": 131370 }, { "epoch": 1.135960778549256, "grad_norm": 3.8219612128526204, "learning_rate": 2.3641592967146225e-06, "loss": 0.07277069091796876, "step": 131375 }, { "epoch": 1.1360040120707993, "grad_norm": 20.032602588190997, "learning_rate": 2.3639601935870612e-06, "loss": 0.06627120971679687, "step": 131380 }, { "epoch": 1.1360472455923425, "grad_norm": 0.6338898763304054, "learning_rate": 2.3637610933928593e-06, "loss": 0.031869125366210935, "step": 131385 }, { "epoch": 1.1360904791138857, "grad_norm": 12.710885845970774, "learning_rate": 2.363561996132935e-06, "loss": 0.0456817626953125, "step": 131390 }, { "epoch": 1.136133712635429, "grad_norm": 0.8999944163184158, "learning_rate": 2.363362901808206e-06, "loss": 0.16266565322875975, "step": 131395 }, { "epoch": 1.1361769461569722, "grad_norm": 25.291206053461032, "learning_rate": 2.3631638104195916e-06, "loss": 0.16183013916015626, "step": 131400 }, { "epoch": 1.1362201796785156, "grad_norm": 5.114187428440143, "learning_rate": 2.3629647219680078e-06, "loss": 0.04029006958007812, "step": 131405 }, { "epoch": 1.1362634132000589, "grad_norm": 0.22366620010615806, "learning_rate": 2.3627656364543763e-06, "loss": 0.027614784240722657, "step": 131410 }, { "epoch": 1.136306646721602, "grad_norm": 1.2393105972201006, "learning_rate": 2.362566553879613e-06, "loss": 0.054498291015625, "step": 131415 }, { "epoch": 1.1363498802431453, "grad_norm": 1.6521635711897338, "learning_rate": 2.3623674742446367e-06, "loss": 0.13906784057617189, "step": 131420 }, { "epoch": 1.1363931137646885, "grad_norm": 0.483675727561644, "learning_rate": 2.3621683975503644e-06, "loss": 0.05405712127685547, "step": 131425 }, { "epoch": 1.1364363472862318, "grad_norm": 0.2040199687083298, "learning_rate": 2.361969323797716e-06, "loss": 0.06369781494140625, "step": 131430 }, { "epoch": 1.1364795808077752, "grad_norm": 2.938781464581228, "learning_rate": 2.361770252987607e-06, "loss": 0.05979080200195312, "step": 131435 }, { "epoch": 1.1365228143293185, "grad_norm": 1.1349812082521746, "learning_rate": 2.361571185120959e-06, "loss": 0.20696029663085938, "step": 131440 }, { "epoch": 1.1365660478508617, "grad_norm": 0.35127658460306144, "learning_rate": 2.3613721201986877e-06, "loss": 0.046967697143554685, "step": 131445 }, { "epoch": 1.136609281372405, "grad_norm": 1.8292242277499875, "learning_rate": 2.361173058221712e-06, "loss": 0.215667724609375, "step": 131450 }, { "epoch": 1.1366525148939481, "grad_norm": 0.21354794728599724, "learning_rate": 2.3609739991909493e-06, "loss": 0.3252555847167969, "step": 131455 }, { "epoch": 1.1366957484154914, "grad_norm": 0.45303849939261487, "learning_rate": 2.3607749431073184e-06, "loss": 0.07287406921386719, "step": 131460 }, { "epoch": 1.1367389819370346, "grad_norm": 0.27429478991859163, "learning_rate": 2.3605758899717374e-06, "loss": 0.04975814819335937, "step": 131465 }, { "epoch": 1.136782215458578, "grad_norm": 0.28368340122837177, "learning_rate": 2.360376839785122e-06, "loss": 0.152703857421875, "step": 131470 }, { "epoch": 1.1368254489801213, "grad_norm": 1.7661290299963104, "learning_rate": 2.3601777925483935e-06, "loss": 0.020369338989257812, "step": 131475 }, { "epoch": 1.1368686825016645, "grad_norm": 2.105245468803048, "learning_rate": 2.3599787482624687e-06, "loss": 0.03416366577148437, "step": 131480 }, { "epoch": 1.1369119160232077, "grad_norm": 14.465258182364646, "learning_rate": 2.359779706928265e-06, "loss": 0.02902374267578125, "step": 131485 }, { "epoch": 1.136955149544751, "grad_norm": 3.383301478873296, "learning_rate": 2.359580668546701e-06, "loss": 0.023180198669433594, "step": 131490 }, { "epoch": 1.1369983830662942, "grad_norm": 1.8446176922198074, "learning_rate": 2.3593816331186943e-06, "loss": 0.01881561279296875, "step": 131495 }, { "epoch": 1.1370416165878376, "grad_norm": 0.16071643827794582, "learning_rate": 2.359182600645162e-06, "loss": 0.02403411865234375, "step": 131500 }, { "epoch": 1.1370848501093809, "grad_norm": 24.88862303881518, "learning_rate": 2.3589835711270236e-06, "loss": 0.028035736083984374, "step": 131505 }, { "epoch": 1.137128083630924, "grad_norm": 0.163932818468759, "learning_rate": 2.3587845445651967e-06, "loss": 0.014350318908691406, "step": 131510 }, { "epoch": 1.1371713171524673, "grad_norm": 36.35484884862121, "learning_rate": 2.3585855209605978e-06, "loss": 0.09383163452148438, "step": 131515 }, { "epoch": 1.1372145506740106, "grad_norm": 3.006950976924512, "learning_rate": 2.358386500314147e-06, "loss": 0.08776931762695313, "step": 131520 }, { "epoch": 1.1372577841955538, "grad_norm": 12.25290641461166, "learning_rate": 2.358187482626761e-06, "loss": 0.12194061279296875, "step": 131525 }, { "epoch": 1.137301017717097, "grad_norm": 1.987941265493651, "learning_rate": 2.357988467899357e-06, "loss": 0.01693572998046875, "step": 131530 }, { "epoch": 1.1373442512386405, "grad_norm": 1.9045981656311253, "learning_rate": 2.357789456132853e-06, "loss": 0.024407958984375, "step": 131535 }, { "epoch": 1.1373874847601837, "grad_norm": 20.44802840063199, "learning_rate": 2.357590447328168e-06, "loss": 0.04453496932983399, "step": 131540 }, { "epoch": 1.137430718281727, "grad_norm": 13.470860788826995, "learning_rate": 2.3573914414862184e-06, "loss": 0.1600208282470703, "step": 131545 }, { "epoch": 1.1374739518032702, "grad_norm": 14.515093064306653, "learning_rate": 2.3571924386079235e-06, "loss": 0.154254150390625, "step": 131550 }, { "epoch": 1.1375171853248134, "grad_norm": 4.50003432871451, "learning_rate": 2.3569934386942004e-06, "loss": 0.1608255386352539, "step": 131555 }, { "epoch": 1.1375604188463568, "grad_norm": 10.362351998843614, "learning_rate": 2.3567944417459668e-06, "loss": 0.033487701416015626, "step": 131560 }, { "epoch": 1.1376036523679, "grad_norm": 2.493325946975297, "learning_rate": 2.3565954477641386e-06, "loss": 0.03215484619140625, "step": 131565 }, { "epoch": 1.1376468858894433, "grad_norm": 0.4782541988096018, "learning_rate": 2.3563964567496363e-06, "loss": 0.04547119140625, "step": 131570 }, { "epoch": 1.1376901194109865, "grad_norm": 14.036552006316132, "learning_rate": 2.3561974687033776e-06, "loss": 0.07744483947753907, "step": 131575 }, { "epoch": 1.1377333529325298, "grad_norm": 35.26913830491459, "learning_rate": 2.355998483626279e-06, "loss": 0.1161529541015625, "step": 131580 }, { "epoch": 1.137776586454073, "grad_norm": 8.110807737271779, "learning_rate": 2.3557995015192585e-06, "loss": 0.0592681884765625, "step": 131585 }, { "epoch": 1.1378198199756162, "grad_norm": 4.506325904294172, "learning_rate": 2.355600522383233e-06, "loss": 0.04264144897460938, "step": 131590 }, { "epoch": 1.1378630534971594, "grad_norm": 50.8675979328958, "learning_rate": 2.355401546219121e-06, "loss": 0.1194091796875, "step": 131595 }, { "epoch": 1.1379062870187029, "grad_norm": 0.1611452051004613, "learning_rate": 2.35520257302784e-06, "loss": 0.039841747283935545, "step": 131600 }, { "epoch": 1.1379495205402461, "grad_norm": 3.9499397938489547, "learning_rate": 2.3550036028103086e-06, "loss": 0.036881637573242185, "step": 131605 }, { "epoch": 1.1379927540617893, "grad_norm": 4.804103848296309, "learning_rate": 2.3548046355674438e-06, "loss": 0.016363525390625, "step": 131610 }, { "epoch": 1.1380359875833326, "grad_norm": 2.2285539401587013, "learning_rate": 2.354605671300163e-06, "loss": 0.009479904174804687, "step": 131615 }, { "epoch": 1.1380792211048758, "grad_norm": 1.0775553452652662, "learning_rate": 2.354406710009383e-06, "loss": 0.027819061279296876, "step": 131620 }, { "epoch": 1.1381224546264193, "grad_norm": 0.48953388742055426, "learning_rate": 2.3542077516960223e-06, "loss": 0.034503936767578125, "step": 131625 }, { "epoch": 1.1381656881479625, "grad_norm": 11.335586618907888, "learning_rate": 2.3540087963609974e-06, "loss": 0.09138069152832032, "step": 131630 }, { "epoch": 1.1382089216695057, "grad_norm": 1.9378061511314995, "learning_rate": 2.3538098440052287e-06, "loss": 0.1017242431640625, "step": 131635 }, { "epoch": 1.138252155191049, "grad_norm": 0.09650233202875826, "learning_rate": 2.353610894629632e-06, "loss": 0.0374267578125, "step": 131640 }, { "epoch": 1.1382953887125922, "grad_norm": 6.0341707616252425, "learning_rate": 2.353411948235124e-06, "loss": 0.03749275207519531, "step": 131645 }, { "epoch": 1.1383386222341354, "grad_norm": 14.218313016337062, "learning_rate": 2.3532130048226227e-06, "loss": 0.044797515869140624, "step": 131650 }, { "epoch": 1.1383818557556786, "grad_norm": 1.1937333728906954, "learning_rate": 2.353014064393046e-06, "loss": 0.10294189453125, "step": 131655 }, { "epoch": 1.138425089277222, "grad_norm": 11.053129042357622, "learning_rate": 2.3528151269473104e-06, "loss": 0.055517578125, "step": 131660 }, { "epoch": 1.1384683227987653, "grad_norm": 0.9997159749263613, "learning_rate": 2.352616192486336e-06, "loss": 0.29033851623535156, "step": 131665 }, { "epoch": 1.1385115563203085, "grad_norm": 0.4728490158838555, "learning_rate": 2.352417261011038e-06, "loss": 0.032487106323242185, "step": 131670 }, { "epoch": 1.1385547898418518, "grad_norm": 3.727236766109988, "learning_rate": 2.3522183325223344e-06, "loss": 0.1785125732421875, "step": 131675 }, { "epoch": 1.138598023363395, "grad_norm": 0.4403352371390667, "learning_rate": 2.3520194070211414e-06, "loss": 0.08198814392089844, "step": 131680 }, { "epoch": 1.1386412568849382, "grad_norm": 0.6378902156079992, "learning_rate": 2.3518204845083793e-06, "loss": 0.2404632568359375, "step": 131685 }, { "epoch": 1.1386844904064817, "grad_norm": 5.367881421593671, "learning_rate": 2.351621564984962e-06, "loss": 0.09812774658203124, "step": 131690 }, { "epoch": 1.138727723928025, "grad_norm": 73.36849401403107, "learning_rate": 2.3514226484518104e-06, "loss": 0.14811172485351562, "step": 131695 }, { "epoch": 1.1387709574495681, "grad_norm": 3.246369655833735, "learning_rate": 2.35122373490984e-06, "loss": 0.033782958984375, "step": 131700 }, { "epoch": 1.1388141909711114, "grad_norm": 0.703505458823001, "learning_rate": 2.351024824359968e-06, "loss": 0.02684917449951172, "step": 131705 }, { "epoch": 1.1388574244926546, "grad_norm": 4.929900022661867, "learning_rate": 2.350825916803112e-06, "loss": 0.05159816741943359, "step": 131710 }, { "epoch": 1.1389006580141978, "grad_norm": 56.38909779897156, "learning_rate": 2.3506270122401903e-06, "loss": 0.47881927490234377, "step": 131715 }, { "epoch": 1.138943891535741, "grad_norm": 0.7027758299702147, "learning_rate": 2.3504281106721195e-06, "loss": 0.17429046630859374, "step": 131720 }, { "epoch": 1.1389871250572845, "grad_norm": 2.3209600468233567, "learning_rate": 2.3502292120998153e-06, "loss": 0.06875267028808593, "step": 131725 }, { "epoch": 1.1390303585788277, "grad_norm": 0.9915645647235818, "learning_rate": 2.3500303165241976e-06, "loss": 0.2460481643676758, "step": 131730 }, { "epoch": 1.139073592100371, "grad_norm": 0.5411032074569967, "learning_rate": 2.349831423946183e-06, "loss": 0.025603103637695312, "step": 131735 }, { "epoch": 1.1391168256219142, "grad_norm": 3.269065482193243, "learning_rate": 2.3496325343666877e-06, "loss": 0.054369354248046876, "step": 131740 }, { "epoch": 1.1391600591434574, "grad_norm": 12.131121965085194, "learning_rate": 2.3494336477866303e-06, "loss": 0.19011077880859376, "step": 131745 }, { "epoch": 1.1392032926650006, "grad_norm": 2.964483984167602, "learning_rate": 2.3492347642069274e-06, "loss": 0.06822509765625, "step": 131750 }, { "epoch": 1.139246526186544, "grad_norm": 12.252672906997958, "learning_rate": 2.349035883628495e-06, "loss": 0.08903961181640625, "step": 131755 }, { "epoch": 1.1392897597080873, "grad_norm": 4.887285558552014, "learning_rate": 2.348837006052253e-06, "loss": 0.0291900634765625, "step": 131760 }, { "epoch": 1.1393329932296306, "grad_norm": 0.20009043155079742, "learning_rate": 2.348638131479117e-06, "loss": 0.06040802001953125, "step": 131765 }, { "epoch": 1.1393762267511738, "grad_norm": 1.5257100984813539, "learning_rate": 2.348439259910004e-06, "loss": 0.015200042724609375, "step": 131770 }, { "epoch": 1.139419460272717, "grad_norm": 2.6658104288063442, "learning_rate": 2.348240391345832e-06, "loss": 0.05874176025390625, "step": 131775 }, { "epoch": 1.1394626937942602, "grad_norm": 10.280206390234728, "learning_rate": 2.3480415257875174e-06, "loss": 0.05516357421875, "step": 131780 }, { "epoch": 1.1395059273158035, "grad_norm": 2.627232168641464, "learning_rate": 2.347842663235977e-06, "loss": 0.07841148376464843, "step": 131785 }, { "epoch": 1.139549160837347, "grad_norm": 0.3059027992053217, "learning_rate": 2.3476438036921296e-06, "loss": 0.0999267578125, "step": 131790 }, { "epoch": 1.1395923943588901, "grad_norm": 5.011785131893396, "learning_rate": 2.3474449471568907e-06, "loss": 0.017681121826171875, "step": 131795 }, { "epoch": 1.1396356278804334, "grad_norm": 7.300948349604102, "learning_rate": 2.3472460936311784e-06, "loss": 0.06030426025390625, "step": 131800 }, { "epoch": 1.1396788614019766, "grad_norm": 0.6427425890971571, "learning_rate": 2.34704724311591e-06, "loss": 0.007668685913085937, "step": 131805 }, { "epoch": 1.1397220949235198, "grad_norm": 1.3690114924902168, "learning_rate": 2.346848395612002e-06, "loss": 0.31987724304199217, "step": 131810 }, { "epoch": 1.1397653284450633, "grad_norm": 3.0617241009179668, "learning_rate": 2.34664955112037e-06, "loss": 0.02482757568359375, "step": 131815 }, { "epoch": 1.1398085619666065, "grad_norm": 9.079277425873348, "learning_rate": 2.346450709641934e-06, "loss": 0.0699127197265625, "step": 131820 }, { "epoch": 1.1398517954881497, "grad_norm": 5.8123868706308, "learning_rate": 2.3462518711776083e-06, "loss": 0.045548439025878906, "step": 131825 }, { "epoch": 1.139895029009693, "grad_norm": 3.403297024986454, "learning_rate": 2.3460530357283122e-06, "loss": 0.0227813720703125, "step": 131830 }, { "epoch": 1.1399382625312362, "grad_norm": 1.8317001252005658, "learning_rate": 2.345854203294962e-06, "loss": 0.028020477294921874, "step": 131835 }, { "epoch": 1.1399814960527794, "grad_norm": 66.76964027284153, "learning_rate": 2.345655373878474e-06, "loss": 0.30855712890625, "step": 131840 }, { "epoch": 1.1400247295743227, "grad_norm": 5.768893197027222, "learning_rate": 2.3454565474797652e-06, "loss": 0.0597137451171875, "step": 131845 }, { "epoch": 1.1400679630958659, "grad_norm": 1.352524725931117, "learning_rate": 2.345257724099753e-06, "loss": 0.0456634521484375, "step": 131850 }, { "epoch": 1.1401111966174093, "grad_norm": 83.31596174619037, "learning_rate": 2.345058903739354e-06, "loss": 0.12911453247070312, "step": 131855 }, { "epoch": 1.1401544301389526, "grad_norm": 0.7942584819489465, "learning_rate": 2.3448600863994866e-06, "loss": 0.010699462890625, "step": 131860 }, { "epoch": 1.1401976636604958, "grad_norm": 59.71465811015962, "learning_rate": 2.3446612720810664e-06, "loss": 0.08753128051757812, "step": 131865 }, { "epoch": 1.140240897182039, "grad_norm": 8.037511406256222, "learning_rate": 2.3444624607850106e-06, "loss": 0.05087413787841797, "step": 131870 }, { "epoch": 1.1402841307035823, "grad_norm": 8.158106248886346, "learning_rate": 2.344263652512235e-06, "loss": 0.04684371948242187, "step": 131875 }, { "epoch": 1.1403273642251257, "grad_norm": 5.156704883804914, "learning_rate": 2.3440648472636587e-06, "loss": 0.026627731323242188, "step": 131880 }, { "epoch": 1.140370597746669, "grad_norm": 0.5957391499713061, "learning_rate": 2.343866045040195e-06, "loss": 0.19892425537109376, "step": 131885 }, { "epoch": 1.1404138312682122, "grad_norm": 7.404204683282471, "learning_rate": 2.3436672458427658e-06, "loss": 0.04820098876953125, "step": 131890 }, { "epoch": 1.1404570647897554, "grad_norm": 7.184213658290651, "learning_rate": 2.3434684496722844e-06, "loss": 0.06952781677246093, "step": 131895 }, { "epoch": 1.1405002983112986, "grad_norm": 0.7210061774966808, "learning_rate": 2.343269656529669e-06, "loss": 0.1138143539428711, "step": 131900 }, { "epoch": 1.1405435318328419, "grad_norm": 17.290038576011778, "learning_rate": 2.3430708664158348e-06, "loss": 0.0537872314453125, "step": 131905 }, { "epoch": 1.140586765354385, "grad_norm": 19.364558110196796, "learning_rate": 2.3428720793317003e-06, "loss": 0.34813385009765624, "step": 131910 }, { "epoch": 1.1406299988759285, "grad_norm": 39.56741751271782, "learning_rate": 2.3426732952781807e-06, "loss": 0.12197265625, "step": 131915 }, { "epoch": 1.1406732323974718, "grad_norm": 36.19211419290911, "learning_rate": 2.3424745142561954e-06, "loss": 0.10318145751953126, "step": 131920 }, { "epoch": 1.140716465919015, "grad_norm": 3.1402029524436426, "learning_rate": 2.3422757362666588e-06, "loss": 0.030594635009765624, "step": 131925 }, { "epoch": 1.1407596994405582, "grad_norm": 0.1200923896066721, "learning_rate": 2.3420769613104886e-06, "loss": 0.025685691833496095, "step": 131930 }, { "epoch": 1.1408029329621014, "grad_norm": 0.39751165236558805, "learning_rate": 2.341878189388601e-06, "loss": 0.014580535888671874, "step": 131935 }, { "epoch": 1.1408461664836447, "grad_norm": 4.566354664614483, "learning_rate": 2.3416794205019135e-06, "loss": 0.02684783935546875, "step": 131940 }, { "epoch": 1.1408894000051881, "grad_norm": 0.1457575962438163, "learning_rate": 2.341480654651341e-06, "loss": 0.025231170654296874, "step": 131945 }, { "epoch": 1.1409326335267314, "grad_norm": 1.9250929533536296, "learning_rate": 2.341281891837803e-06, "loss": 0.2157867431640625, "step": 131950 }, { "epoch": 1.1409758670482746, "grad_norm": 7.503154058823593, "learning_rate": 2.3410831320622143e-06, "loss": 0.13426704406738282, "step": 131955 }, { "epoch": 1.1410191005698178, "grad_norm": 1.1162326721188116, "learning_rate": 2.3408843753254924e-06, "loss": 0.2690135955810547, "step": 131960 }, { "epoch": 1.141062334091361, "grad_norm": 0.5310346970585036, "learning_rate": 2.340685621628553e-06, "loss": 0.06921806335449218, "step": 131965 }, { "epoch": 1.1411055676129043, "grad_norm": 3.4045270074563296, "learning_rate": 2.3404868709723132e-06, "loss": 0.09123458862304687, "step": 131970 }, { "epoch": 1.1411488011344475, "grad_norm": 0.16683748845872146, "learning_rate": 2.3402881233576907e-06, "loss": 0.03268909454345703, "step": 131975 }, { "epoch": 1.141192034655991, "grad_norm": 4.517562970703091, "learning_rate": 2.340089378785599e-06, "loss": 0.2277740478515625, "step": 131980 }, { "epoch": 1.1412352681775342, "grad_norm": 1.9341026625382698, "learning_rate": 2.339890637256958e-06, "loss": 0.02779083251953125, "step": 131985 }, { "epoch": 1.1412785016990774, "grad_norm": 3.1255589230341934, "learning_rate": 2.339691898772683e-06, "loss": 0.05634727478027344, "step": 131990 }, { "epoch": 1.1413217352206206, "grad_norm": 13.932944926606096, "learning_rate": 2.339493163333691e-06, "loss": 0.06933517456054687, "step": 131995 }, { "epoch": 1.1413649687421639, "grad_norm": 24.686353156010693, "learning_rate": 2.339294430940898e-06, "loss": 0.17027130126953124, "step": 132000 }, { "epoch": 1.141408202263707, "grad_norm": 2.6686172505726193, "learning_rate": 2.339095701595221e-06, "loss": 0.023446273803710938, "step": 132005 }, { "epoch": 1.1414514357852505, "grad_norm": 13.941004813076699, "learning_rate": 2.3388969752975744e-06, "loss": 0.08509674072265624, "step": 132010 }, { "epoch": 1.1414946693067938, "grad_norm": 1.248552719062131, "learning_rate": 2.3386982520488786e-06, "loss": 0.1733428955078125, "step": 132015 }, { "epoch": 1.141537902828337, "grad_norm": 17.759740246830255, "learning_rate": 2.338499531850047e-06, "loss": 0.08356399536132812, "step": 132020 }, { "epoch": 1.1415811363498802, "grad_norm": 0.4069768594844664, "learning_rate": 2.3383008147019975e-06, "loss": 0.3232166290283203, "step": 132025 }, { "epoch": 1.1416243698714235, "grad_norm": 0.126489743157777, "learning_rate": 2.3381021006056465e-06, "loss": 0.14316253662109374, "step": 132030 }, { "epoch": 1.1416676033929667, "grad_norm": 0.8427148021431667, "learning_rate": 2.33790338956191e-06, "loss": 0.28915252685546877, "step": 132035 }, { "epoch": 1.14171083691451, "grad_norm": 1.066952807049968, "learning_rate": 2.337704681571703e-06, "loss": 0.05024986267089844, "step": 132040 }, { "epoch": 1.1417540704360534, "grad_norm": 1.608976127353485, "learning_rate": 2.3375059766359453e-06, "loss": 0.06852874755859376, "step": 132045 }, { "epoch": 1.1417973039575966, "grad_norm": 9.296666846828584, "learning_rate": 2.3373072747555504e-06, "loss": 0.054248809814453125, "step": 132050 }, { "epoch": 1.1418405374791398, "grad_norm": 3.0992401076352487, "learning_rate": 2.3371085759314367e-06, "loss": 0.019681644439697266, "step": 132055 }, { "epoch": 1.141883771000683, "grad_norm": 2.905652781462287, "learning_rate": 2.3369098801645195e-06, "loss": 0.11625518798828124, "step": 132060 }, { "epoch": 1.1419270045222263, "grad_norm": 2.1131958266942146, "learning_rate": 2.3367111874557156e-06, "loss": 0.020537567138671876, "step": 132065 }, { "epoch": 1.1419702380437697, "grad_norm": 11.093328452335191, "learning_rate": 2.3365124978059403e-06, "loss": 0.08855476379394531, "step": 132070 }, { "epoch": 1.142013471565313, "grad_norm": 0.12254056215246033, "learning_rate": 2.3363138112161105e-06, "loss": 0.03002338409423828, "step": 132075 }, { "epoch": 1.1420567050868562, "grad_norm": 9.150073306982133, "learning_rate": 2.336115127687143e-06, "loss": 0.092535400390625, "step": 132080 }, { "epoch": 1.1420999386083994, "grad_norm": 2.081887961272878, "learning_rate": 2.335916447219955e-06, "loss": 0.04821205139160156, "step": 132085 }, { "epoch": 1.1421431721299427, "grad_norm": 0.12989320645639552, "learning_rate": 2.335717769815461e-06, "loss": 0.11949596405029297, "step": 132090 }, { "epoch": 1.1421864056514859, "grad_norm": 0.6921569921475984, "learning_rate": 2.335519095474578e-06, "loss": 0.046800994873046876, "step": 132095 }, { "epoch": 1.142229639173029, "grad_norm": 0.2955059423416455, "learning_rate": 2.335320424198222e-06, "loss": 0.0184326171875, "step": 132100 }, { "epoch": 1.1422728726945726, "grad_norm": 7.329276363369795, "learning_rate": 2.335121755987309e-06, "loss": 0.03849868774414063, "step": 132105 }, { "epoch": 1.1423161062161158, "grad_norm": 29.061966845474913, "learning_rate": 2.334923090842756e-06, "loss": 0.06470489501953125, "step": 132110 }, { "epoch": 1.142359339737659, "grad_norm": 41.93888815551783, "learning_rate": 2.3347244287654793e-06, "loss": 0.21863288879394532, "step": 132115 }, { "epoch": 1.1424025732592022, "grad_norm": 13.482363033672932, "learning_rate": 2.334525769756395e-06, "loss": 0.11090202331542968, "step": 132120 }, { "epoch": 1.1424458067807455, "grad_norm": 1.8284990392569402, "learning_rate": 2.334327113816419e-06, "loss": 0.02442779541015625, "step": 132125 }, { "epoch": 1.1424890403022887, "grad_norm": 28.845968197199838, "learning_rate": 2.334128460946467e-06, "loss": 0.17679939270019532, "step": 132130 }, { "epoch": 1.1425322738238322, "grad_norm": 18.148256958018855, "learning_rate": 2.3339298111474554e-06, "loss": 0.15983047485351562, "step": 132135 }, { "epoch": 1.1425755073453754, "grad_norm": 23.86035135698442, "learning_rate": 2.333731164420301e-06, "loss": 0.11599788665771485, "step": 132140 }, { "epoch": 1.1426187408669186, "grad_norm": 15.70876667143113, "learning_rate": 2.33353252076592e-06, "loss": 0.017132568359375, "step": 132145 }, { "epoch": 1.1426619743884618, "grad_norm": 16.918028696645706, "learning_rate": 2.333333880185228e-06, "loss": 0.1183868408203125, "step": 132150 }, { "epoch": 1.142705207910005, "grad_norm": 1.983600132174938, "learning_rate": 2.3331352426791416e-06, "loss": 0.0308502197265625, "step": 132155 }, { "epoch": 1.1427484414315483, "grad_norm": 0.624553903039554, "learning_rate": 2.3329366082485753e-06, "loss": 0.11341285705566406, "step": 132160 }, { "epoch": 1.1427916749530915, "grad_norm": 2.5746605603893022, "learning_rate": 2.332737976894448e-06, "loss": 0.034197998046875, "step": 132165 }, { "epoch": 1.142834908474635, "grad_norm": 10.12861015395475, "learning_rate": 2.3325393486176722e-06, "loss": 0.0680572509765625, "step": 132170 }, { "epoch": 1.1428781419961782, "grad_norm": 10.13544366818938, "learning_rate": 2.3323407234191675e-06, "loss": 0.14474029541015626, "step": 132175 }, { "epoch": 1.1429213755177214, "grad_norm": 5.784227996588449, "learning_rate": 2.332142101299848e-06, "loss": 0.09511871337890625, "step": 132180 }, { "epoch": 1.1429646090392647, "grad_norm": 0.12043546892628434, "learning_rate": 2.3319434822606304e-06, "loss": 0.047296905517578126, "step": 132185 }, { "epoch": 1.143007842560808, "grad_norm": 5.824248131338271, "learning_rate": 2.33174486630243e-06, "loss": 0.0664764404296875, "step": 132190 }, { "epoch": 1.1430510760823511, "grad_norm": 1.6586625907037187, "learning_rate": 2.3315462534261635e-06, "loss": 0.10140228271484375, "step": 132195 }, { "epoch": 1.1430943096038946, "grad_norm": 0.9324059742098827, "learning_rate": 2.3313476436327456e-06, "loss": 0.0563812255859375, "step": 132200 }, { "epoch": 1.1431375431254378, "grad_norm": 0.611741222114097, "learning_rate": 2.3311490369230944e-06, "loss": 0.12942962646484374, "step": 132205 }, { "epoch": 1.143180776646981, "grad_norm": 37.549783669218215, "learning_rate": 2.330950433298125e-06, "loss": 0.215155029296875, "step": 132210 }, { "epoch": 1.1432240101685243, "grad_norm": 2.6678999827720085, "learning_rate": 2.330751832758752e-06, "loss": 0.16790618896484374, "step": 132215 }, { "epoch": 1.1432672436900675, "grad_norm": 0.81984222661096, "learning_rate": 2.3305532353058933e-06, "loss": 0.08229217529296876, "step": 132220 }, { "epoch": 1.1433104772116107, "grad_norm": 3.5775742116283444, "learning_rate": 2.330354640940464e-06, "loss": 0.03140411376953125, "step": 132225 }, { "epoch": 1.143353710733154, "grad_norm": 8.385975170536144, "learning_rate": 2.33015604966338e-06, "loss": 0.03816070556640625, "step": 132230 }, { "epoch": 1.1433969442546974, "grad_norm": 4.868035997738729, "learning_rate": 2.3299574614755555e-06, "loss": 0.04813079833984375, "step": 132235 }, { "epoch": 1.1434401777762406, "grad_norm": 0.7536921664021295, "learning_rate": 2.3297588763779097e-06, "loss": 0.1616352081298828, "step": 132240 }, { "epoch": 1.1434834112977839, "grad_norm": 0.8889506999833389, "learning_rate": 2.3295602943713556e-06, "loss": 0.0076122283935546875, "step": 132245 }, { "epoch": 1.143526644819327, "grad_norm": 1.1154586229098267, "learning_rate": 2.3293617154568115e-06, "loss": 0.05096893310546875, "step": 132250 }, { "epoch": 1.1435698783408703, "grad_norm": 0.9342151387886091, "learning_rate": 2.329163139635191e-06, "loss": 0.08322334289550781, "step": 132255 }, { "epoch": 1.1436131118624135, "grad_norm": 6.185518026660055, "learning_rate": 2.3289645669074116e-06, "loss": 0.13446884155273436, "step": 132260 }, { "epoch": 1.143656345383957, "grad_norm": 5.860326051176558, "learning_rate": 2.328765997274387e-06, "loss": 0.1314085006713867, "step": 132265 }, { "epoch": 1.1436995789055002, "grad_norm": 3.8256659435346627, "learning_rate": 2.328567430737035e-06, "loss": 0.26264152526855467, "step": 132270 }, { "epoch": 1.1437428124270435, "grad_norm": 1.3966809843695684, "learning_rate": 2.3283688672962704e-06, "loss": 0.09163856506347656, "step": 132275 }, { "epoch": 1.1437860459485867, "grad_norm": 0.14508211415482533, "learning_rate": 2.32817030695301e-06, "loss": 0.06276016235351563, "step": 132280 }, { "epoch": 1.14382927947013, "grad_norm": 14.178843099564235, "learning_rate": 2.3279717497081686e-06, "loss": 0.09682884216308593, "step": 132285 }, { "epoch": 1.1438725129916731, "grad_norm": 7.251196699528755, "learning_rate": 2.3277731955626622e-06, "loss": 0.032916259765625, "step": 132290 }, { "epoch": 1.1439157465132164, "grad_norm": 0.4046497966397983, "learning_rate": 2.327574644517405e-06, "loss": 0.053165435791015625, "step": 132295 }, { "epoch": 1.1439589800347598, "grad_norm": 2.8450765144029857, "learning_rate": 2.3273760965733156e-06, "loss": 0.014209556579589843, "step": 132300 }, { "epoch": 1.144002213556303, "grad_norm": 2.8659083538124204, "learning_rate": 2.3271775517313074e-06, "loss": 0.08796234130859375, "step": 132305 }, { "epoch": 1.1440454470778463, "grad_norm": 3.069090817509363, "learning_rate": 2.3269790099922977e-06, "loss": 0.10037994384765625, "step": 132310 }, { "epoch": 1.1440886805993895, "grad_norm": 3.363273485588189, "learning_rate": 2.326780471357201e-06, "loss": 0.019079208374023438, "step": 132315 }, { "epoch": 1.1441319141209327, "grad_norm": 7.359561602040257, "learning_rate": 2.3265819358269334e-06, "loss": 0.13347396850585938, "step": 132320 }, { "epoch": 1.1441751476424762, "grad_norm": 0.14407561866932378, "learning_rate": 2.32638340340241e-06, "loss": 0.2113971710205078, "step": 132325 }, { "epoch": 1.1442183811640194, "grad_norm": 10.364636469343179, "learning_rate": 2.326184874084547e-06, "loss": 0.3213104248046875, "step": 132330 }, { "epoch": 1.1442616146855626, "grad_norm": 4.088260555568375, "learning_rate": 2.325986347874259e-06, "loss": 0.0430450439453125, "step": 132335 }, { "epoch": 1.1443048482071059, "grad_norm": 0.6160359528937627, "learning_rate": 2.3257878247724637e-06, "loss": 0.027095794677734375, "step": 132340 }, { "epoch": 1.144348081728649, "grad_norm": 17.661240140430085, "learning_rate": 2.3255893047800754e-06, "loss": 0.057000350952148435, "step": 132345 }, { "epoch": 1.1443913152501923, "grad_norm": 2.6799318054790993, "learning_rate": 2.325390787898009e-06, "loss": 0.09862823486328125, "step": 132350 }, { "epoch": 1.1444345487717356, "grad_norm": 12.101426568516693, "learning_rate": 2.325192274127181e-06, "loss": 0.05948486328125, "step": 132355 }, { "epoch": 1.144477782293279, "grad_norm": 2.7998633290056123, "learning_rate": 2.324993763468506e-06, "loss": 0.06672897338867187, "step": 132360 }, { "epoch": 1.1445210158148222, "grad_norm": 43.63569840612949, "learning_rate": 2.3247952559229e-06, "loss": 0.20640220642089843, "step": 132365 }, { "epoch": 1.1445642493363655, "grad_norm": 0.2931240712740814, "learning_rate": 2.32459675149128e-06, "loss": 0.06991844177246094, "step": 132370 }, { "epoch": 1.1446074828579087, "grad_norm": 11.94752509628735, "learning_rate": 2.3243982501745594e-06, "loss": 0.03588409423828125, "step": 132375 }, { "epoch": 1.144650716379452, "grad_norm": 20.002633374032474, "learning_rate": 2.3241997519736544e-06, "loss": 0.06905527114868164, "step": 132380 }, { "epoch": 1.1446939499009952, "grad_norm": 0.41607326179679566, "learning_rate": 2.32400125688948e-06, "loss": 0.12971343994140624, "step": 132385 }, { "epoch": 1.1447371834225386, "grad_norm": 5.723234490789878, "learning_rate": 2.323802764922951e-06, "loss": 0.12187118530273437, "step": 132390 }, { "epoch": 1.1447804169440818, "grad_norm": 0.11851904165784684, "learning_rate": 2.323604276074986e-06, "loss": 0.011939239501953126, "step": 132395 }, { "epoch": 1.144823650465625, "grad_norm": 3.5959225667088117, "learning_rate": 2.3234057903464976e-06, "loss": 0.09557838439941406, "step": 132400 }, { "epoch": 1.1448668839871683, "grad_norm": 27.464968396766793, "learning_rate": 2.3232073077384022e-06, "loss": 0.12448978424072266, "step": 132405 }, { "epoch": 1.1449101175087115, "grad_norm": 0.75467022355832, "learning_rate": 2.3230088282516152e-06, "loss": 0.029706573486328124, "step": 132410 }, { "epoch": 1.1449533510302548, "grad_norm": 2.0510004097488754, "learning_rate": 2.3228103518870506e-06, "loss": 0.14856338500976562, "step": 132415 }, { "epoch": 1.144996584551798, "grad_norm": 2.3869914632297493, "learning_rate": 2.3226118786456256e-06, "loss": 0.014017009735107422, "step": 132420 }, { "epoch": 1.1450398180733414, "grad_norm": 3.2467159397134786, "learning_rate": 2.3224134085282533e-06, "loss": 0.03288726806640625, "step": 132425 }, { "epoch": 1.1450830515948847, "grad_norm": 0.25197977056546234, "learning_rate": 2.322214941535852e-06, "loss": 0.016278076171875, "step": 132430 }, { "epoch": 1.1451262851164279, "grad_norm": 5.319266440820934, "learning_rate": 2.322016477669335e-06, "loss": 0.03200836181640625, "step": 132435 }, { "epoch": 1.1451695186379711, "grad_norm": 0.23795189134701053, "learning_rate": 2.3218180169296183e-06, "loss": 0.034942626953125, "step": 132440 }, { "epoch": 1.1452127521595143, "grad_norm": 3.285421398366531, "learning_rate": 2.321619559317617e-06, "loss": 0.015667724609375, "step": 132445 }, { "epoch": 1.1452559856810576, "grad_norm": 12.409938468067738, "learning_rate": 2.3214211048342464e-06, "loss": 0.14974288940429686, "step": 132450 }, { "epoch": 1.145299219202601, "grad_norm": 2.2868995280307423, "learning_rate": 2.321222653480421e-06, "loss": 0.066192626953125, "step": 132455 }, { "epoch": 1.1453424527241443, "grad_norm": 0.7276421450641427, "learning_rate": 2.3210242052570575e-06, "loss": 0.18455047607421876, "step": 132460 }, { "epoch": 1.1453856862456875, "grad_norm": 0.4369066146699882, "learning_rate": 2.3208257601650703e-06, "loss": 0.07032318115234375, "step": 132465 }, { "epoch": 1.1454289197672307, "grad_norm": 0.3944566459375056, "learning_rate": 2.320627318205374e-06, "loss": 0.03746013641357422, "step": 132470 }, { "epoch": 1.145472153288774, "grad_norm": 1.3109608766611438, "learning_rate": 2.3204288793788856e-06, "loss": 0.11169471740722656, "step": 132475 }, { "epoch": 1.1455153868103172, "grad_norm": 0.30789045867106307, "learning_rate": 2.3202304436865188e-06, "loss": 0.025113964080810548, "step": 132480 }, { "epoch": 1.1455586203318604, "grad_norm": 3.146261400530783, "learning_rate": 2.3200320111291878e-06, "loss": 0.027811431884765626, "step": 132485 }, { "epoch": 1.1456018538534039, "grad_norm": 9.897685566497497, "learning_rate": 2.3198335817078108e-06, "loss": 0.12119903564453124, "step": 132490 }, { "epoch": 1.145645087374947, "grad_norm": 3.7313515930140033, "learning_rate": 2.319635155423301e-06, "loss": 0.007334136962890625, "step": 132495 }, { "epoch": 1.1456883208964903, "grad_norm": 36.787850437530665, "learning_rate": 2.319436732276573e-06, "loss": 0.13136520385742187, "step": 132500 }, { "epoch": 1.1457315544180335, "grad_norm": 21.290720210069168, "learning_rate": 2.319238312268543e-06, "loss": 0.10370101928710937, "step": 132505 }, { "epoch": 1.1457747879395768, "grad_norm": 0.35502112169382755, "learning_rate": 2.319039895400126e-06, "loss": 0.049560546875, "step": 132510 }, { "epoch": 1.1458180214611202, "grad_norm": 0.17087819892315464, "learning_rate": 2.318841481672237e-06, "loss": 0.027793121337890626, "step": 132515 }, { "epoch": 1.1458612549826634, "grad_norm": 6.42785483577708, "learning_rate": 2.3186430710857894e-06, "loss": 0.043098831176757814, "step": 132520 }, { "epoch": 1.1459044885042067, "grad_norm": 15.572916582381676, "learning_rate": 2.318444663641701e-06, "loss": 0.036304473876953125, "step": 132525 }, { "epoch": 1.14594772202575, "grad_norm": 5.348142791278029, "learning_rate": 2.318246259340885e-06, "loss": 0.16366958618164062, "step": 132530 }, { "epoch": 1.1459909555472931, "grad_norm": 0.7112357732970624, "learning_rate": 2.3180478581842583e-06, "loss": 0.14745521545410156, "step": 132535 }, { "epoch": 1.1460341890688364, "grad_norm": 3.8114732104715836, "learning_rate": 2.3178494601727336e-06, "loss": 0.012875175476074219, "step": 132540 }, { "epoch": 1.1460774225903796, "grad_norm": 39.50264008161394, "learning_rate": 2.3176510653072274e-06, "loss": 0.41714019775390626, "step": 132545 }, { "epoch": 1.1461206561119228, "grad_norm": 0.22366348893207724, "learning_rate": 2.317452673588653e-06, "loss": 0.01565093994140625, "step": 132550 }, { "epoch": 1.1461638896334663, "grad_norm": 5.137546652275699, "learning_rate": 2.3172542850179278e-06, "loss": 0.12431869506835938, "step": 132555 }, { "epoch": 1.1462071231550095, "grad_norm": 0.5856687204979139, "learning_rate": 2.3170558995959648e-06, "loss": 0.06631202697753906, "step": 132560 }, { "epoch": 1.1462503566765527, "grad_norm": 12.495007491366186, "learning_rate": 2.3168575173236802e-06, "loss": 0.02622222900390625, "step": 132565 }, { "epoch": 1.146293590198096, "grad_norm": 0.2747640800669744, "learning_rate": 2.3166591382019885e-06, "loss": 0.04324531555175781, "step": 132570 }, { "epoch": 1.1463368237196392, "grad_norm": 2.4436807374514844, "learning_rate": 2.3164607622318045e-06, "loss": 0.014983749389648438, "step": 132575 }, { "epoch": 1.1463800572411826, "grad_norm": 5.142809030436608, "learning_rate": 2.316262389414042e-06, "loss": 0.05091876983642578, "step": 132580 }, { "epoch": 1.1464232907627259, "grad_norm": 1.0707511568753576, "learning_rate": 2.3160640197496176e-06, "loss": 0.2269451141357422, "step": 132585 }, { "epoch": 1.146466524284269, "grad_norm": 1.5671171428998791, "learning_rate": 2.315865653239445e-06, "loss": 0.0095550537109375, "step": 132590 }, { "epoch": 1.1465097578058123, "grad_norm": 4.921127364463691, "learning_rate": 2.315667289884441e-06, "loss": 0.0316981315612793, "step": 132595 }, { "epoch": 1.1465529913273556, "grad_norm": 0.4483669365529113, "learning_rate": 2.315468929685518e-06, "loss": 0.06520843505859375, "step": 132600 }, { "epoch": 1.1465962248488988, "grad_norm": 1.49823408800781, "learning_rate": 2.315270572643592e-06, "loss": 0.07666435241699218, "step": 132605 }, { "epoch": 1.146639458370442, "grad_norm": 7.673605569623957, "learning_rate": 2.3150722187595775e-06, "loss": 0.042400360107421875, "step": 132610 }, { "epoch": 1.1466826918919855, "grad_norm": 0.9097280937100114, "learning_rate": 2.314873868034388e-06, "loss": 0.029929351806640626, "step": 132615 }, { "epoch": 1.1467259254135287, "grad_norm": 16.11679142231975, "learning_rate": 2.3146755204689413e-06, "loss": 0.18683090209960937, "step": 132620 }, { "epoch": 1.146769158935072, "grad_norm": 2.254398112826822, "learning_rate": 2.314477176064151e-06, "loss": 0.07907943725585938, "step": 132625 }, { "epoch": 1.1468123924566151, "grad_norm": 0.25844475170455394, "learning_rate": 2.314278834820931e-06, "loss": 0.02177085876464844, "step": 132630 }, { "epoch": 1.1468556259781584, "grad_norm": 1.6319418840696347, "learning_rate": 2.3140804967401967e-06, "loss": 0.007771492004394531, "step": 132635 }, { "epoch": 1.1468988594997016, "grad_norm": 1.0568664664468574, "learning_rate": 2.313882161822861e-06, "loss": 0.0914642333984375, "step": 132640 }, { "epoch": 1.146942093021245, "grad_norm": 13.114692359605158, "learning_rate": 2.3136838300698404e-06, "loss": 0.033490467071533206, "step": 132645 }, { "epoch": 1.1469853265427883, "grad_norm": 0.9239563305292574, "learning_rate": 2.3134855014820505e-06, "loss": 0.022174072265625, "step": 132650 }, { "epoch": 1.1470285600643315, "grad_norm": 19.36278666809074, "learning_rate": 2.3132871760604045e-06, "loss": 0.07418231964111328, "step": 132655 }, { "epoch": 1.1470717935858747, "grad_norm": 0.5664702503471727, "learning_rate": 2.3130888538058176e-06, "loss": 0.07680015563964844, "step": 132660 }, { "epoch": 1.147115027107418, "grad_norm": 0.33880633522966896, "learning_rate": 2.3128905347192035e-06, "loss": 0.25200881958007815, "step": 132665 }, { "epoch": 1.1471582606289612, "grad_norm": 0.2221141496249116, "learning_rate": 2.3126922188014785e-06, "loss": 0.026847076416015626, "step": 132670 }, { "epoch": 1.1472014941505044, "grad_norm": 7.883159418042088, "learning_rate": 2.3124939060535558e-06, "loss": 0.07073135375976562, "step": 132675 }, { "epoch": 1.1472447276720479, "grad_norm": 6.21789820582131, "learning_rate": 2.3122955964763493e-06, "loss": 0.05676822662353516, "step": 132680 }, { "epoch": 1.147287961193591, "grad_norm": 0.82924644837988, "learning_rate": 2.312097290070776e-06, "loss": 0.0106781005859375, "step": 132685 }, { "epoch": 1.1473311947151343, "grad_norm": 1.2221045006127904, "learning_rate": 2.3118989868377492e-06, "loss": 0.0118133544921875, "step": 132690 }, { "epoch": 1.1473744282366776, "grad_norm": 3.0996075870668123, "learning_rate": 2.311700686778183e-06, "loss": 0.01837615966796875, "step": 132695 }, { "epoch": 1.1474176617582208, "grad_norm": 0.5395752908172257, "learning_rate": 2.311502389892993e-06, "loss": 0.0293182373046875, "step": 132700 }, { "epoch": 1.147460895279764, "grad_norm": 2.291980293252784, "learning_rate": 2.311304096183093e-06, "loss": 0.2191162109375, "step": 132705 }, { "epoch": 1.1475041288013075, "grad_norm": 0.5963097645515264, "learning_rate": 2.3111058056493964e-06, "loss": 0.17423629760742188, "step": 132710 }, { "epoch": 1.1475473623228507, "grad_norm": 1.9757165226643643, "learning_rate": 2.3109075182928203e-06, "loss": 0.03837795257568359, "step": 132715 }, { "epoch": 1.147590595844394, "grad_norm": 1.0549508368605118, "learning_rate": 2.310709234114278e-06, "loss": 0.09235076904296875, "step": 132720 }, { "epoch": 1.1476338293659372, "grad_norm": 9.106328398958997, "learning_rate": 2.3105109531146827e-06, "loss": 0.05371437072753906, "step": 132725 }, { "epoch": 1.1476770628874804, "grad_norm": 6.998564723612894, "learning_rate": 2.310312675294951e-06, "loss": 0.31464385986328125, "step": 132730 }, { "epoch": 1.1477202964090236, "grad_norm": 3.387975887998858, "learning_rate": 2.310114400655996e-06, "loss": 0.02510986328125, "step": 132735 }, { "epoch": 1.1477635299305669, "grad_norm": 3.84023160008755, "learning_rate": 2.3099161291987314e-06, "loss": 0.0731414794921875, "step": 132740 }, { "epoch": 1.1478067634521103, "grad_norm": 40.908287024368754, "learning_rate": 2.309717860924074e-06, "loss": 0.04113960266113281, "step": 132745 }, { "epoch": 1.1478499969736535, "grad_norm": 2.5703468773739027, "learning_rate": 2.3095195958329363e-06, "loss": 0.03082275390625, "step": 132750 }, { "epoch": 1.1478932304951968, "grad_norm": 0.4881716037036255, "learning_rate": 2.3093213339262333e-06, "loss": 0.015995025634765625, "step": 132755 }, { "epoch": 1.14793646401674, "grad_norm": 3.6146468963029275, "learning_rate": 2.3091230752048795e-06, "loss": 0.04105033874511719, "step": 132760 }, { "epoch": 1.1479796975382832, "grad_norm": 22.00041105149109, "learning_rate": 2.308924819669789e-06, "loss": 0.13140535354614258, "step": 132765 }, { "epoch": 1.1480229310598267, "grad_norm": 9.32110145990334, "learning_rate": 2.3087265673218763e-06, "loss": 0.019817352294921875, "step": 132770 }, { "epoch": 1.14806616458137, "grad_norm": 3.731961392457858, "learning_rate": 2.3085283181620544e-06, "loss": 0.0640472412109375, "step": 132775 }, { "epoch": 1.1481093981029131, "grad_norm": 6.670856929328023, "learning_rate": 2.30833007219124e-06, "loss": 0.049854278564453125, "step": 132780 }, { "epoch": 1.1481526316244564, "grad_norm": 16.314205565918893, "learning_rate": 2.3081318294103452e-06, "loss": 0.11662063598632813, "step": 132785 }, { "epoch": 1.1481958651459996, "grad_norm": 0.1483416825192271, "learning_rate": 2.3079335898202863e-06, "loss": 0.020154571533203124, "step": 132790 }, { "epoch": 1.1482390986675428, "grad_norm": 1.2370774116024272, "learning_rate": 2.3077353534219767e-06, "loss": 0.021487045288085937, "step": 132795 }, { "epoch": 1.148282332189086, "grad_norm": 9.376823093343901, "learning_rate": 2.30753712021633e-06, "loss": 0.24368972778320314, "step": 132800 }, { "epoch": 1.1483255657106293, "grad_norm": 6.176078025637095, "learning_rate": 2.30733889020426e-06, "loss": 0.3185894012451172, "step": 132805 }, { "epoch": 1.1483687992321727, "grad_norm": 56.85700369926598, "learning_rate": 2.307140663386683e-06, "loss": 0.36419525146484377, "step": 132810 }, { "epoch": 1.148412032753716, "grad_norm": 1.4558853138047851, "learning_rate": 2.306942439764511e-06, "loss": 0.37977752685546873, "step": 132815 }, { "epoch": 1.1484552662752592, "grad_norm": 3.7979874526210597, "learning_rate": 2.3067442193386606e-06, "loss": 0.10733642578125, "step": 132820 }, { "epoch": 1.1484984997968024, "grad_norm": 6.9166943632907065, "learning_rate": 2.3065460021100445e-06, "loss": 0.10954971313476562, "step": 132825 }, { "epoch": 1.1485417333183456, "grad_norm": 1.0782454727675737, "learning_rate": 2.306347788079576e-06, "loss": 0.011186981201171875, "step": 132830 }, { "epoch": 1.148584966839889, "grad_norm": 1.3369862697734822, "learning_rate": 2.3061495772481705e-06, "loss": 0.023215484619140626, "step": 132835 }, { "epoch": 1.1486282003614323, "grad_norm": 12.196293908993074, "learning_rate": 2.3059513696167413e-06, "loss": 0.08823432922363281, "step": 132840 }, { "epoch": 1.1486714338829755, "grad_norm": 14.328123474576039, "learning_rate": 2.305753165186204e-06, "loss": 0.06780471801757812, "step": 132845 }, { "epoch": 1.1487146674045188, "grad_norm": 1.8355828821067697, "learning_rate": 2.305554963957472e-06, "loss": 0.08373756408691406, "step": 132850 }, { "epoch": 1.148757900926062, "grad_norm": 5.882972989824316, "learning_rate": 2.3053567659314586e-06, "loss": 0.29782752990722655, "step": 132855 }, { "epoch": 1.1488011344476052, "grad_norm": 6.839868184910067, "learning_rate": 2.305158571109078e-06, "loss": 0.07361640930175781, "step": 132860 }, { "epoch": 1.1488443679691485, "grad_norm": 25.99825566482242, "learning_rate": 2.3049603794912456e-06, "loss": 0.037713623046875, "step": 132865 }, { "epoch": 1.148887601490692, "grad_norm": 4.7282566645085655, "learning_rate": 2.304762191078873e-06, "loss": 0.025835418701171876, "step": 132870 }, { "epoch": 1.1489308350122351, "grad_norm": 9.010734102411245, "learning_rate": 2.304564005872877e-06, "loss": 0.1348602294921875, "step": 132875 }, { "epoch": 1.1489740685337784, "grad_norm": 0.7082926235773365, "learning_rate": 2.3043658238741703e-06, "loss": 0.007019233703613281, "step": 132880 }, { "epoch": 1.1490173020553216, "grad_norm": 1.3602006842382455, "learning_rate": 2.304167645083667e-06, "loss": 0.7966176986694335, "step": 132885 }, { "epoch": 1.1490605355768648, "grad_norm": 0.6520731839490952, "learning_rate": 2.3039694695022808e-06, "loss": 0.13927803039550782, "step": 132890 }, { "epoch": 1.149103769098408, "grad_norm": 3.2116147392140153, "learning_rate": 2.303771297130926e-06, "loss": 0.031719970703125, "step": 132895 }, { "epoch": 1.1491470026199515, "grad_norm": 2.7530520544603445, "learning_rate": 2.3035731279705154e-06, "loss": 0.024140357971191406, "step": 132900 }, { "epoch": 1.1491902361414947, "grad_norm": 0.7573046338921291, "learning_rate": 2.303374962021965e-06, "loss": 0.129693603515625, "step": 132905 }, { "epoch": 1.149233469663038, "grad_norm": 39.71652415657658, "learning_rate": 2.3031767992861883e-06, "loss": 0.1303731918334961, "step": 132910 }, { "epoch": 1.1492767031845812, "grad_norm": 1.0988382419871217, "learning_rate": 2.302978639764098e-06, "loss": 0.08121795654296875, "step": 132915 }, { "epoch": 1.1493199367061244, "grad_norm": 0.15332150181803617, "learning_rate": 2.3027804834566086e-06, "loss": 0.013747787475585938, "step": 132920 }, { "epoch": 1.1493631702276677, "grad_norm": 0.6557806206634396, "learning_rate": 2.3025823303646344e-06, "loss": 0.028410720825195312, "step": 132925 }, { "epoch": 1.1494064037492109, "grad_norm": 2.821112210991767, "learning_rate": 2.3023841804890887e-06, "loss": 0.061865234375, "step": 132930 }, { "epoch": 1.1494496372707543, "grad_norm": 1.2237106361327474, "learning_rate": 2.3021860338308845e-06, "loss": 0.05271568298339844, "step": 132935 }, { "epoch": 1.1494928707922976, "grad_norm": 9.190207059085465, "learning_rate": 2.3019878903909375e-06, "loss": 0.09175949096679688, "step": 132940 }, { "epoch": 1.1495361043138408, "grad_norm": 3.7436132736800363, "learning_rate": 2.3017897501701613e-06, "loss": 0.0696197509765625, "step": 132945 }, { "epoch": 1.149579337835384, "grad_norm": 3.162489546480364, "learning_rate": 2.301591613169468e-06, "loss": 0.04222869873046875, "step": 132950 }, { "epoch": 1.1496225713569272, "grad_norm": 0.2896850193163668, "learning_rate": 2.3013934793897735e-06, "loss": 0.004755783081054688, "step": 132955 }, { "epoch": 1.1496658048784705, "grad_norm": 3.216650798396178, "learning_rate": 2.30119534883199e-06, "loss": 0.024567413330078124, "step": 132960 }, { "epoch": 1.149709038400014, "grad_norm": 4.408810298556773, "learning_rate": 2.300997221497031e-06, "loss": 0.2860382080078125, "step": 132965 }, { "epoch": 1.1497522719215572, "grad_norm": 3.352223419266581, "learning_rate": 2.3007990973858117e-06, "loss": 0.09656448364257812, "step": 132970 }, { "epoch": 1.1497955054431004, "grad_norm": 0.3141326750007739, "learning_rate": 2.3006009764992457e-06, "loss": 0.026001358032226564, "step": 132975 }, { "epoch": 1.1498387389646436, "grad_norm": 2.1207069664853995, "learning_rate": 2.3004028588382454e-06, "loss": 0.06267356872558594, "step": 132980 }, { "epoch": 1.1498819724861868, "grad_norm": 2.740753196008145, "learning_rate": 2.300204744403726e-06, "loss": 0.0312896728515625, "step": 132985 }, { "epoch": 1.14992520600773, "grad_norm": 5.367029818022226, "learning_rate": 2.3000066331966004e-06, "loss": 0.01777820587158203, "step": 132990 }, { "epoch": 1.1499684395292733, "grad_norm": 27.105223711602946, "learning_rate": 2.299808525217781e-06, "loss": 0.0798797607421875, "step": 132995 }, { "epoch": 1.1500116730508168, "grad_norm": 1.0613147784038581, "learning_rate": 2.2996104204681844e-06, "loss": 0.093048095703125, "step": 133000 }, { "epoch": 1.15005490657236, "grad_norm": 1.612437616860537, "learning_rate": 2.2994123189487226e-06, "loss": 0.021540069580078126, "step": 133005 }, { "epoch": 1.1500981400939032, "grad_norm": 7.315323547729403, "learning_rate": 2.299214220660308e-06, "loss": 0.022550201416015624, "step": 133010 }, { "epoch": 1.1501413736154464, "grad_norm": 44.192248189185705, "learning_rate": 2.299016125603857e-06, "loss": 0.19102783203125, "step": 133015 }, { "epoch": 1.1501846071369897, "grad_norm": 0.16185095704449376, "learning_rate": 2.2988180337802812e-06, "loss": 0.04844169616699219, "step": 133020 }, { "epoch": 1.1502278406585331, "grad_norm": 0.5278446058498001, "learning_rate": 2.298619945190495e-06, "loss": 0.017859649658203126, "step": 133025 }, { "epoch": 1.1502710741800763, "grad_norm": 0.49821790024653595, "learning_rate": 2.29842185983541e-06, "loss": 0.14062576293945311, "step": 133030 }, { "epoch": 1.1503143077016196, "grad_norm": 0.5186702649406162, "learning_rate": 2.2982237777159428e-06, "loss": 0.08519210815429687, "step": 133035 }, { "epoch": 1.1503575412231628, "grad_norm": 0.21657058169735743, "learning_rate": 2.298025698833005e-06, "loss": 0.0316619873046875, "step": 133040 }, { "epoch": 1.150400774744706, "grad_norm": 4.3579520594723045, "learning_rate": 2.2978276231875113e-06, "loss": 0.05365142822265625, "step": 133045 }, { "epoch": 1.1504440082662493, "grad_norm": 0.3108189935244079, "learning_rate": 2.297629550780374e-06, "loss": 0.0794769287109375, "step": 133050 }, { "epoch": 1.1504872417877925, "grad_norm": 0.17047913230109968, "learning_rate": 2.297431481612508e-06, "loss": 0.4125476837158203, "step": 133055 }, { "epoch": 1.150530475309336, "grad_norm": 0.1131392446686247, "learning_rate": 2.297233415684824e-06, "loss": 0.0192474365234375, "step": 133060 }, { "epoch": 1.1505737088308792, "grad_norm": 1.1044251156184826, "learning_rate": 2.2970353529982386e-06, "loss": 0.024825286865234376, "step": 133065 }, { "epoch": 1.1506169423524224, "grad_norm": 4.873686893721923, "learning_rate": 2.296837293553664e-06, "loss": 0.026702880859375, "step": 133070 }, { "epoch": 1.1506601758739656, "grad_norm": 0.7931959620675313, "learning_rate": 2.2966392373520143e-06, "loss": 0.008977508544921875, "step": 133075 }, { "epoch": 1.1507034093955089, "grad_norm": 2.2129237370459705, "learning_rate": 2.296441184394202e-06, "loss": 0.0575592041015625, "step": 133080 }, { "epoch": 1.150746642917052, "grad_norm": 1.1108623102566395, "learning_rate": 2.2962431346811404e-06, "loss": 0.48976669311523435, "step": 133085 }, { "epoch": 1.1507898764385955, "grad_norm": 13.94407250747163, "learning_rate": 2.296045088213743e-06, "loss": 0.14289016723632814, "step": 133090 }, { "epoch": 1.1508331099601388, "grad_norm": 0.15444943700148242, "learning_rate": 2.2958470449929234e-06, "loss": 0.04130573272705078, "step": 133095 }, { "epoch": 1.150876343481682, "grad_norm": 10.077283168326396, "learning_rate": 2.295649005019596e-06, "loss": 0.03555984497070312, "step": 133100 }, { "epoch": 1.1509195770032252, "grad_norm": 0.5667760320965894, "learning_rate": 2.2954509682946725e-06, "loss": 0.03134307861328125, "step": 133105 }, { "epoch": 1.1509628105247685, "grad_norm": 0.4303989151289835, "learning_rate": 2.2952529348190673e-06, "loss": 0.030675888061523438, "step": 133110 }, { "epoch": 1.1510060440463117, "grad_norm": 0.8655018937988556, "learning_rate": 2.2950549045936928e-06, "loss": 0.008742523193359376, "step": 133115 }, { "epoch": 1.151049277567855, "grad_norm": 0.5975164518392412, "learning_rate": 2.2948568776194636e-06, "loss": 0.0771453857421875, "step": 133120 }, { "epoch": 1.1510925110893984, "grad_norm": 8.406906212636505, "learning_rate": 2.2946588538972903e-06, "loss": 0.05542449951171875, "step": 133125 }, { "epoch": 1.1511357446109416, "grad_norm": 15.33004555915908, "learning_rate": 2.2944608334280895e-06, "loss": 0.049512481689453124, "step": 133130 }, { "epoch": 1.1511789781324848, "grad_norm": 0.2082639782725825, "learning_rate": 2.2942628162127734e-06, "loss": 0.011987686157226562, "step": 133135 }, { "epoch": 1.151222211654028, "grad_norm": 4.311394970991838, "learning_rate": 2.2940648022522542e-06, "loss": 0.028443145751953124, "step": 133140 }, { "epoch": 1.1512654451755713, "grad_norm": 2.130011519692306, "learning_rate": 2.2938667915474455e-06, "loss": 0.025562286376953125, "step": 133145 }, { "epoch": 1.1513086786971145, "grad_norm": 5.688784194466287, "learning_rate": 2.2936687840992613e-06, "loss": 0.01635589599609375, "step": 133150 }, { "epoch": 1.151351912218658, "grad_norm": 1.7731363787761945, "learning_rate": 2.293470779908613e-06, "loss": 0.11892623901367187, "step": 133155 }, { "epoch": 1.1513951457402012, "grad_norm": 7.157753337496547, "learning_rate": 2.2932727789764162e-06, "loss": 0.4181732177734375, "step": 133160 }, { "epoch": 1.1514383792617444, "grad_norm": 2.0238904212514424, "learning_rate": 2.293074781303583e-06, "loss": 0.07853775024414063, "step": 133165 }, { "epoch": 1.1514816127832876, "grad_norm": 0.4711923033775116, "learning_rate": 2.292876786891026e-06, "loss": 0.09212646484375, "step": 133170 }, { "epoch": 1.1515248463048309, "grad_norm": 4.210003529629832, "learning_rate": 2.292678795739659e-06, "loss": 0.070025634765625, "step": 133175 }, { "epoch": 1.151568079826374, "grad_norm": 20.593801687855972, "learning_rate": 2.2924808078503953e-06, "loss": 0.05062942504882813, "step": 133180 }, { "epoch": 1.1516113133479173, "grad_norm": 9.114253822224725, "learning_rate": 2.2922828232241475e-06, "loss": 0.21260833740234375, "step": 133185 }, { "epoch": 1.1516545468694608, "grad_norm": 8.584386648311563, "learning_rate": 2.2920848418618272e-06, "loss": 0.06417388916015625, "step": 133190 }, { "epoch": 1.151697780391004, "grad_norm": 2.677384651642117, "learning_rate": 2.2918868637643506e-06, "loss": 0.042939376831054685, "step": 133195 }, { "epoch": 1.1517410139125472, "grad_norm": 3.3330302936457668, "learning_rate": 2.2916888889326294e-06, "loss": 0.2870819091796875, "step": 133200 }, { "epoch": 1.1517842474340905, "grad_norm": 2.1935895903498093, "learning_rate": 2.2914909173675754e-06, "loss": 0.06695938110351562, "step": 133205 }, { "epoch": 1.1518274809556337, "grad_norm": 20.809967451151948, "learning_rate": 2.2912929490701035e-06, "loss": 0.1608795166015625, "step": 133210 }, { "epoch": 1.151870714477177, "grad_norm": 24.69606230059597, "learning_rate": 2.291094984041126e-06, "loss": 0.07593307495117188, "step": 133215 }, { "epoch": 1.1519139479987204, "grad_norm": 43.42000137769734, "learning_rate": 2.2908970222815545e-06, "loss": 0.127880859375, "step": 133220 }, { "epoch": 1.1519571815202636, "grad_norm": 3.513044678009496, "learning_rate": 2.290699063792304e-06, "loss": 0.00974273681640625, "step": 133225 }, { "epoch": 1.1520004150418068, "grad_norm": 8.70298574336247, "learning_rate": 2.2905011085742876e-06, "loss": 0.07599868774414062, "step": 133230 }, { "epoch": 1.15204364856335, "grad_norm": 5.318609056064911, "learning_rate": 2.290303156628416e-06, "loss": 0.04983673095703125, "step": 133235 }, { "epoch": 1.1520868820848933, "grad_norm": 0.11721957741534579, "learning_rate": 2.2901052079556046e-06, "loss": 0.03460769653320313, "step": 133240 }, { "epoch": 1.1521301156064365, "grad_norm": 5.1250004689157445, "learning_rate": 2.2899072625567655e-06, "loss": 0.037885284423828124, "step": 133245 }, { "epoch": 1.1521733491279798, "grad_norm": 0.519044420471116, "learning_rate": 2.28970932043281e-06, "loss": 0.042919921875, "step": 133250 }, { "epoch": 1.1522165826495232, "grad_norm": 39.53261248707634, "learning_rate": 2.289511381584653e-06, "loss": 0.40022125244140627, "step": 133255 }, { "epoch": 1.1522598161710664, "grad_norm": 14.438097840765916, "learning_rate": 2.2893134460132066e-06, "loss": 0.15410957336425782, "step": 133260 }, { "epoch": 1.1523030496926097, "grad_norm": 25.58536645565639, "learning_rate": 2.2891155137193843e-06, "loss": 0.06847286224365234, "step": 133265 }, { "epoch": 1.1523462832141529, "grad_norm": 4.420101646777669, "learning_rate": 2.2889175847040987e-06, "loss": 0.12948532104492189, "step": 133270 }, { "epoch": 1.1523895167356961, "grad_norm": 2.7156477967436286, "learning_rate": 2.288719658968262e-06, "loss": 0.1974761962890625, "step": 133275 }, { "epoch": 1.1524327502572396, "grad_norm": 0.6463583053204526, "learning_rate": 2.2885217365127877e-06, "loss": 0.303851318359375, "step": 133280 }, { "epoch": 1.1524759837787828, "grad_norm": 26.028923781066332, "learning_rate": 2.2883238173385867e-06, "loss": 0.0677154541015625, "step": 133285 }, { "epoch": 1.152519217300326, "grad_norm": 6.532573459467697, "learning_rate": 2.288125901446574e-06, "loss": 0.052945709228515624, "step": 133290 }, { "epoch": 1.1525624508218693, "grad_norm": 5.063304135954993, "learning_rate": 2.2879279888376627e-06, "loss": 0.029395294189453126, "step": 133295 }, { "epoch": 1.1526056843434125, "grad_norm": 0.2714012555012003, "learning_rate": 2.2877300795127646e-06, "loss": 0.003974151611328125, "step": 133300 }, { "epoch": 1.1526489178649557, "grad_norm": 9.663130195784673, "learning_rate": 2.287532173472792e-06, "loss": 0.03246002197265625, "step": 133305 }, { "epoch": 1.152692151386499, "grad_norm": 2.9442936290729977, "learning_rate": 2.2873342707186576e-06, "loss": 0.19929351806640624, "step": 133310 }, { "epoch": 1.1527353849080424, "grad_norm": 6.963213289134046, "learning_rate": 2.287136371251275e-06, "loss": 0.027936553955078124, "step": 133315 }, { "epoch": 1.1527786184295856, "grad_norm": 2.1055309794552635, "learning_rate": 2.2869384750715556e-06, "loss": 0.0787689208984375, "step": 133320 }, { "epoch": 1.1528218519511289, "grad_norm": 4.556189896232125, "learning_rate": 2.2867405821804143e-06, "loss": 0.032862091064453126, "step": 133325 }, { "epoch": 1.152865085472672, "grad_norm": 24.38004294878569, "learning_rate": 2.2865426925787623e-06, "loss": 0.2028076171875, "step": 133330 }, { "epoch": 1.1529083189942153, "grad_norm": 0.8768537769166725, "learning_rate": 2.286344806267512e-06, "loss": 0.03723983764648438, "step": 133335 }, { "epoch": 1.1529515525157585, "grad_norm": 4.624622116115057, "learning_rate": 2.2861469232475764e-06, "loss": 0.050191497802734374, "step": 133340 }, { "epoch": 1.152994786037302, "grad_norm": 6.754840225343011, "learning_rate": 2.2859490435198675e-06, "loss": 0.07665328979492188, "step": 133345 }, { "epoch": 1.1530380195588452, "grad_norm": 2.970660935013993, "learning_rate": 2.285751167085299e-06, "loss": 0.039347267150878905, "step": 133350 }, { "epoch": 1.1530812530803884, "grad_norm": 3.1579040900878437, "learning_rate": 2.285553293944783e-06, "loss": 0.12161636352539062, "step": 133355 }, { "epoch": 1.1531244866019317, "grad_norm": 7.344201468470769, "learning_rate": 2.2853554240992327e-06, "loss": 0.019295883178710938, "step": 133360 }, { "epoch": 1.153167720123475, "grad_norm": 2.96677123238546, "learning_rate": 2.28515755754956e-06, "loss": 0.04650115966796875, "step": 133365 }, { "epoch": 1.1532109536450181, "grad_norm": 1.7783802470528536, "learning_rate": 2.284959694296676e-06, "loss": 0.022202301025390624, "step": 133370 }, { "epoch": 1.1532541871665614, "grad_norm": 1.3245059522077185, "learning_rate": 2.284761834341496e-06, "loss": 0.04473114013671875, "step": 133375 }, { "epoch": 1.1532974206881048, "grad_norm": 5.617903065210842, "learning_rate": 2.28456397768493e-06, "loss": 0.1925069808959961, "step": 133380 }, { "epoch": 1.153340654209648, "grad_norm": 1.4055137003630274, "learning_rate": 2.2843661243278933e-06, "loss": 0.17812004089355468, "step": 133385 }, { "epoch": 1.1533838877311913, "grad_norm": 2.773298774248741, "learning_rate": 2.284168274271296e-06, "loss": 0.25687408447265625, "step": 133390 }, { "epoch": 1.1534271212527345, "grad_norm": 3.902638597537801, "learning_rate": 2.283970427516052e-06, "loss": 0.2743980407714844, "step": 133395 }, { "epoch": 1.1534703547742777, "grad_norm": 0.6556673631990191, "learning_rate": 2.2837725840630723e-06, "loss": 0.01732025146484375, "step": 133400 }, { "epoch": 1.153513588295821, "grad_norm": 17.3631777585206, "learning_rate": 2.283574743913271e-06, "loss": 0.13487014770507813, "step": 133405 }, { "epoch": 1.1535568218173644, "grad_norm": 1.9782555737236809, "learning_rate": 2.2833769070675583e-06, "loss": 0.035243988037109375, "step": 133410 }, { "epoch": 1.1536000553389076, "grad_norm": 3.9661030143766838, "learning_rate": 2.283179073526849e-06, "loss": 0.036467361450195315, "step": 133415 }, { "epoch": 1.1536432888604509, "grad_norm": 34.60213516823001, "learning_rate": 2.2829812432920547e-06, "loss": 0.1287353515625, "step": 133420 }, { "epoch": 1.153686522381994, "grad_norm": 1.3019005882152177, "learning_rate": 2.2827834163640873e-06, "loss": 0.04193801879882812, "step": 133425 }, { "epoch": 1.1537297559035373, "grad_norm": 6.462695375610554, "learning_rate": 2.282585592743859e-06, "loss": 0.11549739837646485, "step": 133430 }, { "epoch": 1.1537729894250806, "grad_norm": 0.4643428739048805, "learning_rate": 2.2823877724322833e-06, "loss": 0.02983245849609375, "step": 133435 }, { "epoch": 1.1538162229466238, "grad_norm": 9.656491395605492, "learning_rate": 2.28218995543027e-06, "loss": 0.1073822021484375, "step": 133440 }, { "epoch": 1.1538594564681672, "grad_norm": 0.04765018371023662, "learning_rate": 2.281992141738735e-06, "loss": 0.029818344116210937, "step": 133445 }, { "epoch": 1.1539026899897105, "grad_norm": 8.235827558035426, "learning_rate": 2.281794331358588e-06, "loss": 0.06643829345703126, "step": 133450 }, { "epoch": 1.1539459235112537, "grad_norm": 8.695767774154557, "learning_rate": 2.281596524290743e-06, "loss": 0.02590808868408203, "step": 133455 }, { "epoch": 1.153989157032797, "grad_norm": 35.029836171674276, "learning_rate": 2.2813987205361103e-06, "loss": 0.11340179443359374, "step": 133460 }, { "epoch": 1.1540323905543401, "grad_norm": 7.242002799294441, "learning_rate": 2.2812009200956044e-06, "loss": 0.03209419250488281, "step": 133465 }, { "epoch": 1.1540756240758836, "grad_norm": 0.7051169677892093, "learning_rate": 2.281003122970136e-06, "loss": 0.048996353149414064, "step": 133470 }, { "epoch": 1.1541188575974268, "grad_norm": 2.1446085043446046, "learning_rate": 2.280805329160616e-06, "loss": 0.07130661010742187, "step": 133475 }, { "epoch": 1.15416209111897, "grad_norm": 1.62124237012239, "learning_rate": 2.28060753866796e-06, "loss": 0.1868194580078125, "step": 133480 }, { "epoch": 1.1542053246405133, "grad_norm": 1.3923260227601386, "learning_rate": 2.2804097514930775e-06, "loss": 0.0121368408203125, "step": 133485 }, { "epoch": 1.1542485581620565, "grad_norm": 0.35816084951730665, "learning_rate": 2.280211967636882e-06, "loss": 0.013489913940429688, "step": 133490 }, { "epoch": 1.1542917916835997, "grad_norm": 14.883598581548323, "learning_rate": 2.280014187100286e-06, "loss": 0.094384765625, "step": 133495 }, { "epoch": 1.154335025205143, "grad_norm": 7.81683382198366, "learning_rate": 2.279816409884201e-06, "loss": 0.03819122314453125, "step": 133500 }, { "epoch": 1.1543782587266862, "grad_norm": 4.101930984311541, "learning_rate": 2.2796186359895375e-06, "loss": 0.1471405029296875, "step": 133505 }, { "epoch": 1.1544214922482297, "grad_norm": 1.6878312302954113, "learning_rate": 2.2794208654172102e-06, "loss": 0.0669342041015625, "step": 133510 }, { "epoch": 1.1544647257697729, "grad_norm": 8.191319617828718, "learning_rate": 2.2792230981681295e-06, "loss": 0.08760604858398438, "step": 133515 }, { "epoch": 1.154507959291316, "grad_norm": 0.42456859958919296, "learning_rate": 2.2790253342432093e-06, "loss": 0.05368194580078125, "step": 133520 }, { "epoch": 1.1545511928128593, "grad_norm": 21.954439568604524, "learning_rate": 2.2788275736433606e-06, "loss": 0.09207115173339844, "step": 133525 }, { "epoch": 1.1545944263344026, "grad_norm": 6.175026536435297, "learning_rate": 2.278629816369495e-06, "loss": 0.04440231323242187, "step": 133530 }, { "epoch": 1.154637659855946, "grad_norm": 35.76486466163445, "learning_rate": 2.2784320624225247e-06, "loss": 0.2220602035522461, "step": 133535 }, { "epoch": 1.1546808933774892, "grad_norm": 0.6937071603038512, "learning_rate": 2.2782343118033617e-06, "loss": 0.0230377197265625, "step": 133540 }, { "epoch": 1.1547241268990325, "grad_norm": 1.3937218084099092, "learning_rate": 2.2780365645129184e-06, "loss": 0.025376129150390624, "step": 133545 }, { "epoch": 1.1547673604205757, "grad_norm": 8.308009574015276, "learning_rate": 2.2778388205521073e-06, "loss": 0.08209075927734374, "step": 133550 }, { "epoch": 1.154810593942119, "grad_norm": 3.9866171840884763, "learning_rate": 2.2776410799218396e-06, "loss": 0.079132080078125, "step": 133555 }, { "epoch": 1.1548538274636622, "grad_norm": 40.152871383931625, "learning_rate": 2.2774433426230278e-06, "loss": 0.09819564819335938, "step": 133560 }, { "epoch": 1.1548970609852054, "grad_norm": 0.15046891293632783, "learning_rate": 2.2772456086565825e-06, "loss": 0.10331382751464843, "step": 133565 }, { "epoch": 1.1549402945067488, "grad_norm": 3.3269765100511854, "learning_rate": 2.2770478780234164e-06, "loss": 0.02723236083984375, "step": 133570 }, { "epoch": 1.154983528028292, "grad_norm": 7.609600822771012, "learning_rate": 2.276850150724442e-06, "loss": 0.0224365234375, "step": 133575 }, { "epoch": 1.1550267615498353, "grad_norm": 2.084493891628164, "learning_rate": 2.2766524267605714e-06, "loss": 0.05703544616699219, "step": 133580 }, { "epoch": 1.1550699950713785, "grad_norm": 1.6238052421817386, "learning_rate": 2.2764547061327156e-06, "loss": 0.05396881103515625, "step": 133585 }, { "epoch": 1.1551132285929218, "grad_norm": 1.6958149406070204, "learning_rate": 2.276256988841787e-06, "loss": 0.07879180908203125, "step": 133590 }, { "epoch": 1.155156462114465, "grad_norm": 3.425347293116939, "learning_rate": 2.2760592748886968e-06, "loss": 0.06844024658203125, "step": 133595 }, { "epoch": 1.1551996956360084, "grad_norm": 2.3564193295104348, "learning_rate": 2.2758615642743565e-06, "loss": 0.016841888427734375, "step": 133600 }, { "epoch": 1.1552429291575517, "grad_norm": 1.5716918970500868, "learning_rate": 2.275663856999679e-06, "loss": 0.0220245361328125, "step": 133605 }, { "epoch": 1.155286162679095, "grad_norm": 16.686377291590134, "learning_rate": 2.275466153065577e-06, "loss": 0.07471160888671875, "step": 133610 }, { "epoch": 1.1553293962006381, "grad_norm": 4.896176176870397, "learning_rate": 2.2752684524729607e-06, "loss": 0.1590496063232422, "step": 133615 }, { "epoch": 1.1553726297221814, "grad_norm": 1.9568767675569139, "learning_rate": 2.275070755222742e-06, "loss": 0.17035980224609376, "step": 133620 }, { "epoch": 1.1554158632437246, "grad_norm": 0.18215646773995756, "learning_rate": 2.2748730613158328e-06, "loss": 0.06443595886230469, "step": 133625 }, { "epoch": 1.1554590967652678, "grad_norm": 0.7577339820198005, "learning_rate": 2.274675370753145e-06, "loss": 0.10046844482421875, "step": 133630 }, { "epoch": 1.1555023302868113, "grad_norm": 12.813902085856137, "learning_rate": 2.274477683535589e-06, "loss": 0.1617218017578125, "step": 133635 }, { "epoch": 1.1555455638083545, "grad_norm": 6.30459485044766, "learning_rate": 2.2742799996640797e-06, "loss": 0.08319892883300781, "step": 133640 }, { "epoch": 1.1555887973298977, "grad_norm": 1.962744511432475, "learning_rate": 2.2740823191395265e-06, "loss": 0.0554443359375, "step": 133645 }, { "epoch": 1.155632030851441, "grad_norm": 0.2515305365133615, "learning_rate": 2.273884641962842e-06, "loss": 0.1600311279296875, "step": 133650 }, { "epoch": 1.1556752643729842, "grad_norm": 2.8730036042362577, "learning_rate": 2.273686968134936e-06, "loss": 0.438140869140625, "step": 133655 }, { "epoch": 1.1557184978945274, "grad_norm": 1.689963251445996, "learning_rate": 2.2734892976567224e-06, "loss": 0.02655792236328125, "step": 133660 }, { "epoch": 1.1557617314160709, "grad_norm": 0.10667419144847853, "learning_rate": 2.2732916305291112e-06, "loss": 0.055119657516479494, "step": 133665 }, { "epoch": 1.155804964937614, "grad_norm": 1.9717339067822612, "learning_rate": 2.2730939667530155e-06, "loss": 0.0881134033203125, "step": 133670 }, { "epoch": 1.1558481984591573, "grad_norm": 2.099964971179677, "learning_rate": 2.2728963063293466e-06, "loss": 0.081219482421875, "step": 133675 }, { "epoch": 1.1558914319807005, "grad_norm": 1.7711954409456672, "learning_rate": 2.272698649259015e-06, "loss": 0.019237518310546875, "step": 133680 }, { "epoch": 1.1559346655022438, "grad_norm": 22.32069269456446, "learning_rate": 2.272500995542933e-06, "loss": 0.06953887939453125, "step": 133685 }, { "epoch": 1.155977899023787, "grad_norm": 58.263080556590424, "learning_rate": 2.2723033451820126e-06, "loss": 0.21615104675292968, "step": 133690 }, { "epoch": 1.1560211325453302, "grad_norm": 1.5778276295174705, "learning_rate": 2.2721056981771637e-06, "loss": 0.033222389221191403, "step": 133695 }, { "epoch": 1.1560643660668737, "grad_norm": 6.326188475609238, "learning_rate": 2.2719080545293e-06, "loss": 0.11496849060058593, "step": 133700 }, { "epoch": 1.156107599588417, "grad_norm": 6.793361761970731, "learning_rate": 2.271710414239332e-06, "loss": 0.07210311889648438, "step": 133705 }, { "epoch": 1.1561508331099601, "grad_norm": 1.0716161296575692, "learning_rate": 2.2715127773081708e-06, "loss": 0.020539093017578124, "step": 133710 }, { "epoch": 1.1561940666315034, "grad_norm": 6.714092183889296, "learning_rate": 2.271315143736729e-06, "loss": 0.03129730224609375, "step": 133715 }, { "epoch": 1.1562373001530466, "grad_norm": 2.834235539359804, "learning_rate": 2.2711175135259173e-06, "loss": 0.104150390625, "step": 133720 }, { "epoch": 1.15628053367459, "grad_norm": 0.3580949124290805, "learning_rate": 2.2709198866766473e-06, "loss": 0.0156280517578125, "step": 133725 }, { "epoch": 1.1563237671961333, "grad_norm": 5.701344284153593, "learning_rate": 2.2707222631898287e-06, "loss": 0.1511138916015625, "step": 133730 }, { "epoch": 1.1563670007176765, "grad_norm": 1.6106052574913396, "learning_rate": 2.270524643066376e-06, "loss": 0.06678390502929688, "step": 133735 }, { "epoch": 1.1564102342392197, "grad_norm": 16.570961763774918, "learning_rate": 2.2703270263071986e-06, "loss": 0.10121612548828125, "step": 133740 }, { "epoch": 1.156453467760763, "grad_norm": 0.41758954535399123, "learning_rate": 2.270129412913209e-06, "loss": 0.0124237060546875, "step": 133745 }, { "epoch": 1.1564967012823062, "grad_norm": 9.158952886606262, "learning_rate": 2.2699318028853184e-06, "loss": 0.029180145263671874, "step": 133750 }, { "epoch": 1.1565399348038494, "grad_norm": 27.134811398256655, "learning_rate": 2.2697341962244375e-06, "loss": 0.04898223876953125, "step": 133755 }, { "epoch": 1.1565831683253927, "grad_norm": 0.15958730648252306, "learning_rate": 2.269536592931477e-06, "loss": 0.1096710205078125, "step": 133760 }, { "epoch": 1.156626401846936, "grad_norm": 17.257821444175534, "learning_rate": 2.2693389930073505e-06, "loss": 0.150958251953125, "step": 133765 }, { "epoch": 1.1566696353684793, "grad_norm": 2.0466994298600447, "learning_rate": 2.2691413964529668e-06, "loss": 0.055254364013671876, "step": 133770 }, { "epoch": 1.1567128688900226, "grad_norm": 6.373401143496399, "learning_rate": 2.2689438032692393e-06, "loss": 0.2199859619140625, "step": 133775 }, { "epoch": 1.1567561024115658, "grad_norm": 0.387129275313566, "learning_rate": 2.2687462134570787e-06, "loss": 0.014337921142578125, "step": 133780 }, { "epoch": 1.156799335933109, "grad_norm": 21.30892547327635, "learning_rate": 2.268548627017396e-06, "loss": 0.16899032592773439, "step": 133785 }, { "epoch": 1.1568425694546525, "grad_norm": 2.2553026261346476, "learning_rate": 2.2683510439511004e-06, "loss": 0.28838043212890624, "step": 133790 }, { "epoch": 1.1568858029761957, "grad_norm": 3.823482578315964, "learning_rate": 2.268153464259107e-06, "loss": 0.02581939697265625, "step": 133795 }, { "epoch": 1.156929036497739, "grad_norm": 5.639098052422422, "learning_rate": 2.2679558879423245e-06, "loss": 0.114361572265625, "step": 133800 }, { "epoch": 1.1569722700192822, "grad_norm": 0.3029178083548912, "learning_rate": 2.267758315001665e-06, "loss": 0.02944488525390625, "step": 133805 }, { "epoch": 1.1570155035408254, "grad_norm": 2.8202813454913285, "learning_rate": 2.2675607454380396e-06, "loss": 0.01835289001464844, "step": 133810 }, { "epoch": 1.1570587370623686, "grad_norm": 5.942529918119508, "learning_rate": 2.26736317925236e-06, "loss": 0.079888916015625, "step": 133815 }, { "epoch": 1.1571019705839118, "grad_norm": 29.652756351240182, "learning_rate": 2.2671656164455354e-06, "loss": 0.0947052001953125, "step": 133820 }, { "epoch": 1.1571452041054553, "grad_norm": 40.30471260556235, "learning_rate": 2.266968057018478e-06, "loss": 0.0875946044921875, "step": 133825 }, { "epoch": 1.1571884376269985, "grad_norm": 2.192009648769222, "learning_rate": 2.2667705009720997e-06, "loss": 0.01610107421875, "step": 133830 }, { "epoch": 1.1572316711485418, "grad_norm": 1.3563357288278124, "learning_rate": 2.2665729483073113e-06, "loss": 0.09295883178710937, "step": 133835 }, { "epoch": 1.157274904670085, "grad_norm": 2.7494152476407976, "learning_rate": 2.2663753990250237e-06, "loss": 0.06201381683349609, "step": 133840 }, { "epoch": 1.1573181381916282, "grad_norm": 2.299293605883273, "learning_rate": 2.2661778531261482e-06, "loss": 0.01560821533203125, "step": 133845 }, { "epoch": 1.1573613717131714, "grad_norm": 1.533252331967905, "learning_rate": 2.2659803106115946e-06, "loss": 0.08483963012695313, "step": 133850 }, { "epoch": 1.157404605234715, "grad_norm": 6.749673138842369, "learning_rate": 2.265782771482275e-06, "loss": 0.0852081298828125, "step": 133855 }, { "epoch": 1.1574478387562581, "grad_norm": 9.532436642633318, "learning_rate": 2.265585235739101e-06, "loss": 0.018894195556640625, "step": 133860 }, { "epoch": 1.1574910722778013, "grad_norm": 1.4399859582139565, "learning_rate": 2.265387703382983e-06, "loss": 0.12791519165039061, "step": 133865 }, { "epoch": 1.1575343057993446, "grad_norm": 25.40323272218827, "learning_rate": 2.2651901744148323e-06, "loss": 0.20026016235351562, "step": 133870 }, { "epoch": 1.1575775393208878, "grad_norm": 19.415636396868873, "learning_rate": 2.2649926488355594e-06, "loss": 0.12335205078125, "step": 133875 }, { "epoch": 1.157620772842431, "grad_norm": 2.416599308763879, "learning_rate": 2.2647951266460746e-06, "loss": 0.04187469482421875, "step": 133880 }, { "epoch": 1.1576640063639743, "grad_norm": 5.855834194079354, "learning_rate": 2.264597607847291e-06, "loss": 0.03190193176269531, "step": 133885 }, { "epoch": 1.1577072398855177, "grad_norm": 30.633952635639208, "learning_rate": 2.264400092440117e-06, "loss": 0.16215858459472657, "step": 133890 }, { "epoch": 1.157750473407061, "grad_norm": 11.223356582800456, "learning_rate": 2.264202580425465e-06, "loss": 0.15989189147949218, "step": 133895 }, { "epoch": 1.1577937069286042, "grad_norm": 51.01092746966548, "learning_rate": 2.2640050718042465e-06, "loss": 0.08603515625, "step": 133900 }, { "epoch": 1.1578369404501474, "grad_norm": 11.232999637778185, "learning_rate": 2.2638075665773715e-06, "loss": 0.0715576171875, "step": 133905 }, { "epoch": 1.1578801739716906, "grad_norm": 5.48242769063063, "learning_rate": 2.26361006474575e-06, "loss": 0.07428569793701172, "step": 133910 }, { "epoch": 1.1579234074932339, "grad_norm": 7.157772276951832, "learning_rate": 2.263412566310295e-06, "loss": 0.1029815673828125, "step": 133915 }, { "epoch": 1.1579666410147773, "grad_norm": 0.2530554351620788, "learning_rate": 2.2632150712719144e-06, "loss": 0.06466217041015625, "step": 133920 }, { "epoch": 1.1580098745363205, "grad_norm": 2.092557059686923, "learning_rate": 2.2630175796315225e-06, "loss": 0.104736328125, "step": 133925 }, { "epoch": 1.1580531080578638, "grad_norm": 0.30910189945404365, "learning_rate": 2.262820091390028e-06, "loss": 0.11269989013671874, "step": 133930 }, { "epoch": 1.158096341579407, "grad_norm": 58.608704155085775, "learning_rate": 2.2626226065483414e-06, "loss": 0.165869140625, "step": 133935 }, { "epoch": 1.1581395751009502, "grad_norm": 10.42185768136698, "learning_rate": 2.262425125107375e-06, "loss": 0.02159576416015625, "step": 133940 }, { "epoch": 1.1581828086224935, "grad_norm": 6.961259686073282, "learning_rate": 2.262227647068039e-06, "loss": 0.08582763671875, "step": 133945 }, { "epoch": 1.1582260421440367, "grad_norm": 4.570618130656307, "learning_rate": 2.262030172431242e-06, "loss": 0.10697402954101562, "step": 133950 }, { "epoch": 1.1582692756655801, "grad_norm": 1.2510093005679255, "learning_rate": 2.2618327011978987e-06, "loss": 0.02993621826171875, "step": 133955 }, { "epoch": 1.1583125091871234, "grad_norm": 42.087076236123295, "learning_rate": 2.261635233368917e-06, "loss": 0.22702484130859374, "step": 133960 }, { "epoch": 1.1583557427086666, "grad_norm": 6.635902743572, "learning_rate": 2.261437768945208e-06, "loss": 0.0937225341796875, "step": 133965 }, { "epoch": 1.1583989762302098, "grad_norm": 0.4990105366828453, "learning_rate": 2.261240307927684e-06, "loss": 0.02244434356689453, "step": 133970 }, { "epoch": 1.158442209751753, "grad_norm": 11.276556883411335, "learning_rate": 2.2610428503172535e-06, "loss": 0.09365596771240234, "step": 133975 }, { "epoch": 1.1584854432732965, "grad_norm": 0.45874511041046767, "learning_rate": 2.260845396114829e-06, "loss": 0.00360870361328125, "step": 133980 }, { "epoch": 1.1585286767948397, "grad_norm": 15.82211943448202, "learning_rate": 2.260647945321318e-06, "loss": 0.05277099609375, "step": 133985 }, { "epoch": 1.158571910316383, "grad_norm": 31.180534674141164, "learning_rate": 2.260450497937635e-06, "loss": 0.24921875, "step": 133990 }, { "epoch": 1.1586151438379262, "grad_norm": 1.2050170405422405, "learning_rate": 2.2602530539646884e-06, "loss": 0.09048652648925781, "step": 133995 }, { "epoch": 1.1586583773594694, "grad_norm": 1.7791095194143165, "learning_rate": 2.26005561340339e-06, "loss": 0.03601760864257812, "step": 134000 }, { "epoch": 1.1587016108810126, "grad_norm": 6.992236453381495, "learning_rate": 2.2598581762546495e-06, "loss": 0.051943206787109376, "step": 134005 }, { "epoch": 1.1587448444025559, "grad_norm": 79.06406306149874, "learning_rate": 2.259660742519378e-06, "loss": 0.045934295654296874, "step": 134010 }, { "epoch": 1.1587880779240993, "grad_norm": 3.1515947588588475, "learning_rate": 2.2594633121984844e-06, "loss": 0.02334747314453125, "step": 134015 }, { "epoch": 1.1588313114456426, "grad_norm": 4.204501214674971, "learning_rate": 2.259265885292882e-06, "loss": 0.03329391479492187, "step": 134020 }, { "epoch": 1.1588745449671858, "grad_norm": 6.665529667446314, "learning_rate": 2.259068461803479e-06, "loss": 0.05425872802734375, "step": 134025 }, { "epoch": 1.158917778488729, "grad_norm": 16.64593572881307, "learning_rate": 2.258871041731187e-06, "loss": 0.03234100341796875, "step": 134030 }, { "epoch": 1.1589610120102722, "grad_norm": 2.530155081969253, "learning_rate": 2.258673625076917e-06, "loss": 0.14989910125732422, "step": 134035 }, { "epoch": 1.1590042455318155, "grad_norm": 0.7099026993920547, "learning_rate": 2.258476211841578e-06, "loss": 0.1185638427734375, "step": 134040 }, { "epoch": 1.159047479053359, "grad_norm": 0.5675441355989223, "learning_rate": 2.2582788020260806e-06, "loss": 0.24899139404296874, "step": 134045 }, { "epoch": 1.1590907125749021, "grad_norm": 9.079082225982834, "learning_rate": 2.258081395631337e-06, "loss": 0.17288742065429688, "step": 134050 }, { "epoch": 1.1591339460964454, "grad_norm": 4.412149594350276, "learning_rate": 2.2578839926582555e-06, "loss": 0.1032012939453125, "step": 134055 }, { "epoch": 1.1591771796179886, "grad_norm": 0.07450179650238097, "learning_rate": 2.257686593107748e-06, "loss": 0.06752300262451172, "step": 134060 }, { "epoch": 1.1592204131395318, "grad_norm": 0.2635764475327676, "learning_rate": 2.2574891969807246e-06, "loss": 0.029701614379882814, "step": 134065 }, { "epoch": 1.159263646661075, "grad_norm": 15.295979950889711, "learning_rate": 2.2572918042780956e-06, "loss": 0.1370269775390625, "step": 134070 }, { "epoch": 1.1593068801826183, "grad_norm": 1.0690755078080116, "learning_rate": 2.2570944150007705e-06, "loss": 0.018491363525390624, "step": 134075 }, { "epoch": 1.1593501137041617, "grad_norm": 9.493675883201442, "learning_rate": 2.25689702914966e-06, "loss": 0.17321128845214845, "step": 134080 }, { "epoch": 1.159393347225705, "grad_norm": 3.9139266271845727, "learning_rate": 2.256699646725675e-06, "loss": 0.17313499450683595, "step": 134085 }, { "epoch": 1.1594365807472482, "grad_norm": 7.194115329031797, "learning_rate": 2.256502267729726e-06, "loss": 0.1210174560546875, "step": 134090 }, { "epoch": 1.1594798142687914, "grad_norm": 2.2231325429831488, "learning_rate": 2.256304892162723e-06, "loss": 0.15592727661132813, "step": 134095 }, { "epoch": 1.1595230477903347, "grad_norm": 2.7080058019412996, "learning_rate": 2.256107520025576e-06, "loss": 0.07067184448242188, "step": 134100 }, { "epoch": 1.1595662813118779, "grad_norm": 1.3171689057121363, "learning_rate": 2.2559101513191946e-06, "loss": 0.03203401565551758, "step": 134105 }, { "epoch": 1.1596095148334213, "grad_norm": 1.5304259942484344, "learning_rate": 2.2557127860444894e-06, "loss": 0.02929229736328125, "step": 134110 }, { "epoch": 1.1596527483549646, "grad_norm": 6.541379540918804, "learning_rate": 2.2555154242023726e-06, "loss": 0.052009963989257814, "step": 134115 }, { "epoch": 1.1596959818765078, "grad_norm": 47.73030008387264, "learning_rate": 2.2553180657937525e-06, "loss": 0.058101654052734375, "step": 134120 }, { "epoch": 1.159739215398051, "grad_norm": 20.947245995091183, "learning_rate": 2.2551207108195397e-06, "loss": 0.235345458984375, "step": 134125 }, { "epoch": 1.1597824489195943, "grad_norm": 1.0415567316114374, "learning_rate": 2.254923359280644e-06, "loss": 0.09542236328125, "step": 134130 }, { "epoch": 1.1598256824411375, "grad_norm": 27.498459108124116, "learning_rate": 2.2547260111779762e-06, "loss": 0.0780059814453125, "step": 134135 }, { "epoch": 1.1598689159626807, "grad_norm": 36.239132602156346, "learning_rate": 2.2545286665124467e-06, "loss": 0.16152687072753907, "step": 134140 }, { "epoch": 1.1599121494842242, "grad_norm": 34.439828536293156, "learning_rate": 2.2543313252849637e-06, "loss": 0.13512115478515624, "step": 134145 }, { "epoch": 1.1599553830057674, "grad_norm": 16.33569747058649, "learning_rate": 2.2541339874964395e-06, "loss": 0.32981719970703127, "step": 134150 }, { "epoch": 1.1599986165273106, "grad_norm": 10.257216588569568, "learning_rate": 2.253936653147784e-06, "loss": 0.02365875244140625, "step": 134155 }, { "epoch": 1.1600418500488539, "grad_norm": 15.502860822532424, "learning_rate": 2.2537393222399054e-06, "loss": 0.06431999206542968, "step": 134160 }, { "epoch": 1.160085083570397, "grad_norm": 4.731999629466489, "learning_rate": 2.2535419947737165e-06, "loss": 0.01789989471435547, "step": 134165 }, { "epoch": 1.1601283170919405, "grad_norm": 0.2400620569278615, "learning_rate": 2.2533446707501257e-06, "loss": 0.06652297973632812, "step": 134170 }, { "epoch": 1.1601715506134838, "grad_norm": 1.0630396713506147, "learning_rate": 2.253147350170042e-06, "loss": 0.0519287109375, "step": 134175 }, { "epoch": 1.160214784135027, "grad_norm": 5.1657919391607345, "learning_rate": 2.252950033034378e-06, "loss": 0.04139919281005859, "step": 134180 }, { "epoch": 1.1602580176565702, "grad_norm": 25.856214889919745, "learning_rate": 2.2527527193440423e-06, "loss": 0.08375244140625, "step": 134185 }, { "epoch": 1.1603012511781134, "grad_norm": 5.629958289585152, "learning_rate": 2.2525554090999446e-06, "loss": 0.05551424026489258, "step": 134190 }, { "epoch": 1.1603444846996567, "grad_norm": 0.5556069449571588, "learning_rate": 2.252358102302996e-06, "loss": 0.031103515625, "step": 134195 }, { "epoch": 1.1603877182212, "grad_norm": 5.37973241829324, "learning_rate": 2.2521607989541056e-06, "loss": 0.015468788146972657, "step": 134200 }, { "epoch": 1.1604309517427431, "grad_norm": 4.4090596443999495, "learning_rate": 2.251963499054182e-06, "loss": 0.07113113403320312, "step": 134205 }, { "epoch": 1.1604741852642866, "grad_norm": 1.8291288875417044, "learning_rate": 2.2517662026041384e-06, "loss": 0.35985870361328126, "step": 134210 }, { "epoch": 1.1605174187858298, "grad_norm": 29.04964355435419, "learning_rate": 2.2515689096048827e-06, "loss": 0.2092376708984375, "step": 134215 }, { "epoch": 1.160560652307373, "grad_norm": 6.834346655568883, "learning_rate": 2.2513716200573248e-06, "loss": 0.02182159423828125, "step": 134220 }, { "epoch": 1.1606038858289163, "grad_norm": 1.42336536906255, "learning_rate": 2.2511743339623752e-06, "loss": 0.02062091827392578, "step": 134225 }, { "epoch": 1.1606471193504595, "grad_norm": 0.8786884996282535, "learning_rate": 2.2509770513209433e-06, "loss": 0.04801788330078125, "step": 134230 }, { "epoch": 1.160690352872003, "grad_norm": 3.329121167565472, "learning_rate": 2.250779772133939e-06, "loss": 0.037641143798828124, "step": 134235 }, { "epoch": 1.1607335863935462, "grad_norm": 5.162021519479797, "learning_rate": 2.250582496402271e-06, "loss": 0.42841854095458987, "step": 134240 }, { "epoch": 1.1607768199150894, "grad_norm": 46.96739067960662, "learning_rate": 2.2503852241268515e-06, "loss": 0.20804290771484374, "step": 134245 }, { "epoch": 1.1608200534366326, "grad_norm": 21.41670362118157, "learning_rate": 2.2501879553085886e-06, "loss": 0.23464202880859375, "step": 134250 }, { "epoch": 1.1608632869581759, "grad_norm": 4.395104380008601, "learning_rate": 2.249990689948393e-06, "loss": 0.04432830810546875, "step": 134255 }, { "epoch": 1.160906520479719, "grad_norm": 20.491367687351296, "learning_rate": 2.249793428047175e-06, "loss": 0.16364307403564454, "step": 134260 }, { "epoch": 1.1609497540012623, "grad_norm": 1.7072281343449858, "learning_rate": 2.2495961696058424e-06, "loss": 0.1724628448486328, "step": 134265 }, { "epoch": 1.1609929875228058, "grad_norm": 13.627482886768547, "learning_rate": 2.249398914625305e-06, "loss": 0.1355813980102539, "step": 134270 }, { "epoch": 1.161036221044349, "grad_norm": 6.062845916612386, "learning_rate": 2.249201663106475e-06, "loss": 0.06520462036132812, "step": 134275 }, { "epoch": 1.1610794545658922, "grad_norm": 27.61635604941095, "learning_rate": 2.2490044150502596e-06, "loss": 0.18558502197265625, "step": 134280 }, { "epoch": 1.1611226880874355, "grad_norm": 5.2525870205845715, "learning_rate": 2.24880717045757e-06, "loss": 0.061254119873046874, "step": 134285 }, { "epoch": 1.1611659216089787, "grad_norm": 1.2228668955851798, "learning_rate": 2.2486099293293157e-06, "loss": 0.028879165649414062, "step": 134290 }, { "epoch": 1.161209155130522, "grad_norm": 0.46293813012943696, "learning_rate": 2.2484126916664056e-06, "loss": 0.022118473052978517, "step": 134295 }, { "epoch": 1.1612523886520654, "grad_norm": 2.8941254261980585, "learning_rate": 2.248215457469749e-06, "loss": 0.04986352920532226, "step": 134300 }, { "epoch": 1.1612956221736086, "grad_norm": 7.709160062766305, "learning_rate": 2.2480182267402574e-06, "loss": 0.119525146484375, "step": 134305 }, { "epoch": 1.1613388556951518, "grad_norm": 0.16627322534021993, "learning_rate": 2.2478209994788383e-06, "loss": 0.037799072265625, "step": 134310 }, { "epoch": 1.161382089216695, "grad_norm": 5.134723234393926, "learning_rate": 2.247623775686403e-06, "loss": 0.035483169555664065, "step": 134315 }, { "epoch": 1.1614253227382383, "grad_norm": 5.971679973022363, "learning_rate": 2.247426555363861e-06, "loss": 0.09680004119873047, "step": 134320 }, { "epoch": 1.1614685562597815, "grad_norm": 1.5459402332760237, "learning_rate": 2.247229338512121e-06, "loss": 0.012604713439941406, "step": 134325 }, { "epoch": 1.1615117897813247, "grad_norm": 2.3335341034697015, "learning_rate": 2.2470321251320916e-06, "loss": 0.01416168212890625, "step": 134330 }, { "epoch": 1.1615550233028682, "grad_norm": 0.20496892504863154, "learning_rate": 2.246834915224683e-06, "loss": 0.01716270446777344, "step": 134335 }, { "epoch": 1.1615982568244114, "grad_norm": 0.39927508307194054, "learning_rate": 2.246637708790807e-06, "loss": 0.015299224853515625, "step": 134340 }, { "epoch": 1.1616414903459547, "grad_norm": 28.303426110205997, "learning_rate": 2.246440505831371e-06, "loss": 0.059772491455078125, "step": 134345 }, { "epoch": 1.1616847238674979, "grad_norm": 0.05225016540662398, "learning_rate": 2.2462433063472847e-06, "loss": 0.09785995483398438, "step": 134350 }, { "epoch": 1.161727957389041, "grad_norm": 0.22285092851155663, "learning_rate": 2.2460461103394573e-06, "loss": 0.01853523254394531, "step": 134355 }, { "epoch": 1.1617711909105843, "grad_norm": 12.704446304119266, "learning_rate": 2.2458489178087988e-06, "loss": 0.11764450073242187, "step": 134360 }, { "epoch": 1.1618144244321278, "grad_norm": 12.05327024099846, "learning_rate": 2.2456517287562176e-06, "loss": 0.0679290771484375, "step": 134365 }, { "epoch": 1.161857657953671, "grad_norm": 1.4614787482649731, "learning_rate": 2.245454543182625e-06, "loss": 0.12588958740234374, "step": 134370 }, { "epoch": 1.1619008914752142, "grad_norm": 3.668824854102762, "learning_rate": 2.2452573610889297e-06, "loss": 0.19322509765625, "step": 134375 }, { "epoch": 1.1619441249967575, "grad_norm": 0.14333356433827565, "learning_rate": 2.2450601824760407e-06, "loss": 0.07650909423828126, "step": 134380 }, { "epoch": 1.1619873585183007, "grad_norm": 40.86201916048913, "learning_rate": 2.2448630073448666e-06, "loss": 0.16491546630859374, "step": 134385 }, { "epoch": 1.162030592039844, "grad_norm": 13.672213703551586, "learning_rate": 2.2446658356963184e-06, "loss": 0.12009468078613281, "step": 134390 }, { "epoch": 1.1620738255613872, "grad_norm": 4.2342886138870925, "learning_rate": 2.244468667531303e-06, "loss": 0.09096603393554688, "step": 134395 }, { "epoch": 1.1621170590829306, "grad_norm": 2.248531197652303, "learning_rate": 2.2442715028507332e-06, "loss": 0.014521026611328125, "step": 134400 }, { "epoch": 1.1621602926044738, "grad_norm": 2.158063908510092, "learning_rate": 2.2440743416555156e-06, "loss": 0.045309829711914065, "step": 134405 }, { "epoch": 1.162203526126017, "grad_norm": 1.0708150480966074, "learning_rate": 2.2438771839465607e-06, "loss": 0.18060379028320311, "step": 134410 }, { "epoch": 1.1622467596475603, "grad_norm": 6.5420392800461995, "learning_rate": 2.2436800297247774e-06, "loss": 0.02018890380859375, "step": 134415 }, { "epoch": 1.1622899931691035, "grad_norm": 6.195346545967367, "learning_rate": 2.243482878991075e-06, "loss": 0.104736328125, "step": 134420 }, { "epoch": 1.162333226690647, "grad_norm": 0.8200216322570147, "learning_rate": 2.243285731746363e-06, "loss": 0.015525054931640626, "step": 134425 }, { "epoch": 1.1623764602121902, "grad_norm": 2.7409451306551573, "learning_rate": 2.2430885879915486e-06, "loss": 0.0684539794921875, "step": 134430 }, { "epoch": 1.1624196937337334, "grad_norm": 1.0059139543931084, "learning_rate": 2.2428914477275447e-06, "loss": 0.014942550659179687, "step": 134435 }, { "epoch": 1.1624629272552767, "grad_norm": 0.07797216723946593, "learning_rate": 2.242694310955258e-06, "loss": 0.029413414001464844, "step": 134440 }, { "epoch": 1.16250616077682, "grad_norm": 6.742145617901043, "learning_rate": 2.2424971776755977e-06, "loss": 0.043994140625, "step": 134445 }, { "epoch": 1.1625493942983631, "grad_norm": 0.07576349026304403, "learning_rate": 2.242300047889474e-06, "loss": 0.03033313751220703, "step": 134450 }, { "epoch": 1.1625926278199064, "grad_norm": 0.6483701224682931, "learning_rate": 2.242102921597796e-06, "loss": 0.036936187744140626, "step": 134455 }, { "epoch": 1.1626358613414496, "grad_norm": 7.990677303530469, "learning_rate": 2.2419057988014708e-06, "loss": 0.16938629150390624, "step": 134460 }, { "epoch": 1.162679094862993, "grad_norm": 0.892700106796687, "learning_rate": 2.2417086795014104e-06, "loss": 0.055621337890625, "step": 134465 }, { "epoch": 1.1627223283845363, "grad_norm": 4.455274920949106, "learning_rate": 2.2415115636985224e-06, "loss": 0.08957138061523437, "step": 134470 }, { "epoch": 1.1627655619060795, "grad_norm": 0.2345228180497521, "learning_rate": 2.2413144513937154e-06, "loss": 0.12932891845703126, "step": 134475 }, { "epoch": 1.1628087954276227, "grad_norm": 1.8044300195091585, "learning_rate": 2.2411173425879e-06, "loss": 0.046868515014648435, "step": 134480 }, { "epoch": 1.162852028949166, "grad_norm": 2.256003223590363, "learning_rate": 2.240920237281984e-06, "loss": 0.0563812255859375, "step": 134485 }, { "epoch": 1.1628952624707094, "grad_norm": 1.9526239041290594, "learning_rate": 2.240723135476877e-06, "loss": 0.01860504150390625, "step": 134490 }, { "epoch": 1.1629384959922526, "grad_norm": 0.20741821151013676, "learning_rate": 2.2405260371734866e-06, "loss": 0.11746826171875, "step": 134495 }, { "epoch": 1.1629817295137959, "grad_norm": 4.594300427355353, "learning_rate": 2.2403289423727243e-06, "loss": 0.07764739990234375, "step": 134500 }, { "epoch": 1.163024963035339, "grad_norm": 0.6544963455311675, "learning_rate": 2.240131851075497e-06, "loss": 0.11034927368164063, "step": 134505 }, { "epoch": 1.1630681965568823, "grad_norm": 16.49306197179081, "learning_rate": 2.2399347632827154e-06, "loss": 0.04330902099609375, "step": 134510 }, { "epoch": 1.1631114300784255, "grad_norm": 36.573944636214684, "learning_rate": 2.239737678995287e-06, "loss": 0.06246604919433594, "step": 134515 }, { "epoch": 1.1631546635999688, "grad_norm": 2.3683416231520953, "learning_rate": 2.239540598214122e-06, "loss": 0.007585906982421875, "step": 134520 }, { "epoch": 1.1631978971215122, "grad_norm": 1.7090028537475614, "learning_rate": 2.2393435209401267e-06, "loss": 0.07345848083496094, "step": 134525 }, { "epoch": 1.1632411306430555, "grad_norm": 34.272521380370755, "learning_rate": 2.2391464471742126e-06, "loss": 0.1302520751953125, "step": 134530 }, { "epoch": 1.1632843641645987, "grad_norm": 0.5105252497018308, "learning_rate": 2.2389493769172882e-06, "loss": 0.030199432373046876, "step": 134535 }, { "epoch": 1.163327597686142, "grad_norm": 13.834683240457016, "learning_rate": 2.2387523101702626e-06, "loss": 0.09534111022949218, "step": 134540 }, { "epoch": 1.1633708312076851, "grad_norm": 4.100352488275142, "learning_rate": 2.2385552469340437e-06, "loss": 0.027423095703125, "step": 134545 }, { "epoch": 1.1634140647292284, "grad_norm": 3.314721739671922, "learning_rate": 2.2383581872095408e-06, "loss": 0.07559356689453126, "step": 134550 }, { "epoch": 1.1634572982507718, "grad_norm": 19.288051929314594, "learning_rate": 2.2381611309976615e-06, "loss": 0.07042465209960938, "step": 134555 }, { "epoch": 1.163500531772315, "grad_norm": 0.6523857696086718, "learning_rate": 2.2379640782993164e-06, "loss": 0.047673797607421874, "step": 134560 }, { "epoch": 1.1635437652938583, "grad_norm": 2.240403715152893, "learning_rate": 2.2377670291154143e-06, "loss": 0.03320121765136719, "step": 134565 }, { "epoch": 1.1635869988154015, "grad_norm": 6.793034791542887, "learning_rate": 2.237569983446863e-06, "loss": 0.04875755310058594, "step": 134570 }, { "epoch": 1.1636302323369447, "grad_norm": 2.8264692333407555, "learning_rate": 2.2373729412945718e-06, "loss": 0.0399169921875, "step": 134575 }, { "epoch": 1.163673465858488, "grad_norm": 4.134791883046251, "learning_rate": 2.2371759026594486e-06, "loss": 0.01280364990234375, "step": 134580 }, { "epoch": 1.1637166993800312, "grad_norm": 7.58963884300767, "learning_rate": 2.2369788675424035e-06, "loss": 0.11256179809570313, "step": 134585 }, { "epoch": 1.1637599329015746, "grad_norm": 3.2066356055969796, "learning_rate": 2.2367818359443424e-06, "loss": 0.03774566650390625, "step": 134590 }, { "epoch": 1.1638031664231179, "grad_norm": 50.079651738964166, "learning_rate": 2.2365848078661784e-06, "loss": 0.07159671783447266, "step": 134595 }, { "epoch": 1.163846399944661, "grad_norm": 1.0116835189937199, "learning_rate": 2.236387783308817e-06, "loss": 0.03564529418945313, "step": 134600 }, { "epoch": 1.1638896334662043, "grad_norm": 17.823255256950876, "learning_rate": 2.236190762273168e-06, "loss": 0.35109634399414064, "step": 134605 }, { "epoch": 1.1639328669877476, "grad_norm": 0.39107203422362824, "learning_rate": 2.235993744760139e-06, "loss": 0.07100067138671876, "step": 134610 }, { "epoch": 1.1639761005092908, "grad_norm": 0.5502122035441627, "learning_rate": 2.23579673077064e-06, "loss": 0.024794769287109376, "step": 134615 }, { "epoch": 1.1640193340308342, "grad_norm": 6.481086137554232, "learning_rate": 2.235599720305578e-06, "loss": 0.06053009033203125, "step": 134620 }, { "epoch": 1.1640625675523775, "grad_norm": 41.31244927766083, "learning_rate": 2.235402713365863e-06, "loss": 0.45039520263671873, "step": 134625 }, { "epoch": 1.1641058010739207, "grad_norm": 4.592561117375793, "learning_rate": 2.235205709952404e-06, "loss": 0.061502838134765626, "step": 134630 }, { "epoch": 1.164149034595464, "grad_norm": 1.7996359031430156, "learning_rate": 2.235008710066108e-06, "loss": 0.2354074478149414, "step": 134635 }, { "epoch": 1.1641922681170072, "grad_norm": 5.154368195471226, "learning_rate": 2.234811713707884e-06, "loss": 0.06398849487304688, "step": 134640 }, { "epoch": 1.1642355016385504, "grad_norm": 1.3629073112995678, "learning_rate": 2.2346147208786415e-06, "loss": 0.030533599853515624, "step": 134645 }, { "epoch": 1.1642787351600936, "grad_norm": 3.15234172807544, "learning_rate": 2.2344177315792865e-06, "loss": 0.03219757080078125, "step": 134650 }, { "epoch": 1.164321968681637, "grad_norm": 2.9109692942112315, "learning_rate": 2.234220745810731e-06, "loss": 0.09611587524414063, "step": 134655 }, { "epoch": 1.1643652022031803, "grad_norm": 1.5967345626210951, "learning_rate": 2.234023763573882e-06, "loss": 0.11495895385742187, "step": 134660 }, { "epoch": 1.1644084357247235, "grad_norm": 0.5076048747234777, "learning_rate": 2.233826784869647e-06, "loss": 0.06411094665527343, "step": 134665 }, { "epoch": 1.1644516692462668, "grad_norm": 40.385163290301584, "learning_rate": 2.2336298096989353e-06, "loss": 0.12347869873046875, "step": 134670 }, { "epoch": 1.16449490276781, "grad_norm": 1.8046914219940364, "learning_rate": 2.233432838062655e-06, "loss": 0.188116455078125, "step": 134675 }, { "epoch": 1.1645381362893534, "grad_norm": 10.9009944920034, "learning_rate": 2.2332358699617153e-06, "loss": 0.0989410400390625, "step": 134680 }, { "epoch": 1.1645813698108967, "grad_norm": 50.090722980112545, "learning_rate": 2.2330389053970226e-06, "loss": 0.23023834228515624, "step": 134685 }, { "epoch": 1.16462460333244, "grad_norm": 2.642813774271068, "learning_rate": 2.2328419443694878e-06, "loss": 0.09209365844726562, "step": 134690 }, { "epoch": 1.1646678368539831, "grad_norm": 1.560833778999369, "learning_rate": 2.2326449868800185e-06, "loss": 0.01753864288330078, "step": 134695 }, { "epoch": 1.1647110703755263, "grad_norm": 0.613461379919321, "learning_rate": 2.232448032929522e-06, "loss": 0.01988983154296875, "step": 134700 }, { "epoch": 1.1647543038970696, "grad_norm": 0.4630783304835401, "learning_rate": 2.2322510825189076e-06, "loss": 0.17957687377929688, "step": 134705 }, { "epoch": 1.1647975374186128, "grad_norm": 2.5883997973738673, "learning_rate": 2.2320541356490834e-06, "loss": 0.00990753173828125, "step": 134710 }, { "epoch": 1.1648407709401563, "grad_norm": 3.672755973025246, "learning_rate": 2.2318571923209564e-06, "loss": 0.013779830932617188, "step": 134715 }, { "epoch": 1.1648840044616995, "grad_norm": 0.6407305260555205, "learning_rate": 2.231660252535438e-06, "loss": 0.006665420532226562, "step": 134720 }, { "epoch": 1.1649272379832427, "grad_norm": 2.6654382005746933, "learning_rate": 2.2314633162934343e-06, "loss": 0.12904052734375, "step": 134725 }, { "epoch": 1.164970471504786, "grad_norm": 0.33686839701380045, "learning_rate": 2.2312663835958532e-06, "loss": 0.01639404296875, "step": 134730 }, { "epoch": 1.1650137050263292, "grad_norm": 0.6086090696864543, "learning_rate": 2.2310694544436043e-06, "loss": 0.10285530090332032, "step": 134735 }, { "epoch": 1.1650569385478724, "grad_norm": 2.9584817007590836, "learning_rate": 2.2308725288375953e-06, "loss": 0.021906280517578126, "step": 134740 }, { "epoch": 1.1651001720694159, "grad_norm": 0.22579772945712184, "learning_rate": 2.2306756067787328e-06, "loss": 0.034992408752441403, "step": 134745 }, { "epoch": 1.165143405590959, "grad_norm": 0.07889632858415709, "learning_rate": 2.2304786882679274e-06, "loss": 0.11852874755859374, "step": 134750 }, { "epoch": 1.1651866391125023, "grad_norm": 0.36892531870499434, "learning_rate": 2.2302817733060863e-06, "loss": 0.018634796142578125, "step": 134755 }, { "epoch": 1.1652298726340455, "grad_norm": 12.873585036531736, "learning_rate": 2.2300848618941175e-06, "loss": 0.042899131774902344, "step": 134760 }, { "epoch": 1.1652731061555888, "grad_norm": 66.12658494913627, "learning_rate": 2.22988795403293e-06, "loss": 0.1279266357421875, "step": 134765 }, { "epoch": 1.165316339677132, "grad_norm": 3.751129049066998, "learning_rate": 2.2296910497234306e-06, "loss": 0.03802680969238281, "step": 134770 }, { "epoch": 1.1653595731986752, "grad_norm": 0.05453286322288938, "learning_rate": 2.2294941489665285e-06, "loss": 0.036122703552246095, "step": 134775 }, { "epoch": 1.1654028067202187, "grad_norm": 12.4200788939797, "learning_rate": 2.2292972517631295e-06, "loss": 0.2465892791748047, "step": 134780 }, { "epoch": 1.165446040241762, "grad_norm": 0.6186649871085349, "learning_rate": 2.2291003581141447e-06, "loss": 0.08001480102539063, "step": 134785 }, { "epoch": 1.1654892737633051, "grad_norm": 2.708279835151894, "learning_rate": 2.228903468020481e-06, "loss": 0.07984085083007812, "step": 134790 }, { "epoch": 1.1655325072848484, "grad_norm": 6.701186450970692, "learning_rate": 2.2287065814830468e-06, "loss": 0.37801513671875, "step": 134795 }, { "epoch": 1.1655757408063916, "grad_norm": 3.5543973357862804, "learning_rate": 2.22850969850275e-06, "loss": 0.03680267333984375, "step": 134800 }, { "epoch": 1.1656189743279348, "grad_norm": 1.188079243988627, "learning_rate": 2.228312819080497e-06, "loss": 0.13039932250976563, "step": 134805 }, { "epoch": 1.1656622078494783, "grad_norm": 0.4191663297919348, "learning_rate": 2.2281159432171977e-06, "loss": 0.049546432495117185, "step": 134810 }, { "epoch": 1.1657054413710215, "grad_norm": 9.996859149417594, "learning_rate": 2.2279190709137586e-06, "loss": 0.0379608154296875, "step": 134815 }, { "epoch": 1.1657486748925647, "grad_norm": 1.5652200548348396, "learning_rate": 2.22772220217109e-06, "loss": 0.016646575927734376, "step": 134820 }, { "epoch": 1.165791908414108, "grad_norm": 3.2174237057477293, "learning_rate": 2.227525336990098e-06, "loss": 0.23635406494140626, "step": 134825 }, { "epoch": 1.1658351419356512, "grad_norm": 20.509179996093952, "learning_rate": 2.2273284753716907e-06, "loss": 0.040904998779296875, "step": 134830 }, { "epoch": 1.1658783754571944, "grad_norm": 1.0922758379099415, "learning_rate": 2.227131617316776e-06, "loss": 0.10801906585693359, "step": 134835 }, { "epoch": 1.1659216089787376, "grad_norm": 9.15353803630008, "learning_rate": 2.2269347628262623e-06, "loss": 0.04020214080810547, "step": 134840 }, { "epoch": 1.165964842500281, "grad_norm": 0.7131762784165008, "learning_rate": 2.2267379119010556e-06, "loss": 0.11994094848632812, "step": 134845 }, { "epoch": 1.1660080760218243, "grad_norm": 2.624252024461212, "learning_rate": 2.2265410645420673e-06, "loss": 0.024005126953125, "step": 134850 }, { "epoch": 1.1660513095433676, "grad_norm": 11.384120311930298, "learning_rate": 2.2263442207502028e-06, "loss": 0.040138626098632814, "step": 134855 }, { "epoch": 1.1660945430649108, "grad_norm": 49.52887022233461, "learning_rate": 2.226147380526371e-06, "loss": 0.09749889373779297, "step": 134860 }, { "epoch": 1.166137776586454, "grad_norm": 1.042028857590928, "learning_rate": 2.2259505438714777e-06, "loss": 0.03686027526855469, "step": 134865 }, { "epoch": 1.1661810101079972, "grad_norm": 79.14944533889937, "learning_rate": 2.225753710786433e-06, "loss": 0.18653640747070313, "step": 134870 }, { "epoch": 1.1662242436295407, "grad_norm": 1.2264332120186447, "learning_rate": 2.2255568812721425e-06, "loss": 0.09314346313476562, "step": 134875 }, { "epoch": 1.166267477151084, "grad_norm": 0.061478632367170515, "learning_rate": 2.225360055329517e-06, "loss": 0.07412033081054688, "step": 134880 }, { "epoch": 1.1663107106726271, "grad_norm": 0.0523766701451406, "learning_rate": 2.225163232959462e-06, "loss": 0.14949264526367187, "step": 134885 }, { "epoch": 1.1663539441941704, "grad_norm": 5.031309815368348, "learning_rate": 2.2249664141628855e-06, "loss": 0.028147506713867187, "step": 134890 }, { "epoch": 1.1663971777157136, "grad_norm": 2.665098030747026, "learning_rate": 2.2247695989406948e-06, "loss": 0.04410686492919922, "step": 134895 }, { "epoch": 1.1664404112372568, "grad_norm": 34.971756923485955, "learning_rate": 2.2245727872937992e-06, "loss": 0.09122161865234375, "step": 134900 }, { "epoch": 1.1664836447588, "grad_norm": 15.009375455233855, "learning_rate": 2.224375979223104e-06, "loss": 0.039493751525878903, "step": 134905 }, { "epoch": 1.1665268782803435, "grad_norm": 24.343404561626322, "learning_rate": 2.2241791747295193e-06, "loss": 0.1259429931640625, "step": 134910 }, { "epoch": 1.1665701118018867, "grad_norm": 16.05960506602766, "learning_rate": 2.223982373813952e-06, "loss": 0.19701614379882812, "step": 134915 }, { "epoch": 1.16661334532343, "grad_norm": 7.830209151780092, "learning_rate": 2.2237855764773094e-06, "loss": 0.0993011474609375, "step": 134920 }, { "epoch": 1.1666565788449732, "grad_norm": 10.348844380017196, "learning_rate": 2.2235887827204985e-06, "loss": 0.05659027099609375, "step": 134925 }, { "epoch": 1.1666998123665164, "grad_norm": 5.277352387100172, "learning_rate": 2.2233919925444282e-06, "loss": 0.017481231689453126, "step": 134930 }, { "epoch": 1.1667430458880599, "grad_norm": 1.6704138543624933, "learning_rate": 2.2231952059500055e-06, "loss": 0.13240203857421876, "step": 134935 }, { "epoch": 1.166786279409603, "grad_norm": 0.11825584475035152, "learning_rate": 2.222998422938136e-06, "loss": 0.012115097045898438, "step": 134940 }, { "epoch": 1.1668295129311463, "grad_norm": 0.03143648290839498, "learning_rate": 2.2228016435097307e-06, "loss": 0.2736686706542969, "step": 134945 }, { "epoch": 1.1668727464526896, "grad_norm": 9.972083218179197, "learning_rate": 2.2226048676656956e-06, "loss": 0.017679595947265626, "step": 134950 }, { "epoch": 1.1669159799742328, "grad_norm": 0.6015685915643704, "learning_rate": 2.2224080954069374e-06, "loss": 0.07438507080078124, "step": 134955 }, { "epoch": 1.166959213495776, "grad_norm": 10.491270058945538, "learning_rate": 2.2222113267343656e-06, "loss": 0.0611663818359375, "step": 134960 }, { "epoch": 1.1670024470173193, "grad_norm": 4.538584708749273, "learning_rate": 2.222014561648886e-06, "loss": 0.036116409301757815, "step": 134965 }, { "epoch": 1.1670456805388627, "grad_norm": 3.52451764070769, "learning_rate": 2.221817800151405e-06, "loss": 0.057757568359375, "step": 134970 }, { "epoch": 1.167088914060406, "grad_norm": 1.346294713168966, "learning_rate": 2.221621042242833e-06, "loss": 0.164105224609375, "step": 134975 }, { "epoch": 1.1671321475819492, "grad_norm": 2.289759016142265, "learning_rate": 2.221424287924075e-06, "loss": 0.0788330078125, "step": 134980 }, { "epoch": 1.1671753811034924, "grad_norm": 8.06399343029766, "learning_rate": 2.2212275371960405e-06, "loss": 0.08787155151367188, "step": 134985 }, { "epoch": 1.1672186146250356, "grad_norm": 1.9989874359882072, "learning_rate": 2.221030790059636e-06, "loss": 0.04577484130859375, "step": 134990 }, { "epoch": 1.1672618481465789, "grad_norm": 5.519577630993498, "learning_rate": 2.220834046515768e-06, "loss": 0.03157310485839844, "step": 134995 }, { "epoch": 1.1673050816681223, "grad_norm": 0.29653191731546996, "learning_rate": 2.2206373065653436e-06, "loss": 0.043661308288574216, "step": 135000 }, { "epoch": 1.1673483151896655, "grad_norm": 8.90781281460662, "learning_rate": 2.2204405702092726e-06, "loss": 0.07531051635742188, "step": 135005 }, { "epoch": 1.1673915487112088, "grad_norm": 3.7773538458419003, "learning_rate": 2.2202438374484595e-06, "loss": 0.1903594970703125, "step": 135010 }, { "epoch": 1.167434782232752, "grad_norm": 3.8232598630792642, "learning_rate": 2.220047108283814e-06, "loss": 0.12020721435546874, "step": 135015 }, { "epoch": 1.1674780157542952, "grad_norm": 17.772501462328247, "learning_rate": 2.2198503827162425e-06, "loss": 0.06385650634765624, "step": 135020 }, { "epoch": 1.1675212492758384, "grad_norm": 19.145399554411814, "learning_rate": 2.2196536607466516e-06, "loss": 0.07235336303710938, "step": 135025 }, { "epoch": 1.1675644827973817, "grad_norm": 2.659041638986902, "learning_rate": 2.2194569423759488e-06, "loss": 0.017928314208984376, "step": 135030 }, { "epoch": 1.1676077163189251, "grad_norm": 3.4866307672184105, "learning_rate": 2.2192602276050415e-06, "loss": 0.08513832092285156, "step": 135035 }, { "epoch": 1.1676509498404684, "grad_norm": 0.23619893832711766, "learning_rate": 2.2190635164348373e-06, "loss": 0.09208450317382813, "step": 135040 }, { "epoch": 1.1676941833620116, "grad_norm": 10.117043697993008, "learning_rate": 2.2188668088662436e-06, "loss": 0.04802570343017578, "step": 135045 }, { "epoch": 1.1677374168835548, "grad_norm": 2.328709825847701, "learning_rate": 2.218670104900167e-06, "loss": 0.03897171020507813, "step": 135050 }, { "epoch": 1.167780650405098, "grad_norm": 2.2939482087695935, "learning_rate": 2.2184734045375145e-06, "loss": 0.056109619140625, "step": 135055 }, { "epoch": 1.1678238839266413, "grad_norm": 0.19913880572771833, "learning_rate": 2.218276707779194e-06, "loss": 0.04937591552734375, "step": 135060 }, { "epoch": 1.1678671174481847, "grad_norm": 4.801959153937354, "learning_rate": 2.2180800146261116e-06, "loss": 0.01998291015625, "step": 135065 }, { "epoch": 1.167910350969728, "grad_norm": 1.6838541687092372, "learning_rate": 2.2178833250791747e-06, "loss": 0.21771163940429689, "step": 135070 }, { "epoch": 1.1679535844912712, "grad_norm": 0.10089833502228918, "learning_rate": 2.2176866391392923e-06, "loss": 0.06021537780761719, "step": 135075 }, { "epoch": 1.1679968180128144, "grad_norm": 15.964135357134394, "learning_rate": 2.2174899568073693e-06, "loss": 0.18336563110351561, "step": 135080 }, { "epoch": 1.1680400515343576, "grad_norm": 0.7473366574818405, "learning_rate": 2.2172932780843136e-06, "loss": 0.22418441772460937, "step": 135085 }, { "epoch": 1.1680832850559009, "grad_norm": 9.459267738953221, "learning_rate": 2.217096602971032e-06, "loss": 0.24194107055664063, "step": 135090 }, { "epoch": 1.168126518577444, "grad_norm": 17.90983921999759, "learning_rate": 2.2168999314684314e-06, "loss": 0.058124542236328125, "step": 135095 }, { "epoch": 1.1681697520989875, "grad_norm": 1.0083689284418689, "learning_rate": 2.2167032635774185e-06, "loss": 0.02467041015625, "step": 135100 }, { "epoch": 1.1682129856205308, "grad_norm": 5.097860463343778, "learning_rate": 2.2165065992989025e-06, "loss": 0.13041763305664061, "step": 135105 }, { "epoch": 1.168256219142074, "grad_norm": 4.14341027717458, "learning_rate": 2.2163099386337883e-06, "loss": 0.0517822265625, "step": 135110 }, { "epoch": 1.1682994526636172, "grad_norm": 0.877193475633764, "learning_rate": 2.216113281582984e-06, "loss": 0.0619354248046875, "step": 135115 }, { "epoch": 1.1683426861851605, "grad_norm": 4.688531552422696, "learning_rate": 2.2159166281473943e-06, "loss": 0.07720832824707032, "step": 135120 }, { "epoch": 1.168385919706704, "grad_norm": 14.915353646995548, "learning_rate": 2.2157199783279293e-06, "loss": 0.04845504760742188, "step": 135125 }, { "epoch": 1.1684291532282471, "grad_norm": 12.261311935745805, "learning_rate": 2.2155233321254927e-06, "loss": 0.037996673583984376, "step": 135130 }, { "epoch": 1.1684723867497904, "grad_norm": 0.755651548763714, "learning_rate": 2.215326689540995e-06, "loss": 0.04114837646484375, "step": 135135 }, { "epoch": 1.1685156202713336, "grad_norm": 13.760612984966912, "learning_rate": 2.215130050575341e-06, "loss": 0.07426910400390625, "step": 135140 }, { "epoch": 1.1685588537928768, "grad_norm": 6.459271717591311, "learning_rate": 2.214933415229438e-06, "loss": 0.10554294586181641, "step": 135145 }, { "epoch": 1.16860208731442, "grad_norm": 0.44235541306093046, "learning_rate": 2.214736783504192e-06, "loss": 0.11172065734863282, "step": 135150 }, { "epoch": 1.1686453208359633, "grad_norm": 0.9624781830014674, "learning_rate": 2.214540155400511e-06, "loss": 0.05018692016601563, "step": 135155 }, { "epoch": 1.1686885543575065, "grad_norm": 1.7646608593881896, "learning_rate": 2.2143435309193004e-06, "loss": 0.09041290283203125, "step": 135160 }, { "epoch": 1.16873178787905, "grad_norm": 5.47618865973983, "learning_rate": 2.214146910061469e-06, "loss": 0.08641357421875, "step": 135165 }, { "epoch": 1.1687750214005932, "grad_norm": 13.53330432565681, "learning_rate": 2.2139502928279226e-06, "loss": 0.12753143310546874, "step": 135170 }, { "epoch": 1.1688182549221364, "grad_norm": 1.7158161243748735, "learning_rate": 2.2137536792195675e-06, "loss": 0.015428924560546875, "step": 135175 }, { "epoch": 1.1688614884436797, "grad_norm": 1.3891113971424283, "learning_rate": 2.2135570692373114e-06, "loss": 0.07568931579589844, "step": 135180 }, { "epoch": 1.1689047219652229, "grad_norm": 0.8270812027397502, "learning_rate": 2.2133604628820614e-06, "loss": 0.01897430419921875, "step": 135185 }, { "epoch": 1.1689479554867663, "grad_norm": 2.630237541680456, "learning_rate": 2.2131638601547225e-06, "loss": 0.23862152099609374, "step": 135190 }, { "epoch": 1.1689911890083096, "grad_norm": 5.640778721141565, "learning_rate": 2.2129672610562014e-06, "loss": 0.04095916748046875, "step": 135195 }, { "epoch": 1.1690344225298528, "grad_norm": 4.3668694706492, "learning_rate": 2.212770665587407e-06, "loss": 0.0709014892578125, "step": 135200 }, { "epoch": 1.169077656051396, "grad_norm": 5.182629146720832, "learning_rate": 2.2125740737492443e-06, "loss": 0.04513702392578125, "step": 135205 }, { "epoch": 1.1691208895729392, "grad_norm": 1.7346212349313475, "learning_rate": 2.212377485542621e-06, "loss": 0.1774810791015625, "step": 135210 }, { "epoch": 1.1691641230944825, "grad_norm": 2.9742930554813913, "learning_rate": 2.2121809009684428e-06, "loss": 0.03406982421875, "step": 135215 }, { "epoch": 1.1692073566160257, "grad_norm": 0.16451804542679258, "learning_rate": 2.211984320027617e-06, "loss": 0.009560394287109374, "step": 135220 }, { "epoch": 1.1692505901375692, "grad_norm": 9.838784562912426, "learning_rate": 2.2117877427210486e-06, "loss": 0.12433319091796875, "step": 135225 }, { "epoch": 1.1692938236591124, "grad_norm": 31.1489530583462, "learning_rate": 2.2115911690496465e-06, "loss": 0.20289535522460939, "step": 135230 }, { "epoch": 1.1693370571806556, "grad_norm": 2.80252887046646, "learning_rate": 2.211394599014316e-06, "loss": 0.04393768310546875, "step": 135235 }, { "epoch": 1.1693802907021988, "grad_norm": 4.381786714639045, "learning_rate": 2.211198032615964e-06, "loss": 0.030385971069335938, "step": 135240 }, { "epoch": 1.169423524223742, "grad_norm": 0.29390091627083664, "learning_rate": 2.211001469855497e-06, "loss": 0.07831039428710937, "step": 135245 }, { "epoch": 1.1694667577452853, "grad_norm": 0.03448566552666318, "learning_rate": 2.210804910733822e-06, "loss": 0.03169927597045898, "step": 135250 }, { "epoch": 1.1695099912668288, "grad_norm": 21.141632267660146, "learning_rate": 2.2106083552518436e-06, "loss": 0.0977813720703125, "step": 135255 }, { "epoch": 1.169553224788372, "grad_norm": 28.624189106462843, "learning_rate": 2.210411803410471e-06, "loss": 0.13356876373291016, "step": 135260 }, { "epoch": 1.1695964583099152, "grad_norm": 0.3732006252025714, "learning_rate": 2.210215255210609e-06, "loss": 0.045270538330078124, "step": 135265 }, { "epoch": 1.1696396918314584, "grad_norm": 0.8157761892945088, "learning_rate": 2.210018710653165e-06, "loss": 0.5588897705078125, "step": 135270 }, { "epoch": 1.1696829253530017, "grad_norm": 1.036911587273681, "learning_rate": 2.2098221697390446e-06, "loss": 0.028939056396484374, "step": 135275 }, { "epoch": 1.169726158874545, "grad_norm": 0.5451869209101543, "learning_rate": 2.2096256324691544e-06, "loss": 0.03900604248046875, "step": 135280 }, { "epoch": 1.1697693923960881, "grad_norm": 12.977916403514275, "learning_rate": 2.2094290988444006e-06, "loss": 0.07445602416992188, "step": 135285 }, { "epoch": 1.1698126259176316, "grad_norm": 2.762952678311178, "learning_rate": 2.2092325688656896e-06, "loss": 0.0529510498046875, "step": 135290 }, { "epoch": 1.1698558594391748, "grad_norm": 1.0037116325766462, "learning_rate": 2.2090360425339285e-06, "loss": 0.15511474609375, "step": 135295 }, { "epoch": 1.169899092960718, "grad_norm": 21.52110618199366, "learning_rate": 2.2088395198500236e-06, "loss": 0.09952468872070312, "step": 135300 }, { "epoch": 1.1699423264822613, "grad_norm": 1.1769254809516219, "learning_rate": 2.2086430008148813e-06, "loss": 0.1950927734375, "step": 135305 }, { "epoch": 1.1699855600038045, "grad_norm": 10.581574983348826, "learning_rate": 2.208446485429408e-06, "loss": 0.1689136505126953, "step": 135310 }, { "epoch": 1.1700287935253477, "grad_norm": 0.13076555550929733, "learning_rate": 2.208249973694508e-06, "loss": 0.023144912719726563, "step": 135315 }, { "epoch": 1.1700720270468912, "grad_norm": 0.5670024289674439, "learning_rate": 2.208053465611089e-06, "loss": 0.16415252685546874, "step": 135320 }, { "epoch": 1.1701152605684344, "grad_norm": 21.177132631043268, "learning_rate": 2.207856961180058e-06, "loss": 0.2908050537109375, "step": 135325 }, { "epoch": 1.1701584940899776, "grad_norm": 11.288158321931437, "learning_rate": 2.2076604604023214e-06, "loss": 0.1029052734375, "step": 135330 }, { "epoch": 1.1702017276115209, "grad_norm": 0.7020223273407831, "learning_rate": 2.207463963278785e-06, "loss": 0.01620330810546875, "step": 135335 }, { "epoch": 1.170244961133064, "grad_norm": 1.941472868462906, "learning_rate": 2.207267469810354e-06, "loss": 0.018190765380859376, "step": 135340 }, { "epoch": 1.1702881946546073, "grad_norm": 0.19409428323447175, "learning_rate": 2.2070709799979356e-06, "loss": 0.05478935241699219, "step": 135345 }, { "epoch": 1.1703314281761505, "grad_norm": 4.95024032185644, "learning_rate": 2.206874493842435e-06, "loss": 0.021622848510742188, "step": 135350 }, { "epoch": 1.170374661697694, "grad_norm": 6.843690381842323, "learning_rate": 2.2066780113447597e-06, "loss": 0.013451766967773438, "step": 135355 }, { "epoch": 1.1704178952192372, "grad_norm": 5.891976721334907, "learning_rate": 2.2064815325058158e-06, "loss": 0.06725425720214843, "step": 135360 }, { "epoch": 1.1704611287407805, "grad_norm": 16.527384840883645, "learning_rate": 2.206285057326508e-06, "loss": 0.1841888427734375, "step": 135365 }, { "epoch": 1.1705043622623237, "grad_norm": 0.15135147269816862, "learning_rate": 2.206088585807745e-06, "loss": 0.05767974853515625, "step": 135370 }, { "epoch": 1.170547595783867, "grad_norm": 16.7727507735024, "learning_rate": 2.20589211795043e-06, "loss": 0.104443359375, "step": 135375 }, { "epoch": 1.1705908293054104, "grad_norm": 0.6721267597916655, "learning_rate": 2.2056956537554705e-06, "loss": 0.07919387817382813, "step": 135380 }, { "epoch": 1.1706340628269536, "grad_norm": 0.19993483355253225, "learning_rate": 2.205499193223772e-06, "loss": 0.051652717590332034, "step": 135385 }, { "epoch": 1.1706772963484968, "grad_norm": 9.995289110706457, "learning_rate": 2.2053027363562418e-06, "loss": 0.040309906005859375, "step": 135390 }, { "epoch": 1.17072052987004, "grad_norm": 6.3922102606064835, "learning_rate": 2.205106283153785e-06, "loss": 0.26829833984375, "step": 135395 }, { "epoch": 1.1707637633915833, "grad_norm": 0.848889817474031, "learning_rate": 2.2049098336173073e-06, "loss": 0.02147369384765625, "step": 135400 }, { "epoch": 1.1708069969131265, "grad_norm": 39.650253561616104, "learning_rate": 2.204713387747716e-06, "loss": 0.17079572677612304, "step": 135405 }, { "epoch": 1.1708502304346697, "grad_norm": 0.7947392454074348, "learning_rate": 2.2045169455459163e-06, "loss": 0.0300506591796875, "step": 135410 }, { "epoch": 1.170893463956213, "grad_norm": 1.458330715174079, "learning_rate": 2.204320507012813e-06, "loss": 0.12550048828125, "step": 135415 }, { "epoch": 1.1709366974777564, "grad_norm": 2.0104848674137754, "learning_rate": 2.204124072149314e-06, "loss": 0.0192657470703125, "step": 135420 }, { "epoch": 1.1709799309992996, "grad_norm": 3.4250863591547946, "learning_rate": 2.203927640956325e-06, "loss": 0.023164749145507812, "step": 135425 }, { "epoch": 1.1710231645208429, "grad_norm": 23.90844491021141, "learning_rate": 2.2037312134347505e-06, "loss": 0.24563217163085938, "step": 135430 }, { "epoch": 1.171066398042386, "grad_norm": 16.14939493646493, "learning_rate": 2.2035347895854976e-06, "loss": 0.055657958984375, "step": 135435 }, { "epoch": 1.1711096315639293, "grad_norm": 0.7005309662118506, "learning_rate": 2.203338369409472e-06, "loss": 0.08491058349609375, "step": 135440 }, { "epoch": 1.1711528650854728, "grad_norm": 0.4867225222536298, "learning_rate": 2.2031419529075783e-06, "loss": 0.040020751953125, "step": 135445 }, { "epoch": 1.171196098607016, "grad_norm": 6.330491872943669, "learning_rate": 2.202945540080725e-06, "loss": 0.126373291015625, "step": 135450 }, { "epoch": 1.1712393321285592, "grad_norm": 1.2338190553084658, "learning_rate": 2.2027491309298163e-06, "loss": 0.14784011840820313, "step": 135455 }, { "epoch": 1.1712825656501025, "grad_norm": 1.2381569108952166, "learning_rate": 2.2025527254557573e-06, "loss": 0.02611846923828125, "step": 135460 }, { "epoch": 1.1713257991716457, "grad_norm": 4.134113738533587, "learning_rate": 2.2023563236594557e-06, "loss": 0.14800233840942384, "step": 135465 }, { "epoch": 1.171369032693189, "grad_norm": 0.2098507463684043, "learning_rate": 2.202159925541816e-06, "loss": 0.0700531005859375, "step": 135470 }, { "epoch": 1.1714122662147322, "grad_norm": 66.36484893861702, "learning_rate": 2.2019635311037444e-06, "loss": 0.22484912872314453, "step": 135475 }, { "epoch": 1.1714554997362756, "grad_norm": 7.47457299935105, "learning_rate": 2.201767140346145e-06, "loss": 0.15092620849609376, "step": 135480 }, { "epoch": 1.1714987332578188, "grad_norm": 8.9251630588575, "learning_rate": 2.2015707532699264e-06, "loss": 0.22509231567382812, "step": 135485 }, { "epoch": 1.171541966779362, "grad_norm": 4.301135499576143, "learning_rate": 2.2013743698759925e-06, "loss": 0.1538726806640625, "step": 135490 }, { "epoch": 1.1715852003009053, "grad_norm": 0.4414152896836997, "learning_rate": 2.2011779901652495e-06, "loss": 0.14150772094726563, "step": 135495 }, { "epoch": 1.1716284338224485, "grad_norm": 15.675639406403914, "learning_rate": 2.2009816141386036e-06, "loss": 0.03289108276367188, "step": 135500 }, { "epoch": 1.1716716673439918, "grad_norm": 10.30209375023644, "learning_rate": 2.2007852417969596e-06, "loss": 0.11135025024414062, "step": 135505 }, { "epoch": 1.1717149008655352, "grad_norm": 16.63723981374186, "learning_rate": 2.2005888731412223e-06, "loss": 0.11665992736816407, "step": 135510 }, { "epoch": 1.1717581343870784, "grad_norm": 9.045459274651284, "learning_rate": 2.2003925081722997e-06, "loss": 0.0648101806640625, "step": 135515 }, { "epoch": 1.1718013679086217, "grad_norm": 0.2601389188621445, "learning_rate": 2.2001961468910955e-06, "loss": 0.171392822265625, "step": 135520 }, { "epoch": 1.171844601430165, "grad_norm": 0.39825926885202667, "learning_rate": 2.1999997892985165e-06, "loss": 0.010726356506347656, "step": 135525 }, { "epoch": 1.1718878349517081, "grad_norm": 0.17146589329190193, "learning_rate": 2.199803435395468e-06, "loss": 0.06682167053222657, "step": 135530 }, { "epoch": 1.1719310684732513, "grad_norm": 19.323609137266068, "learning_rate": 2.199607085182855e-06, "loss": 0.03624725341796875, "step": 135535 }, { "epoch": 1.1719743019947946, "grad_norm": 5.308618029017753, "learning_rate": 2.1994107386615834e-06, "loss": 0.04222869873046875, "step": 135540 }, { "epoch": 1.172017535516338, "grad_norm": 5.379194265409374, "learning_rate": 2.1992143958325583e-06, "loss": 0.0556060791015625, "step": 135545 }, { "epoch": 1.1720607690378813, "grad_norm": 5.049657268172443, "learning_rate": 2.199018056696686e-06, "loss": 0.019141006469726562, "step": 135550 }, { "epoch": 1.1721040025594245, "grad_norm": 1.1345484916074453, "learning_rate": 2.1988217212548717e-06, "loss": 0.3303852081298828, "step": 135555 }, { "epoch": 1.1721472360809677, "grad_norm": 0.173903193849041, "learning_rate": 2.1986253895080217e-06, "loss": 0.07509307861328125, "step": 135560 }, { "epoch": 1.172190469602511, "grad_norm": 0.8038488229988133, "learning_rate": 2.1984290614570398e-06, "loss": 0.04715576171875, "step": 135565 }, { "epoch": 1.1722337031240542, "grad_norm": 8.945190604248944, "learning_rate": 2.1982327371028315e-06, "loss": 0.081134033203125, "step": 135570 }, { "epoch": 1.1722769366455976, "grad_norm": 55.44099361994988, "learning_rate": 2.198036416446303e-06, "loss": 0.1900054931640625, "step": 135575 }, { "epoch": 1.1723201701671409, "grad_norm": 0.028382314583803695, "learning_rate": 2.197840099488361e-06, "loss": 0.01731529235839844, "step": 135580 }, { "epoch": 1.172363403688684, "grad_norm": 1.443333614497193, "learning_rate": 2.197643786229909e-06, "loss": 0.07301292419433594, "step": 135585 }, { "epoch": 1.1724066372102273, "grad_norm": 0.7135766094109041, "learning_rate": 2.197447476671853e-06, "loss": 0.016898345947265626, "step": 135590 }, { "epoch": 1.1724498707317705, "grad_norm": 0.34790083664098304, "learning_rate": 2.1972511708150988e-06, "loss": 0.014599990844726563, "step": 135595 }, { "epoch": 1.1724931042533138, "grad_norm": 0.1943976890582348, "learning_rate": 2.1970548686605505e-06, "loss": 0.017541885375976562, "step": 135600 }, { "epoch": 1.172536337774857, "grad_norm": 0.276689818528895, "learning_rate": 2.1968585702091133e-06, "loss": 0.02801647186279297, "step": 135605 }, { "epoch": 1.1725795712964004, "grad_norm": 0.5117976843874993, "learning_rate": 2.196662275461695e-06, "loss": 0.01919097900390625, "step": 135610 }, { "epoch": 1.1726228048179437, "grad_norm": 0.36894440020413716, "learning_rate": 2.1964659844191986e-06, "loss": 0.03032073974609375, "step": 135615 }, { "epoch": 1.172666038339487, "grad_norm": 1.5327192323361618, "learning_rate": 2.1962696970825308e-06, "loss": 0.0162933349609375, "step": 135620 }, { "epoch": 1.1727092718610301, "grad_norm": 4.009642781410487, "learning_rate": 2.196073413452595e-06, "loss": 0.04615707397460937, "step": 135625 }, { "epoch": 1.1727525053825734, "grad_norm": 1.1419098903354123, "learning_rate": 2.195877133530299e-06, "loss": 0.20460567474365235, "step": 135630 }, { "epoch": 1.1727957389041168, "grad_norm": 0.6021469663139059, "learning_rate": 2.195680857316546e-06, "loss": 0.024811553955078124, "step": 135635 }, { "epoch": 1.17283897242566, "grad_norm": 55.05054365448391, "learning_rate": 2.1954845848122406e-06, "loss": 0.16132965087890624, "step": 135640 }, { "epoch": 1.1728822059472033, "grad_norm": 0.12681970744711465, "learning_rate": 2.1952883160182908e-06, "loss": 0.11283721923828124, "step": 135645 }, { "epoch": 1.1729254394687465, "grad_norm": 0.12957789870633887, "learning_rate": 2.1950920509355993e-06, "loss": 0.074267578125, "step": 135650 }, { "epoch": 1.1729686729902897, "grad_norm": 3.1523713911547167, "learning_rate": 2.194895789565072e-06, "loss": 0.13565750122070314, "step": 135655 }, { "epoch": 1.173011906511833, "grad_norm": 4.1098784901612175, "learning_rate": 2.1946995319076153e-06, "loss": 0.1263641357421875, "step": 135660 }, { "epoch": 1.1730551400333762, "grad_norm": 62.1988671271223, "learning_rate": 2.194503277964133e-06, "loss": 0.10404548645019532, "step": 135665 }, { "epoch": 1.1730983735549196, "grad_norm": 1.794507739199354, "learning_rate": 2.1943070277355288e-06, "loss": 0.04521636962890625, "step": 135670 }, { "epoch": 1.1731416070764629, "grad_norm": 4.172874987800248, "learning_rate": 2.1941107812227107e-06, "loss": 0.0852264404296875, "step": 135675 }, { "epoch": 1.173184840598006, "grad_norm": 4.224593955938796, "learning_rate": 2.193914538426583e-06, "loss": 0.026727294921875, "step": 135680 }, { "epoch": 1.1732280741195493, "grad_norm": 8.755286096943518, "learning_rate": 2.1937182993480486e-06, "loss": 0.04284820556640625, "step": 135685 }, { "epoch": 1.1732713076410926, "grad_norm": 6.098971789099543, "learning_rate": 2.1935220639880156e-06, "loss": 0.0375762939453125, "step": 135690 }, { "epoch": 1.1733145411626358, "grad_norm": 1.75531170772763, "learning_rate": 2.1933258323473876e-06, "loss": 0.05203857421875, "step": 135695 }, { "epoch": 1.1733577746841792, "grad_norm": 1.3231188709875408, "learning_rate": 2.1931296044270677e-06, "loss": 0.12398605346679688, "step": 135700 }, { "epoch": 1.1734010082057225, "grad_norm": 0.19322577792941392, "learning_rate": 2.1929333802279643e-06, "loss": 0.048903465270996094, "step": 135705 }, { "epoch": 1.1734442417272657, "grad_norm": 2.180283069208195, "learning_rate": 2.1927371597509815e-06, "loss": 0.07793426513671875, "step": 135710 }, { "epoch": 1.173487475248809, "grad_norm": 0.6638845766958363, "learning_rate": 2.192540942997022e-06, "loss": 0.084088134765625, "step": 135715 }, { "epoch": 1.1735307087703521, "grad_norm": 2.825852406200356, "learning_rate": 2.1923447299669933e-06, "loss": 0.029866409301757813, "step": 135720 }, { "epoch": 1.1735739422918954, "grad_norm": 33.42054172300076, "learning_rate": 2.1921485206617995e-06, "loss": 0.064385986328125, "step": 135725 }, { "epoch": 1.1736171758134386, "grad_norm": 4.506620272270105, "learning_rate": 2.1919523150823454e-06, "loss": 0.12593650817871094, "step": 135730 }, { "epoch": 1.173660409334982, "grad_norm": 0.27711560838708355, "learning_rate": 2.191756113229534e-06, "loss": 0.12541465759277343, "step": 135735 }, { "epoch": 1.1737036428565253, "grad_norm": 15.04941635853294, "learning_rate": 2.191559915104274e-06, "loss": 0.05955924987792969, "step": 135740 }, { "epoch": 1.1737468763780685, "grad_norm": 1.505665762739486, "learning_rate": 2.191363720707467e-06, "loss": 0.07113876342773437, "step": 135745 }, { "epoch": 1.1737901098996117, "grad_norm": 3.6761239474492213, "learning_rate": 2.19116753004002e-06, "loss": 0.01864776611328125, "step": 135750 }, { "epoch": 1.173833343421155, "grad_norm": 20.710835482899626, "learning_rate": 2.1909713431028368e-06, "loss": 0.25810890197753905, "step": 135755 }, { "epoch": 1.1738765769426982, "grad_norm": 2.936523025256353, "learning_rate": 2.190775159896822e-06, "loss": 0.02280597686767578, "step": 135760 }, { "epoch": 1.1739198104642417, "grad_norm": 15.876880250555917, "learning_rate": 2.19057898042288e-06, "loss": 0.14376373291015626, "step": 135765 }, { "epoch": 1.1739630439857849, "grad_norm": 4.73298017441132, "learning_rate": 2.190382804681917e-06, "loss": 0.03303260803222656, "step": 135770 }, { "epoch": 1.174006277507328, "grad_norm": 0.4340513441053416, "learning_rate": 2.1901866326748367e-06, "loss": 0.0060977935791015625, "step": 135775 }, { "epoch": 1.1740495110288713, "grad_norm": 0.6943813577274006, "learning_rate": 2.1899904644025443e-06, "loss": 0.03716259002685547, "step": 135780 }, { "epoch": 1.1740927445504146, "grad_norm": 3.113259358745574, "learning_rate": 2.1897942998659446e-06, "loss": 0.06585693359375, "step": 135785 }, { "epoch": 1.1741359780719578, "grad_norm": 2.9323231351207557, "learning_rate": 2.1895981390659422e-06, "loss": 0.0482574462890625, "step": 135790 }, { "epoch": 1.174179211593501, "grad_norm": 7.52017968913391, "learning_rate": 2.1894019820034406e-06, "loss": 0.10164527893066407, "step": 135795 }, { "epoch": 1.1742224451150445, "grad_norm": 1.2105824446372526, "learning_rate": 2.189205828679345e-06, "loss": 0.17501258850097656, "step": 135800 }, { "epoch": 1.1742656786365877, "grad_norm": 0.5624743748917692, "learning_rate": 2.1890096790945617e-06, "loss": 0.033751487731933594, "step": 135805 }, { "epoch": 1.174308912158131, "grad_norm": 0.8135641770206271, "learning_rate": 2.188813533249994e-06, "loss": 0.22757492065429688, "step": 135810 }, { "epoch": 1.1743521456796742, "grad_norm": 18.698221229569608, "learning_rate": 2.188617391146546e-06, "loss": 0.11321754455566406, "step": 135815 }, { "epoch": 1.1743953792012174, "grad_norm": 18.40958374921003, "learning_rate": 2.188421252785124e-06, "loss": 0.08443317413330079, "step": 135820 }, { "epoch": 1.1744386127227608, "grad_norm": 1.6868558939501657, "learning_rate": 2.1882251181666304e-06, "loss": 0.040782928466796875, "step": 135825 }, { "epoch": 1.174481846244304, "grad_norm": 0.8873887481826415, "learning_rate": 2.1880289872919704e-06, "loss": 0.08815460205078125, "step": 135830 }, { "epoch": 1.1745250797658473, "grad_norm": 4.008349609313991, "learning_rate": 2.1878328601620495e-06, "loss": 0.023082733154296875, "step": 135835 }, { "epoch": 1.1745683132873905, "grad_norm": 4.354264586366272, "learning_rate": 2.1876367367777726e-06, "loss": 0.09662628173828125, "step": 135840 }, { "epoch": 1.1746115468089338, "grad_norm": 9.32465849935487, "learning_rate": 2.1874406171400428e-06, "loss": 0.0544921875, "step": 135845 }, { "epoch": 1.174654780330477, "grad_norm": 14.054626712562834, "learning_rate": 2.1872445012497646e-06, "loss": 0.2196176528930664, "step": 135850 }, { "epoch": 1.1746980138520202, "grad_norm": 2.537919667553755, "learning_rate": 2.1870483891078435e-06, "loss": 0.026933670043945312, "step": 135855 }, { "epoch": 1.1747412473735634, "grad_norm": 2.3794460628996124, "learning_rate": 2.186852280715182e-06, "loss": 0.024271392822265626, "step": 135860 }, { "epoch": 1.174784480895107, "grad_norm": 3.2737583914206247, "learning_rate": 2.1866561760726873e-06, "loss": 0.022529411315917968, "step": 135865 }, { "epoch": 1.1748277144166501, "grad_norm": 2.6476649725579082, "learning_rate": 2.1864600751812628e-06, "loss": 0.15770111083984376, "step": 135870 }, { "epoch": 1.1748709479381934, "grad_norm": 1.0795566078198513, "learning_rate": 2.186263978041812e-06, "loss": 0.057476806640625, "step": 135875 }, { "epoch": 1.1749141814597366, "grad_norm": 0.7483751356653224, "learning_rate": 2.1860678846552394e-06, "loss": 0.0561279296875, "step": 135880 }, { "epoch": 1.1749574149812798, "grad_norm": 13.77108316153658, "learning_rate": 2.18587179502245e-06, "loss": 0.03892784118652344, "step": 135885 }, { "epoch": 1.1750006485028233, "grad_norm": 0.756713289968042, "learning_rate": 2.1856757091443488e-06, "loss": 0.09635391235351562, "step": 135890 }, { "epoch": 1.1750438820243665, "grad_norm": 5.951638575622559, "learning_rate": 2.1854796270218373e-06, "loss": 0.09505348205566407, "step": 135895 }, { "epoch": 1.1750871155459097, "grad_norm": 0.13007138484243413, "learning_rate": 2.1852835486558234e-06, "loss": 0.008919906616210938, "step": 135900 }, { "epoch": 1.175130349067453, "grad_norm": 10.60814309210817, "learning_rate": 2.18508747404721e-06, "loss": 0.1432039260864258, "step": 135905 }, { "epoch": 1.1751735825889962, "grad_norm": 0.42818084226029174, "learning_rate": 2.1848914031969e-06, "loss": 0.01872882843017578, "step": 135910 }, { "epoch": 1.1752168161105394, "grad_norm": 0.2786694914387243, "learning_rate": 2.1846953361057995e-06, "loss": 0.0762237548828125, "step": 135915 }, { "epoch": 1.1752600496320826, "grad_norm": 10.948334139020945, "learning_rate": 2.1844992727748127e-06, "loss": 0.0734466552734375, "step": 135920 }, { "epoch": 1.175303283153626, "grad_norm": 48.93428042528682, "learning_rate": 2.1843032132048415e-06, "loss": 0.174078369140625, "step": 135925 }, { "epoch": 1.1753465166751693, "grad_norm": 35.38112731343612, "learning_rate": 2.184107157396793e-06, "loss": 0.11788597106933593, "step": 135930 }, { "epoch": 1.1753897501967125, "grad_norm": 0.4714948819949132, "learning_rate": 2.1839111053515702e-06, "loss": 0.020959091186523438, "step": 135935 }, { "epoch": 1.1754329837182558, "grad_norm": 0.7600230009902272, "learning_rate": 2.183715057070077e-06, "loss": 0.09001312255859376, "step": 135940 }, { "epoch": 1.175476217239799, "grad_norm": 1.5121379943376572, "learning_rate": 2.183519012553218e-06, "loss": 0.029108428955078126, "step": 135945 }, { "epoch": 1.1755194507613422, "grad_norm": 0.45230072712361086, "learning_rate": 2.1833229718018974e-06, "loss": 0.06507720947265624, "step": 135950 }, { "epoch": 1.1755626842828857, "grad_norm": 4.252426943929874, "learning_rate": 2.1831269348170175e-06, "loss": 0.026696014404296874, "step": 135955 }, { "epoch": 1.175605917804429, "grad_norm": 4.0112549405435125, "learning_rate": 2.1829309015994855e-06, "loss": 0.06100616455078125, "step": 135960 }, { "epoch": 1.1756491513259721, "grad_norm": 25.748050866909175, "learning_rate": 2.182734872150204e-06, "loss": 0.10779857635498047, "step": 135965 }, { "epoch": 1.1756923848475154, "grad_norm": 1.6209487926674504, "learning_rate": 2.182538846470077e-06, "loss": 0.057513427734375, "step": 135970 }, { "epoch": 1.1757356183690586, "grad_norm": 0.3585909563098068, "learning_rate": 2.1823428245600084e-06, "loss": 0.0079742431640625, "step": 135975 }, { "epoch": 1.1757788518906018, "grad_norm": 2.3084330396184103, "learning_rate": 2.1821468064209025e-06, "loss": 0.009328079223632813, "step": 135980 }, { "epoch": 1.175822085412145, "grad_norm": 0.1508253747698342, "learning_rate": 2.1819507920536636e-06, "loss": 0.09381303787231446, "step": 135985 }, { "epoch": 1.1758653189336885, "grad_norm": 0.13379599492325286, "learning_rate": 2.1817547814591937e-06, "loss": 0.05394611358642578, "step": 135990 }, { "epoch": 1.1759085524552317, "grad_norm": 22.33777687206901, "learning_rate": 2.1815587746384004e-06, "loss": 0.07233963012695313, "step": 135995 }, { "epoch": 1.175951785976775, "grad_norm": 1.0874525528714303, "learning_rate": 2.1813627715921846e-06, "loss": 0.03777618408203125, "step": 136000 }, { "epoch": 1.1759950194983182, "grad_norm": 12.630824018202821, "learning_rate": 2.181166772321452e-06, "loss": 0.02577037811279297, "step": 136005 }, { "epoch": 1.1760382530198614, "grad_norm": 1.8030581340173926, "learning_rate": 2.180970776827106e-06, "loss": 0.01457672119140625, "step": 136010 }, { "epoch": 1.1760814865414047, "grad_norm": 0.4882717083587967, "learning_rate": 2.18077478511005e-06, "loss": 0.06643753051757813, "step": 136015 }, { "epoch": 1.176124720062948, "grad_norm": 2.7900838004270687, "learning_rate": 2.180578797171188e-06, "loss": 0.016504669189453126, "step": 136020 }, { "epoch": 1.1761679535844913, "grad_norm": 3.030039831366421, "learning_rate": 2.180382813011424e-06, "loss": 0.019738006591796874, "step": 136025 }, { "epoch": 1.1762111871060346, "grad_norm": 12.561278391985697, "learning_rate": 2.180186832631663e-06, "loss": 0.123046875, "step": 136030 }, { "epoch": 1.1762544206275778, "grad_norm": 0.4022469778093203, "learning_rate": 2.179990856032808e-06, "loss": 0.088470458984375, "step": 136035 }, { "epoch": 1.176297654149121, "grad_norm": 9.981491307195459, "learning_rate": 2.179794883215762e-06, "loss": 0.11034698486328125, "step": 136040 }, { "epoch": 1.1763408876706642, "grad_norm": 19.517441399806227, "learning_rate": 2.1795989141814293e-06, "loss": 0.04881591796875, "step": 136045 }, { "epoch": 1.1763841211922075, "grad_norm": 0.4154829730719481, "learning_rate": 2.1794029489307142e-06, "loss": 0.04733734130859375, "step": 136050 }, { "epoch": 1.176427354713751, "grad_norm": 2.0621132968449363, "learning_rate": 2.1792069874645204e-06, "loss": 0.04402084350585937, "step": 136055 }, { "epoch": 1.1764705882352942, "grad_norm": 4.973889636740747, "learning_rate": 2.1790110297837514e-06, "loss": 0.24368438720703126, "step": 136060 }, { "epoch": 1.1765138217568374, "grad_norm": 3.5767821054365636, "learning_rate": 2.1788150758893113e-06, "loss": 0.05855846405029297, "step": 136065 }, { "epoch": 1.1765570552783806, "grad_norm": 33.83807938733083, "learning_rate": 2.1786191257821032e-06, "loss": 0.11605186462402343, "step": 136070 }, { "epoch": 1.1766002887999238, "grad_norm": 20.416738616865345, "learning_rate": 2.178423179463031e-06, "loss": 0.26545028686523436, "step": 136075 }, { "epoch": 1.1766435223214673, "grad_norm": 1.2190980082533962, "learning_rate": 2.178227236932999e-06, "loss": 0.087713623046875, "step": 136080 }, { "epoch": 1.1766867558430105, "grad_norm": 43.02351208948724, "learning_rate": 2.1780312981929088e-06, "loss": 0.08278121948242187, "step": 136085 }, { "epoch": 1.1767299893645538, "grad_norm": 47.74692511544248, "learning_rate": 2.177835363243667e-06, "loss": 0.28122482299804685, "step": 136090 }, { "epoch": 1.176773222886097, "grad_norm": 2.2684733621139173, "learning_rate": 2.1776394320861763e-06, "loss": 0.1711639404296875, "step": 136095 }, { "epoch": 1.1768164564076402, "grad_norm": 1.504459312048285, "learning_rate": 2.1774435047213396e-06, "loss": 0.02997283935546875, "step": 136100 }, { "epoch": 1.1768596899291834, "grad_norm": 7.0846962544670795, "learning_rate": 2.1772475811500597e-06, "loss": 0.046613311767578124, "step": 136105 }, { "epoch": 1.1769029234507267, "grad_norm": 22.646445451723295, "learning_rate": 2.1770516613732423e-06, "loss": 0.06699638366699219, "step": 136110 }, { "epoch": 1.17694615697227, "grad_norm": 4.016611041607673, "learning_rate": 2.176855745391789e-06, "loss": 0.052245712280273436, "step": 136115 }, { "epoch": 1.1769893904938133, "grad_norm": 25.498414037848494, "learning_rate": 2.1766598332066046e-06, "loss": 0.1215301513671875, "step": 136120 }, { "epoch": 1.1770326240153566, "grad_norm": 1.4499967095601618, "learning_rate": 2.1764639248185932e-06, "loss": 0.101220703125, "step": 136125 }, { "epoch": 1.1770758575368998, "grad_norm": 1.0180116150754142, "learning_rate": 2.176268020228657e-06, "loss": 0.0460906982421875, "step": 136130 }, { "epoch": 1.177119091058443, "grad_norm": 2.879821324638065, "learning_rate": 2.176072119437699e-06, "loss": 0.028746414184570312, "step": 136135 }, { "epoch": 1.1771623245799863, "grad_norm": 0.9736554387878877, "learning_rate": 2.1758762224466245e-06, "loss": 0.023383331298828126, "step": 136140 }, { "epoch": 1.1772055581015297, "grad_norm": 6.002274093528516, "learning_rate": 2.175680329256336e-06, "loss": 0.03519439697265625, "step": 136145 }, { "epoch": 1.177248791623073, "grad_norm": 1.314894491607286, "learning_rate": 2.175484439867736e-06, "loss": 0.06360969543457032, "step": 136150 }, { "epoch": 1.1772920251446162, "grad_norm": 0.6147371805877917, "learning_rate": 2.17528855428173e-06, "loss": 0.0916229248046875, "step": 136155 }, { "epoch": 1.1773352586661594, "grad_norm": 11.76682706396422, "learning_rate": 2.1750926724992203e-06, "loss": 0.05531005859375, "step": 136160 }, { "epoch": 1.1773784921877026, "grad_norm": 2.913378892437486, "learning_rate": 2.1748967945211092e-06, "loss": 0.0222442626953125, "step": 136165 }, { "epoch": 1.1774217257092459, "grad_norm": 24.665159085014892, "learning_rate": 2.1747009203483026e-06, "loss": 0.08992538452148438, "step": 136170 }, { "epoch": 1.177464959230789, "grad_norm": 0.13647082873060393, "learning_rate": 2.1745050499817024e-06, "loss": 0.2740201950073242, "step": 136175 }, { "epoch": 1.1775081927523325, "grad_norm": 0.45648441344323354, "learning_rate": 2.17430918342221e-06, "loss": 0.12256698608398438, "step": 136180 }, { "epoch": 1.1775514262738758, "grad_norm": 9.314471152928594, "learning_rate": 2.174113320670732e-06, "loss": 0.08104953765869141, "step": 136185 }, { "epoch": 1.177594659795419, "grad_norm": 14.763741507315183, "learning_rate": 2.173917461728171e-06, "loss": 0.06793670654296875, "step": 136190 }, { "epoch": 1.1776378933169622, "grad_norm": 1.4497437112865734, "learning_rate": 2.173721606595429e-06, "loss": 0.10117225646972657, "step": 136195 }, { "epoch": 1.1776811268385055, "grad_norm": 0.08290094657166688, "learning_rate": 2.1735257552734103e-06, "loss": 0.05637283325195312, "step": 136200 }, { "epoch": 1.1777243603600487, "grad_norm": 8.353450175061976, "learning_rate": 2.173329907763018e-06, "loss": 0.0358428955078125, "step": 136205 }, { "epoch": 1.1777675938815921, "grad_norm": 0.30048327790291185, "learning_rate": 2.1731340640651537e-06, "loss": 0.3773059844970703, "step": 136210 }, { "epoch": 1.1778108274031354, "grad_norm": 2.5494829579143277, "learning_rate": 2.172938224180723e-06, "loss": 0.0333221435546875, "step": 136215 }, { "epoch": 1.1778540609246786, "grad_norm": 0.26415592281051214, "learning_rate": 2.172742388110629e-06, "loss": 0.10937843322753907, "step": 136220 }, { "epoch": 1.1778972944462218, "grad_norm": 47.37464280794261, "learning_rate": 2.1725465558557726e-06, "loss": 0.20207901000976564, "step": 136225 }, { "epoch": 1.177940527967765, "grad_norm": 7.167886858164615, "learning_rate": 2.172350727417059e-06, "loss": 0.04692916870117188, "step": 136230 }, { "epoch": 1.1779837614893083, "grad_norm": 4.183395875100449, "learning_rate": 2.1721549027953904e-06, "loss": 0.04485626220703125, "step": 136235 }, { "epoch": 1.1780269950108515, "grad_norm": 1.0700643524714175, "learning_rate": 2.171959081991671e-06, "loss": 0.05039901733398437, "step": 136240 }, { "epoch": 1.178070228532395, "grad_norm": 17.80574578934326, "learning_rate": 2.171763265006802e-06, "loss": 0.05625, "step": 136245 }, { "epoch": 1.1781134620539382, "grad_norm": 15.781963427456196, "learning_rate": 2.1715674518416876e-06, "loss": 0.19627685546875, "step": 136250 }, { "epoch": 1.1781566955754814, "grad_norm": 1.8958954486221489, "learning_rate": 2.171371642497232e-06, "loss": 0.0388519287109375, "step": 136255 }, { "epoch": 1.1781999290970246, "grad_norm": 0.819697125615935, "learning_rate": 2.171175836974337e-06, "loss": 0.023729324340820312, "step": 136260 }, { "epoch": 1.1782431626185679, "grad_norm": 0.37785044776699356, "learning_rate": 2.1709800352739056e-06, "loss": 0.1805126190185547, "step": 136265 }, { "epoch": 1.178286396140111, "grad_norm": 4.794270089593496, "learning_rate": 2.170784237396841e-06, "loss": 0.05611724853515625, "step": 136270 }, { "epoch": 1.1783296296616546, "grad_norm": 4.829358527427721, "learning_rate": 2.1705884433440454e-06, "loss": 0.11441688537597657, "step": 136275 }, { "epoch": 1.1783728631831978, "grad_norm": 6.189892080282801, "learning_rate": 2.1703926531164233e-06, "loss": 0.03274688720703125, "step": 136280 }, { "epoch": 1.178416096704741, "grad_norm": 14.425999940738748, "learning_rate": 2.170196866714877e-06, "loss": 0.028591156005859375, "step": 136285 }, { "epoch": 1.1784593302262842, "grad_norm": 3.772347140608787, "learning_rate": 2.1700010841403103e-06, "loss": 0.0946319580078125, "step": 136290 }, { "epoch": 1.1785025637478275, "grad_norm": 3.2090213811632364, "learning_rate": 2.1698053053936247e-06, "loss": 0.04427032470703125, "step": 136295 }, { "epoch": 1.1785457972693707, "grad_norm": 1.415495550839214, "learning_rate": 2.169609530475723e-06, "loss": 0.07074661254882812, "step": 136300 }, { "epoch": 1.178589030790914, "grad_norm": 14.894859903969861, "learning_rate": 2.1694137593875087e-06, "loss": 0.11403212547302247, "step": 136305 }, { "epoch": 1.1786322643124574, "grad_norm": 4.9506147924818915, "learning_rate": 2.169217992129885e-06, "loss": 0.1072906494140625, "step": 136310 }, { "epoch": 1.1786754978340006, "grad_norm": 6.464680680384122, "learning_rate": 2.169022228703755e-06, "loss": 0.16910324096679688, "step": 136315 }, { "epoch": 1.1787187313555438, "grad_norm": 4.944719728653225, "learning_rate": 2.1688264691100208e-06, "loss": 0.1239837646484375, "step": 136320 }, { "epoch": 1.178761964877087, "grad_norm": 4.94528892508745, "learning_rate": 2.168630713349586e-06, "loss": 0.024346542358398438, "step": 136325 }, { "epoch": 1.1788051983986303, "grad_norm": 5.7430786951544315, "learning_rate": 2.168434961423352e-06, "loss": 0.048276519775390624, "step": 136330 }, { "epoch": 1.1788484319201737, "grad_norm": 0.5725401079166434, "learning_rate": 2.168239213332223e-06, "loss": 0.23799095153808594, "step": 136335 }, { "epoch": 1.178891665441717, "grad_norm": 2.365016127486399, "learning_rate": 2.1680434690771e-06, "loss": 0.15163040161132812, "step": 136340 }, { "epoch": 1.1789348989632602, "grad_norm": 8.076197720771543, "learning_rate": 2.167847728658889e-06, "loss": 0.1154815673828125, "step": 136345 }, { "epoch": 1.1789781324848034, "grad_norm": 1.7928178568333342, "learning_rate": 2.16765199207849e-06, "loss": 0.20245819091796874, "step": 136350 }, { "epoch": 1.1790213660063467, "grad_norm": 6.8355753561049415, "learning_rate": 2.1674562593368064e-06, "loss": 0.03519134521484375, "step": 136355 }, { "epoch": 1.17906459952789, "grad_norm": 8.361481837803556, "learning_rate": 2.1672605304347407e-06, "loss": 0.03843374252319336, "step": 136360 }, { "epoch": 1.1791078330494331, "grad_norm": 0.2988396290167496, "learning_rate": 2.1670648053731965e-06, "loss": 0.09163379669189453, "step": 136365 }, { "epoch": 1.1791510665709766, "grad_norm": 6.994795559157086, "learning_rate": 2.166869084153074e-06, "loss": 0.01724395751953125, "step": 136370 }, { "epoch": 1.1791943000925198, "grad_norm": 22.776326565231123, "learning_rate": 2.1666733667752796e-06, "loss": 0.07335205078125, "step": 136375 }, { "epoch": 1.179237533614063, "grad_norm": 1.7053860016971154, "learning_rate": 2.1664776532407137e-06, "loss": 0.03681488037109375, "step": 136380 }, { "epoch": 1.1792807671356063, "grad_norm": 32.63999328582743, "learning_rate": 2.1662819435502793e-06, "loss": 0.15687332153320313, "step": 136385 }, { "epoch": 1.1793240006571495, "grad_norm": 58.01424832048083, "learning_rate": 2.166086237704878e-06, "loss": 0.17108001708984374, "step": 136390 }, { "epoch": 1.1793672341786927, "grad_norm": 1.7166228743893697, "learning_rate": 2.165890535705414e-06, "loss": 0.0530975341796875, "step": 136395 }, { "epoch": 1.1794104677002362, "grad_norm": 0.13507068061889582, "learning_rate": 2.165694837552788e-06, "loss": 0.172320556640625, "step": 136400 }, { "epoch": 1.1794537012217794, "grad_norm": 22.08091228846898, "learning_rate": 2.165499143247905e-06, "loss": 0.058489227294921876, "step": 136405 }, { "epoch": 1.1794969347433226, "grad_norm": 0.10809712032963739, "learning_rate": 2.1653034527916663e-06, "loss": 0.019968032836914062, "step": 136410 }, { "epoch": 1.1795401682648659, "grad_norm": 3.8723652686131445, "learning_rate": 2.165107766184974e-06, "loss": 0.08487014770507813, "step": 136415 }, { "epoch": 1.179583401786409, "grad_norm": 3.8286902882546996, "learning_rate": 2.1649120834287305e-06, "loss": 0.03329315185546875, "step": 136420 }, { "epoch": 1.1796266353079523, "grad_norm": 0.9243040697779685, "learning_rate": 2.1647164045238395e-06, "loss": 0.117962646484375, "step": 136425 }, { "epoch": 1.1796698688294955, "grad_norm": 21.02748355668387, "learning_rate": 2.164520729471202e-06, "loss": 0.31047782897949217, "step": 136430 }, { "epoch": 1.179713102351039, "grad_norm": 1.0796102992355192, "learning_rate": 2.1643250582717204e-06, "loss": 0.0076732635498046875, "step": 136435 }, { "epoch": 1.1797563358725822, "grad_norm": 0.4926092170257662, "learning_rate": 2.164129390926299e-06, "loss": 0.22415733337402344, "step": 136440 }, { "epoch": 1.1797995693941254, "grad_norm": 41.19633531608077, "learning_rate": 2.1639337274358385e-06, "loss": 0.19427108764648438, "step": 136445 }, { "epoch": 1.1798428029156687, "grad_norm": 15.221336666075835, "learning_rate": 2.163738067801242e-06, "loss": 0.05867900848388672, "step": 136450 }, { "epoch": 1.179886036437212, "grad_norm": 39.05702616857593, "learning_rate": 2.163542412023412e-06, "loss": 0.2541961669921875, "step": 136455 }, { "epoch": 1.1799292699587551, "grad_norm": 4.306777506727566, "learning_rate": 2.1633467601032503e-06, "loss": 0.0284881591796875, "step": 136460 }, { "epoch": 1.1799725034802986, "grad_norm": 33.6133963766939, "learning_rate": 2.163151112041658e-06, "loss": 0.17840423583984374, "step": 136465 }, { "epoch": 1.1800157370018418, "grad_norm": 0.6408600879098452, "learning_rate": 2.1629554678395403e-06, "loss": 0.008979034423828126, "step": 136470 }, { "epoch": 1.180058970523385, "grad_norm": 1.7242754273818715, "learning_rate": 2.162759827497797e-06, "loss": 0.14636878967285155, "step": 136475 }, { "epoch": 1.1801022040449283, "grad_norm": 2.346632508866449, "learning_rate": 2.1625641910173325e-06, "loss": 0.034088134765625, "step": 136480 }, { "epoch": 1.1801454375664715, "grad_norm": 3.2632050368505428, "learning_rate": 2.1623685583990477e-06, "loss": 0.02151641845703125, "step": 136485 }, { "epoch": 1.1801886710880147, "grad_norm": 3.2433192491089975, "learning_rate": 2.162172929643845e-06, "loss": 0.0394927978515625, "step": 136490 }, { "epoch": 1.180231904609558, "grad_norm": 0.6447582001114774, "learning_rate": 2.1619773047526265e-06, "loss": 0.04859027862548828, "step": 136495 }, { "epoch": 1.1802751381311014, "grad_norm": 4.691412630271948, "learning_rate": 2.1617816837262943e-06, "loss": 0.24203052520751953, "step": 136500 }, { "epoch": 1.1803183716526446, "grad_norm": 0.7024666180898387, "learning_rate": 2.161586066565751e-06, "loss": 0.1169778823852539, "step": 136505 }, { "epoch": 1.1803616051741879, "grad_norm": 1.3364598192601187, "learning_rate": 2.161390453271899e-06, "loss": 0.017483997344970702, "step": 136510 }, { "epoch": 1.180404838695731, "grad_norm": 0.9287818055722843, "learning_rate": 2.1611948438456406e-06, "loss": 0.054282760620117186, "step": 136515 }, { "epoch": 1.1804480722172743, "grad_norm": 0.7875222552620307, "learning_rate": 2.1609992382878774e-06, "loss": 0.14098892211914063, "step": 136520 }, { "epoch": 1.1804913057388176, "grad_norm": 6.350215030434358, "learning_rate": 2.1608036365995106e-06, "loss": 0.031023406982421876, "step": 136525 }, { "epoch": 1.180534539260361, "grad_norm": 0.8538071823617713, "learning_rate": 2.1606080387814436e-06, "loss": 0.06092414855957031, "step": 136530 }, { "epoch": 1.1805777727819042, "grad_norm": 6.16239614645737, "learning_rate": 2.160412444834578e-06, "loss": 0.020355033874511718, "step": 136535 }, { "epoch": 1.1806210063034475, "grad_norm": 0.15409054977267939, "learning_rate": 2.160216854759817e-06, "loss": 0.025872039794921874, "step": 136540 }, { "epoch": 1.1806642398249907, "grad_norm": 11.921061946181487, "learning_rate": 2.1600212685580614e-06, "loss": 0.21059112548828124, "step": 136545 }, { "epoch": 1.180707473346534, "grad_norm": 3.122732493246429, "learning_rate": 2.1598256862302137e-06, "loss": 0.01569643020629883, "step": 136550 }, { "epoch": 1.1807507068680771, "grad_norm": 0.17874679685379632, "learning_rate": 2.159630107777175e-06, "loss": 0.009689712524414062, "step": 136555 }, { "epoch": 1.1807939403896204, "grad_norm": 1.0013778338974413, "learning_rate": 2.1594345331998475e-06, "loss": 0.149957275390625, "step": 136560 }, { "epoch": 1.1808371739111638, "grad_norm": 13.438023514070018, "learning_rate": 2.1592389624991337e-06, "loss": 0.09857101440429687, "step": 136565 }, { "epoch": 1.180880407432707, "grad_norm": 0.09198660544051841, "learning_rate": 2.159043395675937e-06, "loss": 0.011076927185058594, "step": 136570 }, { "epoch": 1.1809236409542503, "grad_norm": 5.728634838407282, "learning_rate": 2.1588478327311566e-06, "loss": 0.020623397827148438, "step": 136575 }, { "epoch": 1.1809668744757935, "grad_norm": 23.186073643239936, "learning_rate": 2.158652273665697e-06, "loss": 0.139208984375, "step": 136580 }, { "epoch": 1.1810101079973367, "grad_norm": 11.194280322779594, "learning_rate": 2.158456718480457e-06, "loss": 0.08724517822265625, "step": 136585 }, { "epoch": 1.1810533415188802, "grad_norm": 3.8127969401995845, "learning_rate": 2.1582611671763417e-06, "loss": 0.01722869873046875, "step": 136590 }, { "epoch": 1.1810965750404234, "grad_norm": 0.448675272006371, "learning_rate": 2.15806561975425e-06, "loss": 0.012467765808105468, "step": 136595 }, { "epoch": 1.1811398085619667, "grad_norm": 1.5196566644836917, "learning_rate": 2.1578700762150862e-06, "loss": 0.03192596435546875, "step": 136600 }, { "epoch": 1.1811830420835099, "grad_norm": 0.6439247360219893, "learning_rate": 2.157674536559751e-06, "loss": 0.0767303466796875, "step": 136605 }, { "epoch": 1.181226275605053, "grad_norm": 2.509082288910682, "learning_rate": 2.1574790007891473e-06, "loss": 0.4013633728027344, "step": 136610 }, { "epoch": 1.1812695091265963, "grad_norm": 1.4093857398616267, "learning_rate": 2.157283468904175e-06, "loss": 0.10367279052734375, "step": 136615 }, { "epoch": 1.1813127426481396, "grad_norm": 1.0467779480200208, "learning_rate": 2.157087940905737e-06, "loss": 0.05435447692871094, "step": 136620 }, { "epoch": 1.181355976169683, "grad_norm": 0.7418903728325985, "learning_rate": 2.1568924167947347e-06, "loss": 0.11711063385009765, "step": 136625 }, { "epoch": 1.1813992096912262, "grad_norm": 1.7192485428626283, "learning_rate": 2.15669689657207e-06, "loss": 0.04070415496826172, "step": 136630 }, { "epoch": 1.1814424432127695, "grad_norm": 0.5037516877223672, "learning_rate": 2.1565013802386455e-06, "loss": 0.01109771728515625, "step": 136635 }, { "epoch": 1.1814856767343127, "grad_norm": 51.33801394407994, "learning_rate": 2.1563058677953625e-06, "loss": 0.14928741455078126, "step": 136640 }, { "epoch": 1.181528910255856, "grad_norm": 0.2956216859819918, "learning_rate": 2.156110359243121e-06, "loss": 0.039961624145507815, "step": 136645 }, { "epoch": 1.1815721437773992, "grad_norm": 1.7752786313147568, "learning_rate": 2.155914854582825e-06, "loss": 0.0517486572265625, "step": 136650 }, { "epoch": 1.1816153772989426, "grad_norm": 0.6238602011566858, "learning_rate": 2.1557193538153736e-06, "loss": 0.04334716796875, "step": 136655 }, { "epoch": 1.1816586108204858, "grad_norm": 17.169954610452276, "learning_rate": 2.155523856941671e-06, "loss": 0.03969020843505859, "step": 136660 }, { "epoch": 1.181701844342029, "grad_norm": 1.1441389570697666, "learning_rate": 2.1553283639626186e-06, "loss": 0.09718399047851563, "step": 136665 }, { "epoch": 1.1817450778635723, "grad_norm": 12.561684338331917, "learning_rate": 2.1551328748791155e-06, "loss": 0.13990325927734376, "step": 136670 }, { "epoch": 1.1817883113851155, "grad_norm": 2.9715852894651067, "learning_rate": 2.154937389692066e-06, "loss": 0.06011772155761719, "step": 136675 }, { "epoch": 1.1818315449066588, "grad_norm": 4.542533967450781, "learning_rate": 2.1547419084023706e-06, "loss": 0.03621330261230469, "step": 136680 }, { "epoch": 1.181874778428202, "grad_norm": 0.01654486415961691, "learning_rate": 2.154546431010931e-06, "loss": 0.04509658813476562, "step": 136685 }, { "epoch": 1.1819180119497454, "grad_norm": 0.1330640944807108, "learning_rate": 2.154350957518647e-06, "loss": 0.024353790283203124, "step": 136690 }, { "epoch": 1.1819612454712887, "grad_norm": 1.3106228547287895, "learning_rate": 2.154155487926423e-06, "loss": 0.0321929931640625, "step": 136695 }, { "epoch": 1.182004478992832, "grad_norm": 4.5320644462674915, "learning_rate": 2.1539600222351584e-06, "loss": 0.024754714965820313, "step": 136700 }, { "epoch": 1.1820477125143751, "grad_norm": 14.695968314009654, "learning_rate": 2.153764560445756e-06, "loss": 0.0462432861328125, "step": 136705 }, { "epoch": 1.1820909460359184, "grad_norm": 0.05581606702105669, "learning_rate": 2.153569102559117e-06, "loss": 0.2228445053100586, "step": 136710 }, { "epoch": 1.1821341795574616, "grad_norm": 0.7148735019513953, "learning_rate": 2.153373648576142e-06, "loss": 0.003436279296875, "step": 136715 }, { "epoch": 1.182177413079005, "grad_norm": 1.134272987813662, "learning_rate": 2.1531781984977325e-06, "loss": 0.03503265380859375, "step": 136720 }, { "epoch": 1.1822206466005483, "grad_norm": 5.8024899827275975, "learning_rate": 2.152982752324791e-06, "loss": 0.032043838500976564, "step": 136725 }, { "epoch": 1.1822638801220915, "grad_norm": 4.993229992555818, "learning_rate": 2.1527873100582177e-06, "loss": 0.01417388916015625, "step": 136730 }, { "epoch": 1.1823071136436347, "grad_norm": 4.365967059117303, "learning_rate": 2.152591871698915e-06, "loss": 0.02764739990234375, "step": 136735 }, { "epoch": 1.182350347165178, "grad_norm": 4.549407938944957, "learning_rate": 2.152396437247784e-06, "loss": 0.010919189453125, "step": 136740 }, { "epoch": 1.1823935806867212, "grad_norm": 5.429082206625261, "learning_rate": 2.1522010067057254e-06, "loss": 0.17424774169921875, "step": 136745 }, { "epoch": 1.1824368142082644, "grad_norm": 0.8633833831523025, "learning_rate": 2.1520055800736396e-06, "loss": 0.04385986328125, "step": 136750 }, { "epoch": 1.1824800477298079, "grad_norm": 0.5267796314893158, "learning_rate": 2.1518101573524307e-06, "loss": 0.02272186279296875, "step": 136755 }, { "epoch": 1.182523281251351, "grad_norm": 5.774368946545794, "learning_rate": 2.1516147385429974e-06, "loss": 0.04107513427734375, "step": 136760 }, { "epoch": 1.1825665147728943, "grad_norm": 3.4930524697867016, "learning_rate": 2.1514193236462427e-06, "loss": 0.07230300903320312, "step": 136765 }, { "epoch": 1.1826097482944375, "grad_norm": 4.535774997993125, "learning_rate": 2.1512239126630673e-06, "loss": 0.0854248046875, "step": 136770 }, { "epoch": 1.1826529818159808, "grad_norm": 1.0910291656980322, "learning_rate": 2.151028505594372e-06, "loss": 0.06272029876708984, "step": 136775 }, { "epoch": 1.1826962153375242, "grad_norm": 23.598828045018063, "learning_rate": 2.1508331024410575e-06, "loss": 0.2534446716308594, "step": 136780 }, { "epoch": 1.1827394488590675, "grad_norm": 0.3004969819017617, "learning_rate": 2.150637703204026e-06, "loss": 0.02914276123046875, "step": 136785 }, { "epoch": 1.1827826823806107, "grad_norm": 1.401243686675638, "learning_rate": 2.1504423078841783e-06, "loss": 0.168536376953125, "step": 136790 }, { "epoch": 1.182825915902154, "grad_norm": 10.600951279395586, "learning_rate": 2.1502469164824163e-06, "loss": 0.09454765319824218, "step": 136795 }, { "epoch": 1.1828691494236971, "grad_norm": 6.41799146336082, "learning_rate": 2.1500515289996402e-06, "loss": 0.0608245849609375, "step": 136800 }, { "epoch": 1.1829123829452404, "grad_norm": 0.4679313764987, "learning_rate": 2.1498561454367514e-06, "loss": 0.056102943420410153, "step": 136805 }, { "epoch": 1.1829556164667836, "grad_norm": 2.134738201665819, "learning_rate": 2.14966076579465e-06, "loss": 0.21866073608398437, "step": 136810 }, { "epoch": 1.1829988499883268, "grad_norm": 27.641912817160215, "learning_rate": 2.1494653900742386e-06, "loss": 0.20996551513671874, "step": 136815 }, { "epoch": 1.1830420835098703, "grad_norm": 1.6678879293692537, "learning_rate": 2.1492700182764175e-06, "loss": 0.017317962646484376, "step": 136820 }, { "epoch": 1.1830853170314135, "grad_norm": 16.059714501227123, "learning_rate": 2.1490746504020883e-06, "loss": 0.06313629150390625, "step": 136825 }, { "epoch": 1.1831285505529567, "grad_norm": 3.627317986401713, "learning_rate": 2.148879286452152e-06, "loss": 0.019886016845703125, "step": 136830 }, { "epoch": 1.1831717840745, "grad_norm": 2.7366785499228183, "learning_rate": 2.1486839264275087e-06, "loss": 0.143280029296875, "step": 136835 }, { "epoch": 1.1832150175960432, "grad_norm": 1.058094685390683, "learning_rate": 2.1484885703290597e-06, "loss": 0.00976104736328125, "step": 136840 }, { "epoch": 1.1832582511175866, "grad_norm": 0.10475571319044151, "learning_rate": 2.148293218157707e-06, "loss": 0.011181259155273437, "step": 136845 }, { "epoch": 1.1833014846391299, "grad_norm": 0.3854035802545965, "learning_rate": 2.148097869914349e-06, "loss": 0.0686197280883789, "step": 136850 }, { "epoch": 1.183344718160673, "grad_norm": 1.7278234968419726, "learning_rate": 2.14790252559989e-06, "loss": 0.06700286865234376, "step": 136855 }, { "epoch": 1.1833879516822163, "grad_norm": 4.930793047777584, "learning_rate": 2.147707185215229e-06, "loss": 0.31807861328125, "step": 136860 }, { "epoch": 1.1834311852037596, "grad_norm": 30.474588321031835, "learning_rate": 2.147511848761267e-06, "loss": 0.18791236877441406, "step": 136865 }, { "epoch": 1.1834744187253028, "grad_norm": 1.281094465024427, "learning_rate": 2.1473165162389047e-06, "loss": 0.04571609497070313, "step": 136870 }, { "epoch": 1.183517652246846, "grad_norm": 2.8049229901552133, "learning_rate": 2.1471211876490433e-06, "loss": 0.152480411529541, "step": 136875 }, { "epoch": 1.1835608857683895, "grad_norm": 4.349119801372064, "learning_rate": 2.146925862992583e-06, "loss": 0.025677490234375, "step": 136880 }, { "epoch": 1.1836041192899327, "grad_norm": 3.1469440558950295, "learning_rate": 2.146730542270426e-06, "loss": 0.022141265869140624, "step": 136885 }, { "epoch": 1.183647352811476, "grad_norm": 14.7683109342428, "learning_rate": 2.1465352254834727e-06, "loss": 0.14005889892578124, "step": 136890 }, { "epoch": 1.1836905863330192, "grad_norm": 0.07186595986544017, "learning_rate": 2.146339912632623e-06, "loss": 0.03420944213867187, "step": 136895 }, { "epoch": 1.1837338198545624, "grad_norm": 5.427289632426695, "learning_rate": 2.146144603718779e-06, "loss": 0.023936080932617187, "step": 136900 }, { "epoch": 1.1837770533761056, "grad_norm": 3.566298711550155, "learning_rate": 2.1459492987428402e-06, "loss": 0.015008544921875, "step": 136905 }, { "epoch": 1.183820286897649, "grad_norm": 34.81004933421178, "learning_rate": 2.1457539977057065e-06, "loss": 0.09953155517578124, "step": 136910 }, { "epoch": 1.1838635204191923, "grad_norm": 1.4187471257433026, "learning_rate": 2.1455587006082814e-06, "loss": 0.0395172119140625, "step": 136915 }, { "epoch": 1.1839067539407355, "grad_norm": 27.884990071661168, "learning_rate": 2.145363407451464e-06, "loss": 0.5713644981384277, "step": 136920 }, { "epoch": 1.1839499874622788, "grad_norm": 44.69511348364024, "learning_rate": 2.1451681182361545e-06, "loss": 0.3834503173828125, "step": 136925 }, { "epoch": 1.183993220983822, "grad_norm": 1.2595529280880788, "learning_rate": 2.1449728329632547e-06, "loss": 0.04056625366210938, "step": 136930 }, { "epoch": 1.1840364545053652, "grad_norm": 0.08993612357632798, "learning_rate": 2.1447775516336647e-06, "loss": 0.09247512817382812, "step": 136935 }, { "epoch": 1.1840796880269084, "grad_norm": 0.25118987635355866, "learning_rate": 2.1445822742482845e-06, "loss": 0.012154388427734374, "step": 136940 }, { "epoch": 1.184122921548452, "grad_norm": 4.83892200395154, "learning_rate": 2.1443870008080144e-06, "loss": 0.023288726806640625, "step": 136945 }, { "epoch": 1.1841661550699951, "grad_norm": 6.93943063402155, "learning_rate": 2.1441917313137574e-06, "loss": 0.015662765502929686, "step": 136950 }, { "epoch": 1.1842093885915383, "grad_norm": 2.2652473656777405, "learning_rate": 2.1439964657664114e-06, "loss": 0.05804443359375, "step": 136955 }, { "epoch": 1.1842526221130816, "grad_norm": 1.94578336112189, "learning_rate": 2.143801204166879e-06, "loss": 0.004749870300292969, "step": 136960 }, { "epoch": 1.1842958556346248, "grad_norm": 4.336595122702513, "learning_rate": 2.1436059465160598e-06, "loss": 0.05752887725830078, "step": 136965 }, { "epoch": 1.184339089156168, "grad_norm": 17.32503747907069, "learning_rate": 2.143410692814854e-06, "loss": 0.1298431396484375, "step": 136970 }, { "epoch": 1.1843823226777115, "grad_norm": 5.601687467896195, "learning_rate": 2.1432154430641614e-06, "loss": 0.07754898071289062, "step": 136975 }, { "epoch": 1.1844255561992547, "grad_norm": 8.321931031195396, "learning_rate": 2.143020197264885e-06, "loss": 0.12086772918701172, "step": 136980 }, { "epoch": 1.184468789720798, "grad_norm": 3.034973701795758, "learning_rate": 2.1428249554179227e-06, "loss": 0.27593460083007815, "step": 136985 }, { "epoch": 1.1845120232423412, "grad_norm": 11.10600293486134, "learning_rate": 2.1426297175241768e-06, "loss": 0.04179229736328125, "step": 136990 }, { "epoch": 1.1845552567638844, "grad_norm": 2.387834538669062, "learning_rate": 2.1424344835845466e-06, "loss": 0.020232391357421876, "step": 136995 }, { "epoch": 1.1845984902854276, "grad_norm": 0.18426787624535765, "learning_rate": 2.1422392535999327e-06, "loss": 0.014127349853515625, "step": 137000 }, { "epoch": 1.1846417238069709, "grad_norm": 0.2949835790538636, "learning_rate": 2.142044027571235e-06, "loss": 0.06693592071533203, "step": 137005 }, { "epoch": 1.1846849573285143, "grad_norm": 0.681064687118617, "learning_rate": 2.141848805499355e-06, "loss": 0.038551712036132814, "step": 137010 }, { "epoch": 1.1847281908500575, "grad_norm": 12.03628534710598, "learning_rate": 2.141653587385192e-06, "loss": 0.2519031524658203, "step": 137015 }, { "epoch": 1.1847714243716008, "grad_norm": 0.16074680263808322, "learning_rate": 2.1414583732296474e-06, "loss": 0.0064280986785888675, "step": 137020 }, { "epoch": 1.184814657893144, "grad_norm": 2.28329464300501, "learning_rate": 2.1412631630336213e-06, "loss": 0.018253326416015625, "step": 137025 }, { "epoch": 1.1848578914146872, "grad_norm": 7.124391979004687, "learning_rate": 2.141067956798013e-06, "loss": 0.06789360046386719, "step": 137030 }, { "epoch": 1.1849011249362307, "grad_norm": 1.825086565274103, "learning_rate": 2.140872754523723e-06, "loss": 0.08690032958984376, "step": 137035 }, { "epoch": 1.184944358457774, "grad_norm": 12.079539176984746, "learning_rate": 2.1406775562116524e-06, "loss": 0.05164642333984375, "step": 137040 }, { "epoch": 1.1849875919793171, "grad_norm": 27.74030985528184, "learning_rate": 2.140482361862701e-06, "loss": 0.386260986328125, "step": 137045 }, { "epoch": 1.1850308255008604, "grad_norm": 23.413681044676377, "learning_rate": 2.140287171477769e-06, "loss": 0.053958988189697264, "step": 137050 }, { "epoch": 1.1850740590224036, "grad_norm": 90.0075955542109, "learning_rate": 2.1400919850577567e-06, "loss": 0.3309051513671875, "step": 137055 }, { "epoch": 1.1851172925439468, "grad_norm": 4.205869312346468, "learning_rate": 2.1398968026035644e-06, "loss": 0.057592010498046874, "step": 137060 }, { "epoch": 1.18516052606549, "grad_norm": 0.34937137342334146, "learning_rate": 2.1397016241160914e-06, "loss": 0.016500091552734374, "step": 137065 }, { "epoch": 1.1852037595870333, "grad_norm": 52.61697691560923, "learning_rate": 2.139506449596238e-06, "loss": 0.10601329803466797, "step": 137070 }, { "epoch": 1.1852469931085767, "grad_norm": 2.422968634254911, "learning_rate": 2.139311279044906e-06, "loss": 0.02037811279296875, "step": 137075 }, { "epoch": 1.18529022663012, "grad_norm": 1.5529352181438902, "learning_rate": 2.139116112462994e-06, "loss": 0.08039741516113282, "step": 137080 }, { "epoch": 1.1853334601516632, "grad_norm": 6.141964724701772, "learning_rate": 2.138920949851403e-06, "loss": 0.02557220458984375, "step": 137085 }, { "epoch": 1.1853766936732064, "grad_norm": 8.26204797203064, "learning_rate": 2.138725791211032e-06, "loss": 0.05674285888671875, "step": 137090 }, { "epoch": 1.1854199271947496, "grad_norm": 0.8160646529746266, "learning_rate": 2.138530636542781e-06, "loss": 0.035889053344726564, "step": 137095 }, { "epoch": 1.185463160716293, "grad_norm": 32.202677007971154, "learning_rate": 2.138335485847551e-06, "loss": 0.15253639221191406, "step": 137100 }, { "epoch": 1.1855063942378363, "grad_norm": 40.8388507069326, "learning_rate": 2.1381403391262408e-06, "loss": 0.3540149688720703, "step": 137105 }, { "epoch": 1.1855496277593796, "grad_norm": 4.689151459306824, "learning_rate": 2.1379451963797523e-06, "loss": 0.057331085205078125, "step": 137110 }, { "epoch": 1.1855928612809228, "grad_norm": 31.953709160263326, "learning_rate": 2.137750057608984e-06, "loss": 0.09087944030761719, "step": 137115 }, { "epoch": 1.185636094802466, "grad_norm": 52.52071470330433, "learning_rate": 2.137554922814836e-06, "loss": 0.08621063232421874, "step": 137120 }, { "epoch": 1.1856793283240092, "grad_norm": 5.746046987277402, "learning_rate": 2.1373597919982093e-06, "loss": 0.033138275146484375, "step": 137125 }, { "epoch": 1.1857225618455525, "grad_norm": 9.934616066147195, "learning_rate": 2.137164665160003e-06, "loss": 0.046882247924804686, "step": 137130 }, { "epoch": 1.185765795367096, "grad_norm": 0.5739962658291007, "learning_rate": 2.1369695423011154e-06, "loss": 0.10555953979492187, "step": 137135 }, { "epoch": 1.1858090288886391, "grad_norm": 20.500942423273422, "learning_rate": 2.1367744234224493e-06, "loss": 0.04649810791015625, "step": 137140 }, { "epoch": 1.1858522624101824, "grad_norm": 4.0434219076073905, "learning_rate": 2.136579308524904e-06, "loss": 0.12581634521484375, "step": 137145 }, { "epoch": 1.1858954959317256, "grad_norm": 0.6537438579118504, "learning_rate": 2.1363841976093773e-06, "loss": 0.02999725341796875, "step": 137150 }, { "epoch": 1.1859387294532688, "grad_norm": 2.431038969408564, "learning_rate": 2.1361890906767713e-06, "loss": 0.23019866943359374, "step": 137155 }, { "epoch": 1.185981962974812, "grad_norm": 1.2624927994481459, "learning_rate": 2.1359939877279844e-06, "loss": 0.02892303466796875, "step": 137160 }, { "epoch": 1.1860251964963555, "grad_norm": 25.984582247814885, "learning_rate": 2.1357988887639163e-06, "loss": 0.3334201812744141, "step": 137165 }, { "epoch": 1.1860684300178987, "grad_norm": 3.3293495148850223, "learning_rate": 2.1356037937854677e-06, "loss": 0.05373382568359375, "step": 137170 }, { "epoch": 1.186111663539442, "grad_norm": 0.8688116739717535, "learning_rate": 2.135408702793539e-06, "loss": 0.041123580932617185, "step": 137175 }, { "epoch": 1.1861548970609852, "grad_norm": 0.26916002250309307, "learning_rate": 2.1352136157890276e-06, "loss": 0.18870849609375, "step": 137180 }, { "epoch": 1.1861981305825284, "grad_norm": 2.8161999145366337, "learning_rate": 2.1350185327728356e-06, "loss": 0.013839340209960938, "step": 137185 }, { "epoch": 1.1862413641040717, "grad_norm": 4.688389928547216, "learning_rate": 2.1348234537458614e-06, "loss": 0.07870330810546874, "step": 137190 }, { "epoch": 1.186284597625615, "grad_norm": 1.406456205064578, "learning_rate": 2.1346283787090056e-06, "loss": 0.10125274658203125, "step": 137195 }, { "epoch": 1.1863278311471583, "grad_norm": 21.04081026199601, "learning_rate": 2.134433307663166e-06, "loss": 0.07392120361328125, "step": 137200 }, { "epoch": 1.1863710646687016, "grad_norm": 5.095990247888941, "learning_rate": 2.134238240609244e-06, "loss": 0.07143478393554688, "step": 137205 }, { "epoch": 1.1864142981902448, "grad_norm": 2.281301145029493, "learning_rate": 2.1340431775481384e-06, "loss": 0.13701171875, "step": 137210 }, { "epoch": 1.186457531711788, "grad_norm": 7.997680148938295, "learning_rate": 2.13384811848075e-06, "loss": 0.3999660491943359, "step": 137215 }, { "epoch": 1.1865007652333313, "grad_norm": 0.88916598718675, "learning_rate": 2.1336530634079773e-06, "loss": 0.144207763671875, "step": 137220 }, { "epoch": 1.1865439987548745, "grad_norm": 0.980024169717198, "learning_rate": 2.13345801233072e-06, "loss": 0.18268928527832032, "step": 137225 }, { "epoch": 1.186587232276418, "grad_norm": 12.81127941236662, "learning_rate": 2.133262965249877e-06, "loss": 0.02723979949951172, "step": 137230 }, { "epoch": 1.1866304657979612, "grad_norm": 1.1363314446256756, "learning_rate": 2.1330679221663495e-06, "loss": 0.10653457641601563, "step": 137235 }, { "epoch": 1.1866736993195044, "grad_norm": 40.916804692203364, "learning_rate": 2.132872883081036e-06, "loss": 0.3139739990234375, "step": 137240 }, { "epoch": 1.1867169328410476, "grad_norm": 0.34906966183314253, "learning_rate": 2.132677847994836e-06, "loss": 0.03871917724609375, "step": 137245 }, { "epoch": 1.1867601663625909, "grad_norm": 30.81020284477104, "learning_rate": 2.1324828169086498e-06, "loss": 0.23165283203125, "step": 137250 }, { "epoch": 1.186803399884134, "grad_norm": 0.5994872972146069, "learning_rate": 2.1322877898233757e-06, "loss": 0.05431289672851562, "step": 137255 }, { "epoch": 1.1868466334056773, "grad_norm": 30.172131850356045, "learning_rate": 2.132092766739913e-06, "loss": 0.25528945922851565, "step": 137260 }, { "epoch": 1.1868898669272208, "grad_norm": 5.126251235050765, "learning_rate": 2.1318977476591626e-06, "loss": 0.170977783203125, "step": 137265 }, { "epoch": 1.186933100448764, "grad_norm": 6.9828761256617415, "learning_rate": 2.1317027325820226e-06, "loss": 0.012158584594726563, "step": 137270 }, { "epoch": 1.1869763339703072, "grad_norm": 3.9805356151091518, "learning_rate": 2.131507721509393e-06, "loss": 0.1148468017578125, "step": 137275 }, { "epoch": 1.1870195674918504, "grad_norm": 11.50148335775541, "learning_rate": 2.1313127144421736e-06, "loss": 0.04836330413818359, "step": 137280 }, { "epoch": 1.1870628010133937, "grad_norm": 21.35317038921864, "learning_rate": 2.1311177113812633e-06, "loss": 0.1897064208984375, "step": 137285 }, { "epoch": 1.1871060345349371, "grad_norm": 2.731122215148192, "learning_rate": 2.1309227123275604e-06, "loss": 0.05495147705078125, "step": 137290 }, { "epoch": 1.1871492680564804, "grad_norm": 0.4564344002786457, "learning_rate": 2.1307277172819644e-06, "loss": 0.12155914306640625, "step": 137295 }, { "epoch": 1.1871925015780236, "grad_norm": 22.19226150576236, "learning_rate": 2.130532726245377e-06, "loss": 0.12317581176757812, "step": 137300 }, { "epoch": 1.1872357350995668, "grad_norm": 2.1490726570370073, "learning_rate": 2.1303377392186954e-06, "loss": 0.008603668212890625, "step": 137305 }, { "epoch": 1.18727896862111, "grad_norm": 0.25270295623098693, "learning_rate": 2.1301427562028193e-06, "loss": 0.049347305297851564, "step": 137310 }, { "epoch": 1.1873222021426533, "grad_norm": 0.22954838807689326, "learning_rate": 2.129947777198648e-06, "loss": 0.06608295440673828, "step": 137315 }, { "epoch": 1.1873654356641965, "grad_norm": 23.02829829538801, "learning_rate": 2.1297528022070805e-06, "loss": 0.036035919189453126, "step": 137320 }, { "epoch": 1.18740866918574, "grad_norm": 1.6055136361769535, "learning_rate": 2.1295578312290156e-06, "loss": 0.01204376220703125, "step": 137325 }, { "epoch": 1.1874519027072832, "grad_norm": 4.242230152408065, "learning_rate": 2.129362864265354e-06, "loss": 0.047855377197265625, "step": 137330 }, { "epoch": 1.1874951362288264, "grad_norm": 2.6348707881144486, "learning_rate": 2.129167901316994e-06, "loss": 0.06351280212402344, "step": 137335 }, { "epoch": 1.1875383697503696, "grad_norm": 0.8399208592501526, "learning_rate": 2.1289729423848348e-06, "loss": 0.1732147216796875, "step": 137340 }, { "epoch": 1.1875816032719129, "grad_norm": 10.106270888605858, "learning_rate": 2.1287779874697744e-06, "loss": 0.31241455078125, "step": 137345 }, { "epoch": 1.187624836793456, "grad_norm": 6.809666576282685, "learning_rate": 2.128583036572714e-06, "loss": 0.057390785217285155, "step": 137350 }, { "epoch": 1.1876680703149995, "grad_norm": 3.3559958419268194, "learning_rate": 2.12838808969455e-06, "loss": 0.0499969482421875, "step": 137355 }, { "epoch": 1.1877113038365428, "grad_norm": 0.23957578852413727, "learning_rate": 2.1281931468361843e-06, "loss": 0.0247039794921875, "step": 137360 }, { "epoch": 1.187754537358086, "grad_norm": 3.2936734990273693, "learning_rate": 2.1279982079985152e-06, "loss": 0.16567535400390626, "step": 137365 }, { "epoch": 1.1877977708796292, "grad_norm": 2.562703982614621, "learning_rate": 2.1278032731824413e-06, "loss": 0.031397247314453126, "step": 137370 }, { "epoch": 1.1878410044011725, "grad_norm": 0.3758339278626724, "learning_rate": 2.1276083423888607e-06, "loss": 0.05019874572753906, "step": 137375 }, { "epoch": 1.1878842379227157, "grad_norm": 57.89182730928034, "learning_rate": 2.1274134156186744e-06, "loss": 0.21787528991699218, "step": 137380 }, { "epoch": 1.187927471444259, "grad_norm": 20.807236438984056, "learning_rate": 2.12721849287278e-06, "loss": 0.16295814514160156, "step": 137385 }, { "epoch": 1.1879707049658024, "grad_norm": 5.903407770234463, "learning_rate": 2.1270235741520753e-06, "loss": 0.130255126953125, "step": 137390 }, { "epoch": 1.1880139384873456, "grad_norm": 41.61700767129111, "learning_rate": 2.1268286594574626e-06, "loss": 0.24345645904541016, "step": 137395 }, { "epoch": 1.1880571720088888, "grad_norm": 7.948458103355531, "learning_rate": 2.1266337487898384e-06, "loss": 0.09929656982421875, "step": 137400 }, { "epoch": 1.188100405530432, "grad_norm": 4.174400382761302, "learning_rate": 2.126438842150102e-06, "loss": 0.026245880126953124, "step": 137405 }, { "epoch": 1.1881436390519753, "grad_norm": 13.43988807382409, "learning_rate": 2.126243939539153e-06, "loss": 0.09514389038085938, "step": 137410 }, { "epoch": 1.1881868725735185, "grad_norm": 2.3288979360159288, "learning_rate": 2.1260490409578893e-06, "loss": 0.08062477111816406, "step": 137415 }, { "epoch": 1.188230106095062, "grad_norm": 14.36111616245285, "learning_rate": 2.125854146407209e-06, "loss": 0.11404571533203126, "step": 137420 }, { "epoch": 1.1882733396166052, "grad_norm": 46.652270027239794, "learning_rate": 2.125659255888014e-06, "loss": 0.20543212890625, "step": 137425 }, { "epoch": 1.1883165731381484, "grad_norm": 5.521395092084674, "learning_rate": 2.125464369401201e-06, "loss": 0.05611572265625, "step": 137430 }, { "epoch": 1.1883598066596917, "grad_norm": 0.2504743021896586, "learning_rate": 2.1252694869476684e-06, "loss": 0.05844268798828125, "step": 137435 }, { "epoch": 1.1884030401812349, "grad_norm": 0.16859954588531187, "learning_rate": 2.125074608528316e-06, "loss": 0.047338104248046874, "step": 137440 }, { "epoch": 1.188446273702778, "grad_norm": 4.780997661181056, "learning_rate": 2.1248797341440423e-06, "loss": 0.3883644104003906, "step": 137445 }, { "epoch": 1.1884895072243213, "grad_norm": 1.2198136732149059, "learning_rate": 2.1246848637957464e-06, "loss": 0.09355506896972657, "step": 137450 }, { "epoch": 1.1885327407458648, "grad_norm": 8.731268679148892, "learning_rate": 2.1244899974843247e-06, "loss": 0.10717315673828125, "step": 137455 }, { "epoch": 1.188575974267408, "grad_norm": 82.01736336868254, "learning_rate": 2.124295135210679e-06, "loss": 0.23993301391601562, "step": 137460 }, { "epoch": 1.1886192077889512, "grad_norm": 1.2913544774544639, "learning_rate": 2.124100276975707e-06, "loss": 0.020749664306640624, "step": 137465 }, { "epoch": 1.1886624413104945, "grad_norm": 0.28118340465706676, "learning_rate": 2.1239054227803067e-06, "loss": 0.0559600830078125, "step": 137470 }, { "epoch": 1.1887056748320377, "grad_norm": 11.081870406176645, "learning_rate": 2.123710572625378e-06, "loss": 0.17809295654296875, "step": 137475 }, { "epoch": 1.1887489083535812, "grad_norm": 7.117760755958318, "learning_rate": 2.123515726511818e-06, "loss": 0.03301925659179687, "step": 137480 }, { "epoch": 1.1887921418751244, "grad_norm": 14.325210970483273, "learning_rate": 2.1233208844405258e-06, "loss": 0.21500053405761718, "step": 137485 }, { "epoch": 1.1888353753966676, "grad_norm": 1.7875278354113646, "learning_rate": 2.1231260464124003e-06, "loss": 0.1370269775390625, "step": 137490 }, { "epoch": 1.1888786089182108, "grad_norm": 0.11060527183946078, "learning_rate": 2.12293121242834e-06, "loss": 0.03408107757568359, "step": 137495 }, { "epoch": 1.188921842439754, "grad_norm": 6.08927363897772, "learning_rate": 2.122736382489244e-06, "loss": 0.11429901123046875, "step": 137500 }, { "epoch": 1.1889650759612973, "grad_norm": 12.598851247215304, "learning_rate": 2.12254155659601e-06, "loss": 0.12692832946777344, "step": 137505 }, { "epoch": 1.1890083094828405, "grad_norm": 0.22496756478251384, "learning_rate": 2.1223467347495372e-06, "loss": 0.07457389831542968, "step": 137510 }, { "epoch": 1.1890515430043838, "grad_norm": 0.12129762585457404, "learning_rate": 2.122151916950722e-06, "loss": 0.03363800048828125, "step": 137515 }, { "epoch": 1.1890947765259272, "grad_norm": 2.164925192829945, "learning_rate": 2.1219571032004657e-06, "loss": 0.22584686279296876, "step": 137520 }, { "epoch": 1.1891380100474704, "grad_norm": 1.550909313149649, "learning_rate": 2.121762293499666e-06, "loss": 0.1312580108642578, "step": 137525 }, { "epoch": 1.1891812435690137, "grad_norm": 19.825026108780357, "learning_rate": 2.1215674878492213e-06, "loss": 0.18934345245361328, "step": 137530 }, { "epoch": 1.189224477090557, "grad_norm": 17.499321771216525, "learning_rate": 2.1213726862500292e-06, "loss": 0.10388412475585937, "step": 137535 }, { "epoch": 1.1892677106121001, "grad_norm": 2.419091613543521, "learning_rate": 2.1211778887029886e-06, "loss": 0.03184089660644531, "step": 137540 }, { "epoch": 1.1893109441336436, "grad_norm": 0.4084604796397496, "learning_rate": 2.120983095208998e-06, "loss": 0.026990509033203124, "step": 137545 }, { "epoch": 1.1893541776551868, "grad_norm": 6.222174300463918, "learning_rate": 2.1207883057689546e-06, "loss": 0.17747955322265624, "step": 137550 }, { "epoch": 1.18939741117673, "grad_norm": 6.333259464462372, "learning_rate": 2.1205935203837596e-06, "loss": 0.09151573181152343, "step": 137555 }, { "epoch": 1.1894406446982733, "grad_norm": 2.828350467896455, "learning_rate": 2.120398739054309e-06, "loss": 0.08280305862426758, "step": 137560 }, { "epoch": 1.1894838782198165, "grad_norm": 1.5067547221801298, "learning_rate": 2.1202039617815017e-06, "loss": 0.07875709533691407, "step": 137565 }, { "epoch": 1.1895271117413597, "grad_norm": 15.734736719250641, "learning_rate": 2.1200091885662355e-06, "loss": 0.12082901000976562, "step": 137570 }, { "epoch": 1.189570345262903, "grad_norm": 0.5476125396026765, "learning_rate": 2.1198144194094103e-06, "loss": 0.08797683715820312, "step": 137575 }, { "epoch": 1.1896135787844464, "grad_norm": 0.3418596421656417, "learning_rate": 2.119619654311921e-06, "loss": 0.0260040283203125, "step": 137580 }, { "epoch": 1.1896568123059896, "grad_norm": 35.855786522940825, "learning_rate": 2.11942489327467e-06, "loss": 0.10571937561035157, "step": 137585 }, { "epoch": 1.1897000458275329, "grad_norm": 61.87890273516517, "learning_rate": 2.1192301362985533e-06, "loss": 0.14497604370117187, "step": 137590 }, { "epoch": 1.189743279349076, "grad_norm": 0.14575877658429642, "learning_rate": 2.1190353833844696e-06, "loss": 0.02748565673828125, "step": 137595 }, { "epoch": 1.1897865128706193, "grad_norm": 0.7885369593488748, "learning_rate": 2.118840634533316e-06, "loss": 0.02067985534667969, "step": 137600 }, { "epoch": 1.1898297463921625, "grad_norm": 0.7353016498471289, "learning_rate": 2.1186458897459923e-06, "loss": 0.10811424255371094, "step": 137605 }, { "epoch": 1.189872979913706, "grad_norm": 7.614877512805711, "learning_rate": 2.1184511490233945e-06, "loss": 0.07537689208984374, "step": 137610 }, { "epoch": 1.1899162134352492, "grad_norm": 0.9869098566035494, "learning_rate": 2.118256412366424e-06, "loss": 0.0317840576171875, "step": 137615 }, { "epoch": 1.1899594469567925, "grad_norm": 2.548830150869308, "learning_rate": 2.1180616797759763e-06, "loss": 0.152825927734375, "step": 137620 }, { "epoch": 1.1900026804783357, "grad_norm": 0.13846603729841386, "learning_rate": 2.1178669512529505e-06, "loss": 0.01729583740234375, "step": 137625 }, { "epoch": 1.190045913999879, "grad_norm": 1.708779816299705, "learning_rate": 2.1176722267982438e-06, "loss": 0.01761016845703125, "step": 137630 }, { "epoch": 1.1900891475214221, "grad_norm": 0.09204880629319573, "learning_rate": 2.1174775064127553e-06, "loss": 0.1507843017578125, "step": 137635 }, { "epoch": 1.1901323810429654, "grad_norm": 2.5535933198542566, "learning_rate": 2.117282790097382e-06, "loss": 0.09757156372070312, "step": 137640 }, { "epoch": 1.1901756145645088, "grad_norm": 5.507273124430331, "learning_rate": 2.117088077853022e-06, "loss": 0.034100341796875, "step": 137645 }, { "epoch": 1.190218848086052, "grad_norm": 6.71374906471874, "learning_rate": 2.1168933696805744e-06, "loss": 0.036106109619140625, "step": 137650 }, { "epoch": 1.1902620816075953, "grad_norm": 14.79099220022775, "learning_rate": 2.1166986655809377e-06, "loss": 0.03944854736328125, "step": 137655 }, { "epoch": 1.1903053151291385, "grad_norm": 0.30377494979857106, "learning_rate": 2.116503965555007e-06, "loss": 0.0188690185546875, "step": 137660 }, { "epoch": 1.1903485486506817, "grad_norm": 13.02830776181758, "learning_rate": 2.116309269603683e-06, "loss": 0.12111167907714844, "step": 137665 }, { "epoch": 1.190391782172225, "grad_norm": 15.51413138196862, "learning_rate": 2.1161145777278624e-06, "loss": 0.05136871337890625, "step": 137670 }, { "epoch": 1.1904350156937684, "grad_norm": 26.60662491613819, "learning_rate": 2.1159198899284417e-06, "loss": 0.17907371520996093, "step": 137675 }, { "epoch": 1.1904782492153116, "grad_norm": 11.365465894192079, "learning_rate": 2.1157252062063225e-06, "loss": 0.07342605590820313, "step": 137680 }, { "epoch": 1.1905214827368549, "grad_norm": 0.1917140491558968, "learning_rate": 2.1155305265623997e-06, "loss": 0.1084197998046875, "step": 137685 }, { "epoch": 1.190564716258398, "grad_norm": 0.11998106378482247, "learning_rate": 2.1153358509975716e-06, "loss": 0.052997589111328125, "step": 137690 }, { "epoch": 1.1906079497799413, "grad_norm": 0.1574867808296858, "learning_rate": 2.115141179512737e-06, "loss": 0.08243789672851562, "step": 137695 }, { "epoch": 1.1906511833014846, "grad_norm": 0.4413787498025961, "learning_rate": 2.114946512108793e-06, "loss": 0.1404582977294922, "step": 137700 }, { "epoch": 1.1906944168230278, "grad_norm": 0.11858392228554977, "learning_rate": 2.1147518487866364e-06, "loss": 0.017664718627929687, "step": 137705 }, { "epoch": 1.1907376503445712, "grad_norm": 16.937275296150165, "learning_rate": 2.1145571895471673e-06, "loss": 0.07893295288085937, "step": 137710 }, { "epoch": 1.1907808838661145, "grad_norm": 48.12533470964757, "learning_rate": 2.1143625343912817e-06, "loss": 0.10647964477539062, "step": 137715 }, { "epoch": 1.1908241173876577, "grad_norm": 4.356749958869818, "learning_rate": 2.1141678833198783e-06, "loss": 0.0968048095703125, "step": 137720 }, { "epoch": 1.190867350909201, "grad_norm": 16.05790242187009, "learning_rate": 2.1139732363338546e-06, "loss": 0.0633819580078125, "step": 137725 }, { "epoch": 1.1909105844307442, "grad_norm": 12.10972311601907, "learning_rate": 2.113778593434107e-06, "loss": 0.05055656433105469, "step": 137730 }, { "epoch": 1.1909538179522876, "grad_norm": 2.404158742265544, "learning_rate": 2.113583954621535e-06, "loss": 0.010098648071289063, "step": 137735 }, { "epoch": 1.1909970514738308, "grad_norm": 1.6752891124394274, "learning_rate": 2.1133893198970344e-06, "loss": 0.24563999176025392, "step": 137740 }, { "epoch": 1.191040284995374, "grad_norm": 6.382591956062064, "learning_rate": 2.1131946892615045e-06, "loss": 0.3754158020019531, "step": 137745 }, { "epoch": 1.1910835185169173, "grad_norm": 0.47357986769278, "learning_rate": 2.113000062715843e-06, "loss": 0.0501495361328125, "step": 137750 }, { "epoch": 1.1911267520384605, "grad_norm": 0.9277473808029624, "learning_rate": 2.1128054402609465e-06, "loss": 0.11808242797851562, "step": 137755 }, { "epoch": 1.1911699855600038, "grad_norm": 51.725840170632836, "learning_rate": 2.112610821897713e-06, "loss": 0.13841094970703124, "step": 137760 }, { "epoch": 1.191213219081547, "grad_norm": 6.024746063996118, "learning_rate": 2.1124162076270398e-06, "loss": 0.053800201416015624, "step": 137765 }, { "epoch": 1.1912564526030902, "grad_norm": 7.498135079542265, "learning_rate": 2.112221597449824e-06, "loss": 0.02793464660644531, "step": 137770 }, { "epoch": 1.1912996861246337, "grad_norm": 4.24425435474748, "learning_rate": 2.112026991366964e-06, "loss": 0.0856292724609375, "step": 137775 }, { "epoch": 1.191342919646177, "grad_norm": 4.339584737339486, "learning_rate": 2.111832389379358e-06, "loss": 0.019499588012695312, "step": 137780 }, { "epoch": 1.1913861531677201, "grad_norm": 4.140186289529695, "learning_rate": 2.1116377914879018e-06, "loss": 0.12003974914550782, "step": 137785 }, { "epoch": 1.1914293866892633, "grad_norm": 6.269459448643126, "learning_rate": 2.1114431976934944e-06, "loss": 0.03525390625, "step": 137790 }, { "epoch": 1.1914726202108066, "grad_norm": 1.3422257979118062, "learning_rate": 2.111248607997031e-06, "loss": 0.08025836944580078, "step": 137795 }, { "epoch": 1.19151585373235, "grad_norm": 5.847138439243364, "learning_rate": 2.111054022399412e-06, "loss": 0.016106414794921874, "step": 137800 }, { "epoch": 1.1915590872538933, "grad_norm": 1.7052932416333735, "learning_rate": 2.1108594409015313e-06, "loss": 0.099176025390625, "step": 137805 }, { "epoch": 1.1916023207754365, "grad_norm": 0.7249524029075066, "learning_rate": 2.1106648635042896e-06, "loss": 0.06364383697509765, "step": 137810 }, { "epoch": 1.1916455542969797, "grad_norm": 9.213152323462928, "learning_rate": 2.1104702902085835e-06, "loss": 0.06638107299804688, "step": 137815 }, { "epoch": 1.191688787818523, "grad_norm": 11.498910800985753, "learning_rate": 2.1102757210153095e-06, "loss": 0.0813375473022461, "step": 137820 }, { "epoch": 1.1917320213400662, "grad_norm": 0.22831961198252695, "learning_rate": 2.1100811559253647e-06, "loss": 0.0709136962890625, "step": 137825 }, { "epoch": 1.1917752548616094, "grad_norm": 1.6069270245299494, "learning_rate": 2.1098865949396477e-06, "loss": 0.40871992111206057, "step": 137830 }, { "epoch": 1.1918184883831529, "grad_norm": 0.23595335978444293, "learning_rate": 2.1096920380590535e-06, "loss": 0.034456825256347655, "step": 137835 }, { "epoch": 1.191861721904696, "grad_norm": 3.7479101921971822, "learning_rate": 2.1094974852844823e-06, "loss": 0.057379150390625, "step": 137840 }, { "epoch": 1.1919049554262393, "grad_norm": 23.104569805806676, "learning_rate": 2.1093029366168305e-06, "loss": 0.08448486328125, "step": 137845 }, { "epoch": 1.1919481889477825, "grad_norm": 7.59316562675714, "learning_rate": 2.109108392056994e-06, "loss": 0.06657791137695312, "step": 137850 }, { "epoch": 1.1919914224693258, "grad_norm": 0.09555783638906098, "learning_rate": 2.1089138516058714e-06, "loss": 0.07741851806640625, "step": 137855 }, { "epoch": 1.192034655990869, "grad_norm": 63.03501882601356, "learning_rate": 2.108719315264359e-06, "loss": 0.1252227783203125, "step": 137860 }, { "epoch": 1.1920778895124124, "grad_norm": 7.944073934128864, "learning_rate": 2.1085247830333532e-06, "loss": 0.030339813232421874, "step": 137865 }, { "epoch": 1.1921211230339557, "grad_norm": 6.724615050063124, "learning_rate": 2.1083302549137538e-06, "loss": 0.0276031494140625, "step": 137870 }, { "epoch": 1.192164356555499, "grad_norm": 15.36303601889355, "learning_rate": 2.1081357309064565e-06, "loss": 0.056256103515625, "step": 137875 }, { "epoch": 1.1922075900770421, "grad_norm": 0.1951698192847201, "learning_rate": 2.107941211012359e-06, "loss": 0.052915191650390624, "step": 137880 }, { "epoch": 1.1922508235985854, "grad_norm": 0.6757887819315873, "learning_rate": 2.107746695232356e-06, "loss": 0.029254150390625, "step": 137885 }, { "epoch": 1.1922940571201286, "grad_norm": 12.962988295819047, "learning_rate": 2.1075521835673476e-06, "loss": 0.04498138427734375, "step": 137890 }, { "epoch": 1.1923372906416718, "grad_norm": 8.01304031507938, "learning_rate": 2.10735767601823e-06, "loss": 0.08148155212402344, "step": 137895 }, { "epoch": 1.1923805241632153, "grad_norm": 35.910615293426346, "learning_rate": 2.1071631725858983e-06, "loss": 0.23514404296875, "step": 137900 }, { "epoch": 1.1924237576847585, "grad_norm": 4.232832150468731, "learning_rate": 2.1069686732712522e-06, "loss": 0.03245849609375, "step": 137905 }, { "epoch": 1.1924669912063017, "grad_norm": 0.6886658640231189, "learning_rate": 2.106774178075188e-06, "loss": 0.034474945068359374, "step": 137910 }, { "epoch": 1.192510224727845, "grad_norm": 5.524143130255841, "learning_rate": 2.1065796869986015e-06, "loss": 0.0257568359375, "step": 137915 }, { "epoch": 1.1925534582493882, "grad_norm": 2.5167090471038867, "learning_rate": 2.106385200042392e-06, "loss": 0.013330841064453125, "step": 137920 }, { "epoch": 1.1925966917709314, "grad_norm": 0.3197912940264087, "learning_rate": 2.106190717207454e-06, "loss": 0.15560836791992189, "step": 137925 }, { "epoch": 1.1926399252924749, "grad_norm": 5.794103369455954, "learning_rate": 2.1059962384946845e-06, "loss": 0.07259521484375, "step": 137930 }, { "epoch": 1.192683158814018, "grad_norm": 8.678584401931689, "learning_rate": 2.1058017639049827e-06, "loss": 0.08668975830078125, "step": 137935 }, { "epoch": 1.1927263923355613, "grad_norm": 2.783810475409061, "learning_rate": 2.1056072934392432e-06, "loss": 0.03922119140625, "step": 137940 }, { "epoch": 1.1927696258571046, "grad_norm": 16.47827753362577, "learning_rate": 2.105412827098365e-06, "loss": 0.19488601684570311, "step": 137945 }, { "epoch": 1.1928128593786478, "grad_norm": 6.837107092744151, "learning_rate": 2.1052183648832435e-06, "loss": 0.01691093444824219, "step": 137950 }, { "epoch": 1.192856092900191, "grad_norm": 3.798798218069593, "learning_rate": 2.105023906794776e-06, "loss": 0.03571243286132812, "step": 137955 }, { "epoch": 1.1928993264217342, "grad_norm": 0.9102629171838861, "learning_rate": 2.104829452833857e-06, "loss": 0.33994140625, "step": 137960 }, { "epoch": 1.1929425599432777, "grad_norm": 3.1748335170986555, "learning_rate": 2.1046350030013876e-06, "loss": 0.024411773681640624, "step": 137965 }, { "epoch": 1.192985793464821, "grad_norm": 0.44478171929132043, "learning_rate": 2.104440557298262e-06, "loss": 0.07269706726074218, "step": 137970 }, { "epoch": 1.1930290269863641, "grad_norm": 0.6498312361355235, "learning_rate": 2.1042461157253776e-06, "loss": 0.007224273681640625, "step": 137975 }, { "epoch": 1.1930722605079074, "grad_norm": 1.4739204814972127, "learning_rate": 2.1040516782836313e-06, "loss": 0.05400161743164063, "step": 137980 }, { "epoch": 1.1931154940294506, "grad_norm": 6.482035943159915, "learning_rate": 2.103857244973919e-06, "loss": 0.018175506591796876, "step": 137985 }, { "epoch": 1.193158727550994, "grad_norm": 0.9923257687229898, "learning_rate": 2.103662815797138e-06, "loss": 0.08499832153320312, "step": 137990 }, { "epoch": 1.1932019610725373, "grad_norm": 4.027999553003646, "learning_rate": 2.103468390754184e-06, "loss": 0.08896560668945312, "step": 137995 }, { "epoch": 1.1932451945940805, "grad_norm": 14.37700842191297, "learning_rate": 2.1032739698459553e-06, "loss": 0.066796875, "step": 138000 }, { "epoch": 1.1932884281156237, "grad_norm": 3.449792700406486, "learning_rate": 2.1030795530733476e-06, "loss": 0.02721405029296875, "step": 138005 }, { "epoch": 1.193331661637167, "grad_norm": 8.303652423978894, "learning_rate": 2.102885140437259e-06, "loss": 0.057745361328125, "step": 138010 }, { "epoch": 1.1933748951587102, "grad_norm": 0.8810503000678901, "learning_rate": 2.102690731938584e-06, "loss": 0.10467910766601562, "step": 138015 }, { "epoch": 1.1934181286802534, "grad_norm": 3.102726624704454, "learning_rate": 2.1024963275782195e-06, "loss": 0.09535064697265624, "step": 138020 }, { "epoch": 1.1934613622017967, "grad_norm": 18.451189754854177, "learning_rate": 2.1023019273570626e-06, "loss": 0.04011669158935547, "step": 138025 }, { "epoch": 1.1935045957233401, "grad_norm": 3.8069738990875273, "learning_rate": 2.10210753127601e-06, "loss": 0.04205474853515625, "step": 138030 }, { "epoch": 1.1935478292448833, "grad_norm": 4.158613887743789, "learning_rate": 2.101913139335959e-06, "loss": 0.017534255981445312, "step": 138035 }, { "epoch": 1.1935910627664266, "grad_norm": 4.9363999087786645, "learning_rate": 2.101718751537805e-06, "loss": 0.064080810546875, "step": 138040 }, { "epoch": 1.1936342962879698, "grad_norm": 1.1082326623880172, "learning_rate": 2.1015243678824445e-06, "loss": 0.041435623168945314, "step": 138045 }, { "epoch": 1.193677529809513, "grad_norm": 10.00564356162984, "learning_rate": 2.101329988370774e-06, "loss": 0.083538818359375, "step": 138050 }, { "epoch": 1.1937207633310565, "grad_norm": 0.09756205065592521, "learning_rate": 2.1011356130036897e-06, "loss": 0.017784881591796874, "step": 138055 }, { "epoch": 1.1937639968525997, "grad_norm": 13.670370284813188, "learning_rate": 2.1009412417820893e-06, "loss": 0.05382347106933594, "step": 138060 }, { "epoch": 1.193807230374143, "grad_norm": 12.450916040364744, "learning_rate": 2.1007468747068685e-06, "loss": 0.07175521850585938, "step": 138065 }, { "epoch": 1.1938504638956862, "grad_norm": 2.3945761072124334, "learning_rate": 2.1005525117789235e-06, "loss": 0.17200965881347657, "step": 138070 }, { "epoch": 1.1938936974172294, "grad_norm": 12.918399793220221, "learning_rate": 2.1003581529991512e-06, "loss": 0.041389083862304686, "step": 138075 }, { "epoch": 1.1939369309387726, "grad_norm": 4.37245416102477, "learning_rate": 2.1001637983684475e-06, "loss": 0.0693603515625, "step": 138080 }, { "epoch": 1.1939801644603159, "grad_norm": 4.325097501946686, "learning_rate": 2.0999694478877084e-06, "loss": 0.024981689453125, "step": 138085 }, { "epoch": 1.1940233979818593, "grad_norm": 0.19362267207847159, "learning_rate": 2.09977510155783e-06, "loss": 0.0341949462890625, "step": 138090 }, { "epoch": 1.1940666315034025, "grad_norm": 3.4869788755918507, "learning_rate": 2.0995807593797104e-06, "loss": 0.0405364990234375, "step": 138095 }, { "epoch": 1.1941098650249458, "grad_norm": 2.985077493573906, "learning_rate": 2.099386421354245e-06, "loss": 0.01493072509765625, "step": 138100 }, { "epoch": 1.194153098546489, "grad_norm": 25.10243619089974, "learning_rate": 2.0991920874823297e-06, "loss": 0.08132095336914062, "step": 138105 }, { "epoch": 1.1941963320680322, "grad_norm": 0.8960202924663462, "learning_rate": 2.0989977577648605e-06, "loss": 0.022348403930664062, "step": 138110 }, { "epoch": 1.1942395655895754, "grad_norm": 0.20051784912347667, "learning_rate": 2.0988034322027342e-06, "loss": 0.23114700317382814, "step": 138115 }, { "epoch": 1.194282799111119, "grad_norm": 3.1331592597824, "learning_rate": 2.0986091107968457e-06, "loss": 0.14591445922851562, "step": 138120 }, { "epoch": 1.1943260326326621, "grad_norm": 77.99370239935206, "learning_rate": 2.098414793548094e-06, "loss": 0.2564300537109375, "step": 138125 }, { "epoch": 1.1943692661542054, "grad_norm": 0.03297569164901899, "learning_rate": 2.0982204804573726e-06, "loss": 0.031846237182617185, "step": 138130 }, { "epoch": 1.1944124996757486, "grad_norm": 5.627899920601307, "learning_rate": 2.09802617152558e-06, "loss": 0.08205900192260743, "step": 138135 }, { "epoch": 1.1944557331972918, "grad_norm": 11.433073976511029, "learning_rate": 2.0978318667536093e-06, "loss": 0.06284103393554688, "step": 138140 }, { "epoch": 1.194498966718835, "grad_norm": 15.00347545546601, "learning_rate": 2.0976375661423597e-06, "loss": 0.17294387817382811, "step": 138145 }, { "epoch": 1.1945422002403783, "grad_norm": 0.6872176131329228, "learning_rate": 2.0974432696927253e-06, "loss": 0.09503631591796875, "step": 138150 }, { "epoch": 1.1945854337619217, "grad_norm": 16.41601683058593, "learning_rate": 2.0972489774056023e-06, "loss": 0.12457866668701172, "step": 138155 }, { "epoch": 1.194628667283465, "grad_norm": 1.8956751509376206, "learning_rate": 2.0970546892818876e-06, "loss": 0.011481857299804688, "step": 138160 }, { "epoch": 1.1946719008050082, "grad_norm": 19.606196051226775, "learning_rate": 2.096860405322477e-06, "loss": 0.23611602783203126, "step": 138165 }, { "epoch": 1.1947151343265514, "grad_norm": 0.5676773126468059, "learning_rate": 2.0966661255282665e-06, "loss": 0.04676055908203125, "step": 138170 }, { "epoch": 1.1947583678480946, "grad_norm": 0.532565004696452, "learning_rate": 2.096471849900152e-06, "loss": 0.03890151977539062, "step": 138175 }, { "epoch": 1.1948016013696379, "grad_norm": 0.2233156898432003, "learning_rate": 2.09627757843903e-06, "loss": 0.030123138427734376, "step": 138180 }, { "epoch": 1.1948448348911813, "grad_norm": 0.603490845304141, "learning_rate": 2.0960833111457945e-06, "loss": 0.06516952514648437, "step": 138185 }, { "epoch": 1.1948880684127245, "grad_norm": 0.866246808704552, "learning_rate": 2.0958890480213437e-06, "loss": 0.243011474609375, "step": 138190 }, { "epoch": 1.1949313019342678, "grad_norm": 1.5176137653337138, "learning_rate": 2.0956947890665723e-06, "loss": 0.03977622985839844, "step": 138195 }, { "epoch": 1.194974535455811, "grad_norm": 32.13627928673331, "learning_rate": 2.0955005342823776e-06, "loss": 0.07114028930664062, "step": 138200 }, { "epoch": 1.1950177689773542, "grad_norm": 2.3281974177493487, "learning_rate": 2.095306283669654e-06, "loss": 0.0669891357421875, "step": 138205 }, { "epoch": 1.1950610024988975, "grad_norm": 15.041649565497732, "learning_rate": 2.0951120372292986e-06, "loss": 0.024398040771484376, "step": 138210 }, { "epoch": 1.1951042360204407, "grad_norm": 3.2722610638281604, "learning_rate": 2.094917794962205e-06, "loss": 0.07773609161376953, "step": 138215 }, { "epoch": 1.1951474695419841, "grad_norm": 0.6250416258276534, "learning_rate": 2.0947235568692713e-06, "loss": 0.02699432373046875, "step": 138220 }, { "epoch": 1.1951907030635274, "grad_norm": 5.616437475266587, "learning_rate": 2.0945293229513924e-06, "loss": 0.14965591430664063, "step": 138225 }, { "epoch": 1.1952339365850706, "grad_norm": 0.25336318973926364, "learning_rate": 2.094335093209465e-06, "loss": 0.018896484375, "step": 138230 }, { "epoch": 1.1952771701066138, "grad_norm": 6.575807013066722, "learning_rate": 2.094140867644384e-06, "loss": 0.037738800048828125, "step": 138235 }, { "epoch": 1.195320403628157, "grad_norm": 0.5562916260954754, "learning_rate": 2.093946646257045e-06, "loss": 0.045664215087890626, "step": 138240 }, { "epoch": 1.1953636371497005, "grad_norm": 10.905499649229931, "learning_rate": 2.0937524290483434e-06, "loss": 0.0689697265625, "step": 138245 }, { "epoch": 1.1954068706712437, "grad_norm": 0.690670586527416, "learning_rate": 2.0935582160191758e-06, "loss": 0.030800628662109374, "step": 138250 }, { "epoch": 1.195450104192787, "grad_norm": 2.2914956769024166, "learning_rate": 2.0933640071704376e-06, "loss": 0.03530864715576172, "step": 138255 }, { "epoch": 1.1954933377143302, "grad_norm": 0.8726024331113922, "learning_rate": 2.093169802503025e-06, "loss": 0.054721832275390625, "step": 138260 }, { "epoch": 1.1955365712358734, "grad_norm": 0.4004979133893406, "learning_rate": 2.092975602017833e-06, "loss": 0.050618553161621095, "step": 138265 }, { "epoch": 1.1955798047574167, "grad_norm": 4.145566692148898, "learning_rate": 2.0927814057157575e-06, "loss": 0.028289031982421876, "step": 138270 }, { "epoch": 1.1956230382789599, "grad_norm": 0.734158376898886, "learning_rate": 2.0925872135976937e-06, "loss": 0.04007415771484375, "step": 138275 }, { "epoch": 1.1956662718005033, "grad_norm": 0.07871884423728934, "learning_rate": 2.092393025664537e-06, "loss": 0.025353240966796874, "step": 138280 }, { "epoch": 1.1957095053220466, "grad_norm": 0.49123508931981236, "learning_rate": 2.0921988419171838e-06, "loss": 0.07340621948242188, "step": 138285 }, { "epoch": 1.1957527388435898, "grad_norm": 15.326411016921698, "learning_rate": 2.0920046623565295e-06, "loss": 0.11606254577636718, "step": 138290 }, { "epoch": 1.195795972365133, "grad_norm": 6.5118513461586405, "learning_rate": 2.0918104869834702e-06, "loss": 0.10066070556640624, "step": 138295 }, { "epoch": 1.1958392058866762, "grad_norm": 0.8271208741345119, "learning_rate": 2.0916163157989003e-06, "loss": 0.013069725036621094, "step": 138300 }, { "epoch": 1.1958824394082195, "grad_norm": 33.115033398521305, "learning_rate": 2.091422148803715e-06, "loss": 0.04830474853515625, "step": 138305 }, { "epoch": 1.195925672929763, "grad_norm": 4.411261011670754, "learning_rate": 2.0912279859988104e-06, "loss": 0.29379119873046877, "step": 138310 }, { "epoch": 1.1959689064513062, "grad_norm": 4.683466670791225, "learning_rate": 2.091033827385082e-06, "loss": 0.033917999267578124, "step": 138315 }, { "epoch": 1.1960121399728494, "grad_norm": 4.505914669316596, "learning_rate": 2.090839672963426e-06, "loss": 0.08066825866699219, "step": 138320 }, { "epoch": 1.1960553734943926, "grad_norm": 1.0009859855106404, "learning_rate": 2.0906455227347372e-06, "loss": 0.017730712890625, "step": 138325 }, { "epoch": 1.1960986070159358, "grad_norm": 0.8981141544217036, "learning_rate": 2.090451376699911e-06, "loss": 0.03164834976196289, "step": 138330 }, { "epoch": 1.196141840537479, "grad_norm": 0.7285847061860797, "learning_rate": 2.090257234859842e-06, "loss": 0.0679168701171875, "step": 138335 }, { "epoch": 1.1961850740590223, "grad_norm": 0.08264759272752045, "learning_rate": 2.0900630972154267e-06, "loss": 0.026834869384765626, "step": 138340 }, { "epoch": 1.1962283075805658, "grad_norm": 27.49370624787061, "learning_rate": 2.0898689637675587e-06, "loss": 0.16729812622070311, "step": 138345 }, { "epoch": 1.196271541102109, "grad_norm": 1.8873153155922928, "learning_rate": 2.0896748345171357e-06, "loss": 0.051966094970703126, "step": 138350 }, { "epoch": 1.1963147746236522, "grad_norm": 0.09556188105289044, "learning_rate": 2.0894807094650523e-06, "loss": 0.03498077392578125, "step": 138355 }, { "epoch": 1.1963580081451954, "grad_norm": 0.9075238121539583, "learning_rate": 2.0892865886122033e-06, "loss": 0.09491729736328125, "step": 138360 }, { "epoch": 1.1964012416667387, "grad_norm": 5.127958259075319, "learning_rate": 2.0890924719594833e-06, "loss": 0.017502593994140624, "step": 138365 }, { "epoch": 1.196444475188282, "grad_norm": 1.341783036614073, "learning_rate": 2.088898359507789e-06, "loss": 0.19848098754882812, "step": 138370 }, { "epoch": 1.1964877087098253, "grad_norm": 1.4998091717999034, "learning_rate": 2.088704251258014e-06, "loss": 0.06176300048828125, "step": 138375 }, { "epoch": 1.1965309422313686, "grad_norm": 8.406292240125703, "learning_rate": 2.0885101472110557e-06, "loss": 0.08324594497680664, "step": 138380 }, { "epoch": 1.1965741757529118, "grad_norm": 2.9612974159191494, "learning_rate": 2.088316047367808e-06, "loss": 0.06438713073730469, "step": 138385 }, { "epoch": 1.196617409274455, "grad_norm": 2.0111788588812463, "learning_rate": 2.088121951729165e-06, "loss": 0.192657470703125, "step": 138390 }, { "epoch": 1.1966606427959983, "grad_norm": 3.7032780701818844, "learning_rate": 2.0879278602960243e-06, "loss": 0.05791397094726562, "step": 138395 }, { "epoch": 1.1967038763175415, "grad_norm": 0.5256806085004351, "learning_rate": 2.087733773069279e-06, "loss": 0.10604705810546874, "step": 138400 }, { "epoch": 1.1967471098390847, "grad_norm": 4.728646848791551, "learning_rate": 2.0875396900498247e-06, "loss": 0.016472625732421874, "step": 138405 }, { "epoch": 1.1967903433606282, "grad_norm": 6.041676201483051, "learning_rate": 2.0873456112385574e-06, "loss": 0.14843978881835937, "step": 138410 }, { "epoch": 1.1968335768821714, "grad_norm": 4.517720288641565, "learning_rate": 2.0871515366363713e-06, "loss": 0.15311660766601562, "step": 138415 }, { "epoch": 1.1968768104037146, "grad_norm": 1.8318378670988997, "learning_rate": 2.0869574662441612e-06, "loss": 0.20757293701171875, "step": 138420 }, { "epoch": 1.1969200439252579, "grad_norm": 1.4447967437051255, "learning_rate": 2.086763400062823e-06, "loss": 0.034782028198242186, "step": 138425 }, { "epoch": 1.196963277446801, "grad_norm": 9.209066195681386, "learning_rate": 2.086569338093252e-06, "loss": 0.23064498901367186, "step": 138430 }, { "epoch": 1.1970065109683445, "grad_norm": 2.2492318567024983, "learning_rate": 2.0863752803363417e-06, "loss": 0.0356170654296875, "step": 138435 }, { "epoch": 1.1970497444898878, "grad_norm": 0.5704659612423328, "learning_rate": 2.086181226792987e-06, "loss": 0.021524810791015626, "step": 138440 }, { "epoch": 1.197092978011431, "grad_norm": 17.56426291594204, "learning_rate": 2.085987177464085e-06, "loss": 0.11206893920898438, "step": 138445 }, { "epoch": 1.1971362115329742, "grad_norm": 4.292062985375581, "learning_rate": 2.0857931323505286e-06, "loss": 0.10605392456054688, "step": 138450 }, { "epoch": 1.1971794450545175, "grad_norm": 9.946993763955577, "learning_rate": 2.0855990914532145e-06, "loss": 0.02379150390625, "step": 138455 }, { "epoch": 1.1972226785760607, "grad_norm": 36.92619633594002, "learning_rate": 2.085405054773036e-06, "loss": 0.1119476318359375, "step": 138460 }, { "epoch": 1.197265912097604, "grad_norm": 43.26817775059952, "learning_rate": 2.085211022310889e-06, "loss": 0.2081939697265625, "step": 138465 }, { "epoch": 1.1973091456191471, "grad_norm": 9.564128066200288, "learning_rate": 2.0850169940676665e-06, "loss": 0.03826217651367188, "step": 138470 }, { "epoch": 1.1973523791406906, "grad_norm": 7.5604547065769925, "learning_rate": 2.084822970044266e-06, "loss": 0.05338134765625, "step": 138475 }, { "epoch": 1.1973956126622338, "grad_norm": 27.730173085136496, "learning_rate": 2.0846289502415803e-06, "loss": 0.09583206176757812, "step": 138480 }, { "epoch": 1.197438846183777, "grad_norm": 1.827268996821012, "learning_rate": 2.084434934660506e-06, "loss": 0.1669635772705078, "step": 138485 }, { "epoch": 1.1974820797053203, "grad_norm": 50.56727758190179, "learning_rate": 2.084240923301937e-06, "loss": 0.15257911682128905, "step": 138490 }, { "epoch": 1.1975253132268635, "grad_norm": 9.510304679810035, "learning_rate": 2.0840469161667675e-06, "loss": 0.03919029235839844, "step": 138495 }, { "epoch": 1.197568546748407, "grad_norm": 6.7504831684726225, "learning_rate": 2.0838529132558923e-06, "loss": 0.07259521484375, "step": 138500 }, { "epoch": 1.1976117802699502, "grad_norm": 35.74554289496441, "learning_rate": 2.0836589145702064e-06, "loss": 0.14317779541015624, "step": 138505 }, { "epoch": 1.1976550137914934, "grad_norm": 71.0981210174626, "learning_rate": 2.0834649201106046e-06, "loss": 0.1505096435546875, "step": 138510 }, { "epoch": 1.1976982473130366, "grad_norm": 8.141586778641734, "learning_rate": 2.0832709298779826e-06, "loss": 0.10998973846435547, "step": 138515 }, { "epoch": 1.1977414808345799, "grad_norm": 0.8397990715300293, "learning_rate": 2.0830769438732337e-06, "loss": 0.06506233215332032, "step": 138520 }, { "epoch": 1.197784714356123, "grad_norm": 2.516117626181883, "learning_rate": 2.082882962097253e-06, "loss": 0.02259063720703125, "step": 138525 }, { "epoch": 1.1978279478776663, "grad_norm": 28.814858547259202, "learning_rate": 2.082688984550934e-06, "loss": 0.1390361785888672, "step": 138530 }, { "epoch": 1.1978711813992098, "grad_norm": 3.9423053839557243, "learning_rate": 2.0824950112351734e-06, "loss": 0.05373077392578125, "step": 138535 }, { "epoch": 1.197914414920753, "grad_norm": 9.87058988623747, "learning_rate": 2.082301042150864e-06, "loss": 0.05084991455078125, "step": 138540 }, { "epoch": 1.1979576484422962, "grad_norm": 0.11337187061878826, "learning_rate": 2.0821070772989015e-06, "loss": 0.036787796020507815, "step": 138545 }, { "epoch": 1.1980008819638395, "grad_norm": 1.3365535222723552, "learning_rate": 2.0819131166801807e-06, "loss": 0.03709831237792969, "step": 138550 }, { "epoch": 1.1980441154853827, "grad_norm": 67.7189786953445, "learning_rate": 2.0817191602955945e-06, "loss": 0.43768310546875, "step": 138555 }, { "epoch": 1.198087349006926, "grad_norm": 3.7296577321991866, "learning_rate": 2.081525208146039e-06, "loss": 0.06714668273925781, "step": 138560 }, { "epoch": 1.1981305825284694, "grad_norm": 0.45790687364074134, "learning_rate": 2.0813312602324066e-06, "loss": 0.03677520751953125, "step": 138565 }, { "epoch": 1.1981738160500126, "grad_norm": 14.78608568838092, "learning_rate": 2.081137316555595e-06, "loss": 0.04607086181640625, "step": 138570 }, { "epoch": 1.1982170495715558, "grad_norm": 1.7386733189715495, "learning_rate": 2.0809433771164966e-06, "loss": 0.03914794921875, "step": 138575 }, { "epoch": 1.198260283093099, "grad_norm": 0.9108891661201046, "learning_rate": 2.080749441916006e-06, "loss": 0.051531219482421876, "step": 138580 }, { "epoch": 1.1983035166146423, "grad_norm": 0.415713540779652, "learning_rate": 2.0805555109550176e-06, "loss": 0.08670806884765625, "step": 138585 }, { "epoch": 1.1983467501361855, "grad_norm": 1.9285563098838598, "learning_rate": 2.080361584234426e-06, "loss": 0.017218017578125, "step": 138590 }, { "epoch": 1.1983899836577288, "grad_norm": 9.374548362792027, "learning_rate": 2.0801676617551264e-06, "loss": 0.17249679565429688, "step": 138595 }, { "epoch": 1.1984332171792722, "grad_norm": 0.06294607625877498, "learning_rate": 2.0799737435180103e-06, "loss": 0.04450492858886719, "step": 138600 }, { "epoch": 1.1984764507008154, "grad_norm": 0.12932393928566646, "learning_rate": 2.0797798295239754e-06, "loss": 0.09863109588623047, "step": 138605 }, { "epoch": 1.1985196842223587, "grad_norm": 22.05418790855996, "learning_rate": 2.0795859197739143e-06, "loss": 0.14305534362792968, "step": 138610 }, { "epoch": 1.198562917743902, "grad_norm": 0.6775201561490614, "learning_rate": 2.079392014268721e-06, "loss": 0.07960662841796876, "step": 138615 }, { "epoch": 1.1986061512654451, "grad_norm": 4.784914825187814, "learning_rate": 2.0791981130092917e-06, "loss": 0.0273162841796875, "step": 138620 }, { "epoch": 1.1986493847869883, "grad_norm": 12.80148028787467, "learning_rate": 2.079004215996519e-06, "loss": 0.08386878967285157, "step": 138625 }, { "epoch": 1.1986926183085318, "grad_norm": 22.3033440283415, "learning_rate": 2.078810323231296e-06, "loss": 0.2934417724609375, "step": 138630 }, { "epoch": 1.198735851830075, "grad_norm": 18.775963522755152, "learning_rate": 2.07861643471452e-06, "loss": 0.04985198974609375, "step": 138635 }, { "epoch": 1.1987790853516183, "grad_norm": 1.2537140107650424, "learning_rate": 2.078422550447083e-06, "loss": 0.019592857360839842, "step": 138640 }, { "epoch": 1.1988223188731615, "grad_norm": 1.168938058567609, "learning_rate": 2.0782286704298797e-06, "loss": 0.25589447021484374, "step": 138645 }, { "epoch": 1.1988655523947047, "grad_norm": 1.0992447393138978, "learning_rate": 2.0780347946638054e-06, "loss": 0.042428207397460935, "step": 138650 }, { "epoch": 1.198908785916248, "grad_norm": 2.5876202929846217, "learning_rate": 2.077840923149752e-06, "loss": 0.049239730834960936, "step": 138655 }, { "epoch": 1.1989520194377912, "grad_norm": 9.020595921074108, "learning_rate": 2.077647055888614e-06, "loss": 0.13937034606933593, "step": 138660 }, { "epoch": 1.1989952529593346, "grad_norm": 13.89041991723094, "learning_rate": 2.0774531928812883e-06, "loss": 0.18696937561035157, "step": 138665 }, { "epoch": 1.1990384864808779, "grad_norm": 8.683198077958954, "learning_rate": 2.0772593341286657e-06, "loss": 0.14476165771484376, "step": 138670 }, { "epoch": 1.199081720002421, "grad_norm": 13.467176245224667, "learning_rate": 2.0770654796316416e-06, "loss": 0.27303314208984375, "step": 138675 }, { "epoch": 1.1991249535239643, "grad_norm": 2.088386348079561, "learning_rate": 2.07687162939111e-06, "loss": 0.0765869140625, "step": 138680 }, { "epoch": 1.1991681870455075, "grad_norm": 7.81511845007021, "learning_rate": 2.0766777834079657e-06, "loss": 0.03342704772949219, "step": 138685 }, { "epoch": 1.199211420567051, "grad_norm": 1.2743632266740106, "learning_rate": 2.0764839416831016e-06, "loss": 0.03330078125, "step": 138690 }, { "epoch": 1.1992546540885942, "grad_norm": 7.98463285767262, "learning_rate": 2.0762901042174105e-06, "loss": 0.030873870849609374, "step": 138695 }, { "epoch": 1.1992978876101374, "grad_norm": 1.4992717359860193, "learning_rate": 2.076096271011789e-06, "loss": 0.027114105224609376, "step": 138700 }, { "epoch": 1.1993411211316807, "grad_norm": 2.4379750002412948, "learning_rate": 2.0759024420671295e-06, "loss": 0.013969039916992188, "step": 138705 }, { "epoch": 1.199384354653224, "grad_norm": 11.8963290696172, "learning_rate": 2.075708617384327e-06, "loss": 0.0284149169921875, "step": 138710 }, { "epoch": 1.1994275881747671, "grad_norm": 8.176094294988113, "learning_rate": 2.0755147969642743e-06, "loss": 0.0224365234375, "step": 138715 }, { "epoch": 1.1994708216963104, "grad_norm": 7.024796922103817, "learning_rate": 2.075320980807866e-06, "loss": 0.06451568603515626, "step": 138720 }, { "epoch": 1.1995140552178536, "grad_norm": 3.3746191015709472, "learning_rate": 2.0751271689159943e-06, "loss": 0.0557159423828125, "step": 138725 }, { "epoch": 1.199557288739397, "grad_norm": 0.3084159253440839, "learning_rate": 2.0749333612895557e-06, "loss": 0.06317825317382812, "step": 138730 }, { "epoch": 1.1996005222609403, "grad_norm": 2.3190123573699664, "learning_rate": 2.0747395579294417e-06, "loss": 0.03651351928710937, "step": 138735 }, { "epoch": 1.1996437557824835, "grad_norm": 55.521320898157995, "learning_rate": 2.074545758836548e-06, "loss": 0.11585311889648438, "step": 138740 }, { "epoch": 1.1996869893040267, "grad_norm": 3.1322996100004765, "learning_rate": 2.0743519640117672e-06, "loss": 0.015975761413574218, "step": 138745 }, { "epoch": 1.19973022282557, "grad_norm": 1.5099279593775283, "learning_rate": 2.0741581734559936e-06, "loss": 0.07155303955078125, "step": 138750 }, { "epoch": 1.1997734563471134, "grad_norm": 11.690420210197294, "learning_rate": 2.0739643871701203e-06, "loss": 0.03268280029296875, "step": 138755 }, { "epoch": 1.1998166898686566, "grad_norm": 3.8831104302547357, "learning_rate": 2.0737706051550413e-06, "loss": 0.06447219848632812, "step": 138760 }, { "epoch": 1.1998599233901999, "grad_norm": 1.2889312348557873, "learning_rate": 2.07357682741165e-06, "loss": 0.01617774963378906, "step": 138765 }, { "epoch": 1.199903156911743, "grad_norm": 1.1070346069598263, "learning_rate": 2.0733830539408416e-06, "loss": 0.07914047241210938, "step": 138770 }, { "epoch": 1.1999463904332863, "grad_norm": 0.23433750585399524, "learning_rate": 2.0731892847435086e-06, "loss": 0.26970348358154295, "step": 138775 }, { "epoch": 1.1999896239548296, "grad_norm": 1.3177997147309488, "learning_rate": 2.072995519820545e-06, "loss": 0.043627357482910155, "step": 138780 }, { "epoch": 1.2000328574763728, "grad_norm": 0.8714877254375202, "learning_rate": 2.072801759172843e-06, "loss": 0.03763885498046875, "step": 138785 }, { "epoch": 1.2000760909979162, "grad_norm": 4.530243170505061, "learning_rate": 2.0726080028012964e-06, "loss": 0.022182464599609375, "step": 138790 }, { "epoch": 1.2001193245194595, "grad_norm": 7.081251939475107, "learning_rate": 2.0724142507068016e-06, "loss": 0.07630233764648438, "step": 138795 }, { "epoch": 1.2001625580410027, "grad_norm": 1.6196915358730715, "learning_rate": 2.0722205028902506e-06, "loss": 0.04997940063476562, "step": 138800 }, { "epoch": 1.200205791562546, "grad_norm": 6.590157563503012, "learning_rate": 2.072026759352536e-06, "loss": 0.046916961669921875, "step": 138805 }, { "epoch": 1.2002490250840891, "grad_norm": 17.206188846768665, "learning_rate": 2.0718330200945515e-06, "loss": 0.035660076141357425, "step": 138810 }, { "epoch": 1.2002922586056324, "grad_norm": 16.695945331128982, "learning_rate": 2.071639285117192e-06, "loss": 0.10666007995605468, "step": 138815 }, { "epoch": 1.2003354921271758, "grad_norm": 0.15093806809762003, "learning_rate": 2.071445554421349e-06, "loss": 0.031043243408203126, "step": 138820 }, { "epoch": 1.200378725648719, "grad_norm": 5.287174530362721, "learning_rate": 2.0712518280079178e-06, "loss": 0.03523674011230469, "step": 138825 }, { "epoch": 1.2004219591702623, "grad_norm": 1.0031024828327668, "learning_rate": 2.071058105877791e-06, "loss": 0.06273345947265625, "step": 138830 }, { "epoch": 1.2004651926918055, "grad_norm": 10.144336758910134, "learning_rate": 2.0708643880318628e-06, "loss": 0.0472625732421875, "step": 138835 }, { "epoch": 1.2005084262133487, "grad_norm": 4.084581263196554, "learning_rate": 2.070670674471025e-06, "loss": 0.14051971435546876, "step": 138840 }, { "epoch": 1.200551659734892, "grad_norm": 0.2471104377034229, "learning_rate": 2.0704769651961725e-06, "loss": 0.016692733764648436, "step": 138845 }, { "epoch": 1.2005948932564352, "grad_norm": 1.5234147025540095, "learning_rate": 2.0702832602081983e-06, "loss": 0.01384592056274414, "step": 138850 }, { "epoch": 1.2006381267779787, "grad_norm": 6.596572318643876, "learning_rate": 2.070089559507994e-06, "loss": 0.052633476257324216, "step": 138855 }, { "epoch": 1.2006813602995219, "grad_norm": 3.5855446289003683, "learning_rate": 2.0698958630964557e-06, "loss": 0.0594146728515625, "step": 138860 }, { "epoch": 1.2007245938210651, "grad_norm": 4.703663656205993, "learning_rate": 2.0697021709744757e-06, "loss": 0.07279052734375, "step": 138865 }, { "epoch": 1.2007678273426083, "grad_norm": 32.31271162725099, "learning_rate": 2.0695084831429463e-06, "loss": 0.19767417907714843, "step": 138870 }, { "epoch": 1.2008110608641516, "grad_norm": 2.2609101563958305, "learning_rate": 2.0693147996027624e-06, "loss": 0.02229766845703125, "step": 138875 }, { "epoch": 1.2008542943856948, "grad_norm": 1.229664002573435, "learning_rate": 2.0691211203548164e-06, "loss": 0.18943939208984376, "step": 138880 }, { "epoch": 1.2008975279072382, "grad_norm": 4.5833890150033465, "learning_rate": 2.0689274454e-06, "loss": 0.03517608642578125, "step": 138885 }, { "epoch": 1.2009407614287815, "grad_norm": 0.5071037120265555, "learning_rate": 2.0687337747392096e-06, "loss": 0.28788909912109373, "step": 138890 }, { "epoch": 1.2009839949503247, "grad_norm": 9.824257484779148, "learning_rate": 2.0685401083733365e-06, "loss": 0.10642318725585938, "step": 138895 }, { "epoch": 1.201027228471868, "grad_norm": 1.526308707674228, "learning_rate": 2.0683464463032737e-06, "loss": 0.09889678955078125, "step": 138900 }, { "epoch": 1.2010704619934112, "grad_norm": 0.026319510569815772, "learning_rate": 2.068152788529915e-06, "loss": 0.031116485595703125, "step": 138905 }, { "epoch": 1.2011136955149544, "grad_norm": 3.175098195274948, "learning_rate": 2.0679591350541534e-06, "loss": 0.11084346771240235, "step": 138910 }, { "epoch": 1.2011569290364976, "grad_norm": 0.14045639493853057, "learning_rate": 2.067765485876881e-06, "loss": 0.05997772216796875, "step": 138915 }, { "epoch": 1.201200162558041, "grad_norm": 47.435288563079666, "learning_rate": 2.0675718409989926e-06, "loss": 0.2546714782714844, "step": 138920 }, { "epoch": 1.2012433960795843, "grad_norm": 1.2770914222114067, "learning_rate": 2.067378200421381e-06, "loss": 0.04309234619140625, "step": 138925 }, { "epoch": 1.2012866296011275, "grad_norm": 1.0167417575734998, "learning_rate": 2.0671845641449377e-06, "loss": 0.0735321044921875, "step": 138930 }, { "epoch": 1.2013298631226708, "grad_norm": 1.1072408190719893, "learning_rate": 2.0669909321705577e-06, "loss": 0.010124588012695312, "step": 138935 }, { "epoch": 1.201373096644214, "grad_norm": 1.713064002724102, "learning_rate": 2.0667973044991326e-06, "loss": 0.06317615509033203, "step": 138940 }, { "epoch": 1.2014163301657574, "grad_norm": 1.655250656785791, "learning_rate": 2.0666036811315564e-06, "loss": 0.029586029052734376, "step": 138945 }, { "epoch": 1.2014595636873007, "grad_norm": 0.574065553782038, "learning_rate": 2.06641006206872e-06, "loss": 0.018476104736328124, "step": 138950 }, { "epoch": 1.201502797208844, "grad_norm": 16.688370767650454, "learning_rate": 2.066216447311519e-06, "loss": 0.1478952407836914, "step": 138955 }, { "epoch": 1.2015460307303871, "grad_norm": 0.7208191375106024, "learning_rate": 2.066022836860845e-06, "loss": 0.03370933532714844, "step": 138960 }, { "epoch": 1.2015892642519304, "grad_norm": 1.8760587539129094, "learning_rate": 2.0658292307175916e-06, "loss": 0.06501312255859375, "step": 138965 }, { "epoch": 1.2016324977734736, "grad_norm": 3.409713864557175, "learning_rate": 2.0656356288826514e-06, "loss": 0.08510150909423828, "step": 138970 }, { "epoch": 1.2016757312950168, "grad_norm": 29.48817719305051, "learning_rate": 2.0654420313569167e-06, "loss": 0.18905487060546874, "step": 138975 }, { "epoch": 1.2017189648165603, "grad_norm": 7.862188708265569, "learning_rate": 2.06524843814128e-06, "loss": 0.2007542610168457, "step": 138980 }, { "epoch": 1.2017621983381035, "grad_norm": 7.154959308415255, "learning_rate": 2.065054849236635e-06, "loss": 0.06446304321289062, "step": 138985 }, { "epoch": 1.2018054318596467, "grad_norm": 38.32972070072011, "learning_rate": 2.0648612646438755e-06, "loss": 0.20425338745117189, "step": 138990 }, { "epoch": 1.20184866538119, "grad_norm": 30.72693862140157, "learning_rate": 2.064667684363893e-06, "loss": 0.19514598846435546, "step": 138995 }, { "epoch": 1.2018918989027332, "grad_norm": 10.10904973123971, "learning_rate": 2.0644741083975803e-06, "loss": 0.24425086975097657, "step": 139000 }, { "epoch": 1.2019351324242764, "grad_norm": 12.448394970279669, "learning_rate": 2.0642805367458307e-06, "loss": 0.035228919982910153, "step": 139005 }, { "epoch": 1.2019783659458199, "grad_norm": 40.15899021204217, "learning_rate": 2.0640869694095357e-06, "loss": 0.23628158569335939, "step": 139010 }, { "epoch": 1.202021599467363, "grad_norm": 5.222547706434134, "learning_rate": 2.063893406389589e-06, "loss": 0.30140533447265627, "step": 139015 }, { "epoch": 1.2020648329889063, "grad_norm": 0.6908431385555702, "learning_rate": 2.063699847686884e-06, "loss": 0.04617919921875, "step": 139020 }, { "epoch": 1.2021080665104495, "grad_norm": 6.191427941370251, "learning_rate": 2.063506293302312e-06, "loss": 0.1102996826171875, "step": 139025 }, { "epoch": 1.2021513000319928, "grad_norm": 2.7212380962606675, "learning_rate": 2.0633127432367673e-06, "loss": 0.015325164794921875, "step": 139030 }, { "epoch": 1.202194533553536, "grad_norm": 0.5054274624079205, "learning_rate": 2.06311919749114e-06, "loss": 0.06713676452636719, "step": 139035 }, { "epoch": 1.2022377670750792, "grad_norm": 7.989931209125742, "learning_rate": 2.062925656066325e-06, "loss": 0.07756500244140625, "step": 139040 }, { "epoch": 1.2022810005966227, "grad_norm": 0.9381644003970205, "learning_rate": 2.062732118963213e-06, "loss": 0.059373092651367185, "step": 139045 }, { "epoch": 1.202324234118166, "grad_norm": 2.5393431865737837, "learning_rate": 2.0625385861826984e-06, "loss": 0.03130340576171875, "step": 139050 }, { "epoch": 1.2023674676397091, "grad_norm": 0.0522676794436516, "learning_rate": 2.0623450577256735e-06, "loss": 0.08855857849121093, "step": 139055 }, { "epoch": 1.2024107011612524, "grad_norm": 4.128083844236364, "learning_rate": 2.0621515335930304e-06, "loss": 0.15209503173828126, "step": 139060 }, { "epoch": 1.2024539346827956, "grad_norm": 2.032670549529516, "learning_rate": 2.0619580137856607e-06, "loss": 0.01489410400390625, "step": 139065 }, { "epoch": 1.2024971682043388, "grad_norm": 4.392331570377542, "learning_rate": 2.061764498304458e-06, "loss": 0.060703086853027347, "step": 139070 }, { "epoch": 1.2025404017258823, "grad_norm": 0.866786958100705, "learning_rate": 2.061570987150314e-06, "loss": 0.18980712890625, "step": 139075 }, { "epoch": 1.2025836352474255, "grad_norm": 0.35827667330501733, "learning_rate": 2.0613774803241226e-06, "loss": 0.24125595092773439, "step": 139080 }, { "epoch": 1.2026268687689687, "grad_norm": 19.12933264766434, "learning_rate": 2.0611839778267754e-06, "loss": 0.059391021728515625, "step": 139085 }, { "epoch": 1.202670102290512, "grad_norm": 1.291647036254134, "learning_rate": 2.0609904796591646e-06, "loss": 0.03344039916992188, "step": 139090 }, { "epoch": 1.2027133358120552, "grad_norm": 0.7248510978672822, "learning_rate": 2.060796985822182e-06, "loss": 0.09651641845703125, "step": 139095 }, { "epoch": 1.2027565693335984, "grad_norm": 2.5774074850390605, "learning_rate": 2.060603496316722e-06, "loss": 0.14166412353515626, "step": 139100 }, { "epoch": 1.2027998028551417, "grad_norm": 0.3757501392729066, "learning_rate": 2.060410011143675e-06, "loss": 0.06319503784179688, "step": 139105 }, { "epoch": 1.202843036376685, "grad_norm": 17.511871537923625, "learning_rate": 2.0602165303039326e-06, "loss": 0.099810791015625, "step": 139110 }, { "epoch": 1.2028862698982283, "grad_norm": 11.753939995947341, "learning_rate": 2.0600230537983902e-06, "loss": 0.1826223373413086, "step": 139115 }, { "epoch": 1.2029295034197716, "grad_norm": 1.8801444213318668, "learning_rate": 2.0598295816279384e-06, "loss": 0.11477890014648437, "step": 139120 }, { "epoch": 1.2029727369413148, "grad_norm": 33.714345962868045, "learning_rate": 2.059636113793469e-06, "loss": 0.1781597137451172, "step": 139125 }, { "epoch": 1.203015970462858, "grad_norm": 21.371722574056626, "learning_rate": 2.059442650295875e-06, "loss": 0.041020774841308595, "step": 139130 }, { "epoch": 1.2030592039844012, "grad_norm": 0.6930301058336009, "learning_rate": 2.0592491911360487e-06, "loss": 0.1900482177734375, "step": 139135 }, { "epoch": 1.2031024375059447, "grad_norm": 0.6959321196484762, "learning_rate": 2.0590557363148805e-06, "loss": 0.02346038818359375, "step": 139140 }, { "epoch": 1.203145671027488, "grad_norm": 40.427223433558694, "learning_rate": 2.0588622858332655e-06, "loss": 0.21114501953125, "step": 139145 }, { "epoch": 1.2031889045490312, "grad_norm": 0.42941728352611047, "learning_rate": 2.0586688396920945e-06, "loss": 0.01255340576171875, "step": 139150 }, { "epoch": 1.2032321380705744, "grad_norm": 5.306556435127014, "learning_rate": 2.058475397892259e-06, "loss": 0.0505584716796875, "step": 139155 }, { "epoch": 1.2032753715921176, "grad_norm": 1.6957211185970598, "learning_rate": 2.0582819604346525e-06, "loss": 0.017661285400390626, "step": 139160 }, { "epoch": 1.2033186051136608, "grad_norm": 5.342441807876128, "learning_rate": 2.058088527320166e-06, "loss": 0.07253150939941407, "step": 139165 }, { "epoch": 1.203361838635204, "grad_norm": 2.4293014582084425, "learning_rate": 2.057895098549691e-06, "loss": 0.012178421020507812, "step": 139170 }, { "epoch": 1.2034050721567475, "grad_norm": 0.6350918640938813, "learning_rate": 2.057701674124122e-06, "loss": 0.025646209716796875, "step": 139175 }, { "epoch": 1.2034483056782908, "grad_norm": 1.8635669830979515, "learning_rate": 2.0575082540443492e-06, "loss": 0.110919189453125, "step": 139180 }, { "epoch": 1.203491539199834, "grad_norm": 6.2798055750294575, "learning_rate": 2.0573148383112647e-06, "loss": 0.027515411376953125, "step": 139185 }, { "epoch": 1.2035347727213772, "grad_norm": 0.4858795682611497, "learning_rate": 2.0571214269257615e-06, "loss": 0.018305206298828126, "step": 139190 }, { "epoch": 1.2035780062429204, "grad_norm": 10.660147415242976, "learning_rate": 2.056928019888731e-06, "loss": 0.1860107421875, "step": 139195 }, { "epoch": 1.203621239764464, "grad_norm": 2.010544736890455, "learning_rate": 2.056734617201065e-06, "loss": 0.111004638671875, "step": 139200 }, { "epoch": 1.2036644732860071, "grad_norm": 4.556341545277373, "learning_rate": 2.056541218863654e-06, "loss": 0.14046249389648438, "step": 139205 }, { "epoch": 1.2037077068075503, "grad_norm": 3.610608576086802, "learning_rate": 2.056347824877393e-06, "loss": 0.03601150512695313, "step": 139210 }, { "epoch": 1.2037509403290936, "grad_norm": 0.054369869930672166, "learning_rate": 2.0561544352431725e-06, "loss": 0.028932571411132812, "step": 139215 }, { "epoch": 1.2037941738506368, "grad_norm": 0.9337348688176222, "learning_rate": 2.0559610499618846e-06, "loss": 0.1479715347290039, "step": 139220 }, { "epoch": 1.20383740737218, "grad_norm": 5.793630355947047, "learning_rate": 2.0557676690344207e-06, "loss": 0.05323333740234375, "step": 139225 }, { "epoch": 1.2038806408937233, "grad_norm": 5.46968446446485, "learning_rate": 2.0555742924616725e-06, "loss": 0.05544853210449219, "step": 139230 }, { "epoch": 1.2039238744152667, "grad_norm": 27.343578929476994, "learning_rate": 2.055380920244532e-06, "loss": 0.2606483459472656, "step": 139235 }, { "epoch": 1.20396710793681, "grad_norm": 7.02436336738699, "learning_rate": 2.055187552383891e-06, "loss": 0.18802490234375, "step": 139240 }, { "epoch": 1.2040103414583532, "grad_norm": 14.310067929977285, "learning_rate": 2.0549941888806427e-06, "loss": 0.0507354736328125, "step": 139245 }, { "epoch": 1.2040535749798964, "grad_norm": 25.438000341488017, "learning_rate": 2.054800829735677e-06, "loss": 0.09795866012573243, "step": 139250 }, { "epoch": 1.2040968085014396, "grad_norm": 0.11173212455017006, "learning_rate": 2.054607474949887e-06, "loss": 0.03194427490234375, "step": 139255 }, { "epoch": 1.2041400420229829, "grad_norm": 1.0148909795309906, "learning_rate": 2.054414124524163e-06, "loss": 0.18052902221679687, "step": 139260 }, { "epoch": 1.2041832755445263, "grad_norm": 0.9557777543041679, "learning_rate": 2.0542207784593976e-06, "loss": 0.13628616333007812, "step": 139265 }, { "epoch": 1.2042265090660695, "grad_norm": 3.921521599223851, "learning_rate": 2.054027436756482e-06, "loss": 0.10600337982177735, "step": 139270 }, { "epoch": 1.2042697425876128, "grad_norm": 4.018145095557568, "learning_rate": 2.053834099416309e-06, "loss": 0.02635498046875, "step": 139275 }, { "epoch": 1.204312976109156, "grad_norm": 56.990347488530304, "learning_rate": 2.0536407664397697e-06, "loss": 0.39272308349609375, "step": 139280 }, { "epoch": 1.2043562096306992, "grad_norm": 0.5416865124326556, "learning_rate": 2.053447437827756e-06, "loss": 0.018795013427734375, "step": 139285 }, { "epoch": 1.2043994431522425, "grad_norm": 2.7778366812444033, "learning_rate": 2.053254113581158e-06, "loss": 0.11019287109375, "step": 139290 }, { "epoch": 1.2044426766737857, "grad_norm": 3.31331005962144, "learning_rate": 2.0530607937008684e-06, "loss": 0.0163818359375, "step": 139295 }, { "epoch": 1.2044859101953291, "grad_norm": 1.3895563510177944, "learning_rate": 2.0528674781877786e-06, "loss": 0.05475616455078125, "step": 139300 }, { "epoch": 1.2045291437168724, "grad_norm": 4.994873169130787, "learning_rate": 2.052674167042781e-06, "loss": 0.009003829956054688, "step": 139305 }, { "epoch": 1.2045723772384156, "grad_norm": 16.861257034281927, "learning_rate": 2.052480860266766e-06, "loss": 0.08458328247070312, "step": 139310 }, { "epoch": 1.2046156107599588, "grad_norm": 1.3131697231537902, "learning_rate": 2.0522875578606264e-06, "loss": 0.08985786437988282, "step": 139315 }, { "epoch": 1.204658844281502, "grad_norm": 1.2805051575142714, "learning_rate": 2.0520942598252518e-06, "loss": 0.14641380310058594, "step": 139320 }, { "epoch": 1.2047020778030453, "grad_norm": 42.44522590090933, "learning_rate": 2.0519009661615352e-06, "loss": 0.12601165771484374, "step": 139325 }, { "epoch": 1.2047453113245887, "grad_norm": 0.08546782925337697, "learning_rate": 2.0517076768703667e-06, "loss": 0.021690750122070314, "step": 139330 }, { "epoch": 1.204788544846132, "grad_norm": 5.253749183851321, "learning_rate": 2.0515143919526397e-06, "loss": 0.0674044132232666, "step": 139335 }, { "epoch": 1.2048317783676752, "grad_norm": 19.45502126205231, "learning_rate": 2.0513211114092446e-06, "loss": 0.12439842224121093, "step": 139340 }, { "epoch": 1.2048750118892184, "grad_norm": 3.714974243993426, "learning_rate": 2.0511278352410724e-06, "loss": 0.048846435546875, "step": 139345 }, { "epoch": 1.2049182454107616, "grad_norm": 3.4483158892067327, "learning_rate": 2.050934563449014e-06, "loss": 0.15435791015625, "step": 139350 }, { "epoch": 1.2049614789323049, "grad_norm": 2.7875867731884143, "learning_rate": 2.0507412960339626e-06, "loss": 0.020399856567382812, "step": 139355 }, { "epoch": 1.205004712453848, "grad_norm": 30.195368425730724, "learning_rate": 2.0505480329968073e-06, "loss": 0.2744121551513672, "step": 139360 }, { "epoch": 1.2050479459753916, "grad_norm": 1.8679556293893982, "learning_rate": 2.0503547743384417e-06, "loss": 0.153253173828125, "step": 139365 }, { "epoch": 1.2050911794969348, "grad_norm": 1.4251560074162741, "learning_rate": 2.0501615200597557e-06, "loss": 0.025920581817626954, "step": 139370 }, { "epoch": 1.205134413018478, "grad_norm": 15.035326038725234, "learning_rate": 2.049968270161641e-06, "loss": 0.059661865234375, "step": 139375 }, { "epoch": 1.2051776465400212, "grad_norm": 0.249002286483663, "learning_rate": 2.049775024644988e-06, "loss": 0.020980072021484376, "step": 139380 }, { "epoch": 1.2052208800615645, "grad_norm": 2.9291305775328507, "learning_rate": 2.0495817835106896e-06, "loss": 0.06386260986328125, "step": 139385 }, { "epoch": 1.205264113583108, "grad_norm": 0.09913777279858105, "learning_rate": 2.0493885467596356e-06, "loss": 0.03094329833984375, "step": 139390 }, { "epoch": 1.2053073471046512, "grad_norm": 1.4803144967074513, "learning_rate": 2.0491953143927166e-06, "loss": 0.0105499267578125, "step": 139395 }, { "epoch": 1.2053505806261944, "grad_norm": 7.2842023246948235, "learning_rate": 2.049002086410826e-06, "loss": 0.11195068359375, "step": 139400 }, { "epoch": 1.2053938141477376, "grad_norm": 2.4642549809159138, "learning_rate": 2.0488088628148537e-06, "loss": 0.01859130859375, "step": 139405 }, { "epoch": 1.2054370476692808, "grad_norm": 0.7063541286123396, "learning_rate": 2.0486156436056902e-06, "loss": 0.008090782165527343, "step": 139410 }, { "epoch": 1.205480281190824, "grad_norm": 9.285075276757246, "learning_rate": 2.048422428784228e-06, "loss": 0.03944816589355469, "step": 139415 }, { "epoch": 1.2055235147123673, "grad_norm": 3.362958954629544, "learning_rate": 2.0482292183513578e-06, "loss": 0.05256462097167969, "step": 139420 }, { "epoch": 1.2055667482339105, "grad_norm": 8.540883677221512, "learning_rate": 2.048036012307969e-06, "loss": 0.06324176788330078, "step": 139425 }, { "epoch": 1.205609981755454, "grad_norm": 2.908737424795161, "learning_rate": 2.047842810654955e-06, "loss": 0.04590187072753906, "step": 139430 }, { "epoch": 1.2056532152769972, "grad_norm": 1.0895706747366238, "learning_rate": 2.047649613393205e-06, "loss": 0.010724258422851563, "step": 139435 }, { "epoch": 1.2056964487985404, "grad_norm": 3.8280220485451912, "learning_rate": 2.0474564205236114e-06, "loss": 0.038720703125, "step": 139440 }, { "epoch": 1.2057396823200837, "grad_norm": 10.945638315710006, "learning_rate": 2.0472632320470653e-06, "loss": 0.03608551025390625, "step": 139445 }, { "epoch": 1.205782915841627, "grad_norm": 3.1576678041021737, "learning_rate": 2.0470700479644564e-06, "loss": 0.0512908935546875, "step": 139450 }, { "epoch": 1.2058261493631703, "grad_norm": 0.7715339430546068, "learning_rate": 2.0468768682766755e-06, "loss": 0.05257492065429688, "step": 139455 }, { "epoch": 1.2058693828847136, "grad_norm": 5.29944290090136, "learning_rate": 2.046683692984615e-06, "loss": 0.04202423095703125, "step": 139460 }, { "epoch": 1.2059126164062568, "grad_norm": 2.1249736161324106, "learning_rate": 2.046490522089164e-06, "loss": 0.04300117492675781, "step": 139465 }, { "epoch": 1.2059558499278, "grad_norm": 4.130695047413964, "learning_rate": 2.0462973555912163e-06, "loss": 0.0297393798828125, "step": 139470 }, { "epoch": 1.2059990834493433, "grad_norm": 0.7527503026227602, "learning_rate": 2.0461041934916603e-06, "loss": 0.015238189697265625, "step": 139475 }, { "epoch": 1.2060423169708865, "grad_norm": 1.013630334093553, "learning_rate": 2.0459110357913876e-06, "loss": 0.11386489868164062, "step": 139480 }, { "epoch": 1.2060855504924297, "grad_norm": 0.652919747277603, "learning_rate": 2.0457178824912885e-06, "loss": 0.010098838806152343, "step": 139485 }, { "epoch": 1.2061287840139732, "grad_norm": 1.6652672977803988, "learning_rate": 2.0455247335922535e-06, "loss": 0.021869087219238283, "step": 139490 }, { "epoch": 1.2061720175355164, "grad_norm": 0.22591019644579582, "learning_rate": 2.045331589095174e-06, "loss": 0.09204444885253907, "step": 139495 }, { "epoch": 1.2062152510570596, "grad_norm": 37.67362126180664, "learning_rate": 2.0451384490009425e-06, "loss": 0.11650238037109376, "step": 139500 }, { "epoch": 1.2062584845786029, "grad_norm": 0.6314037103490787, "learning_rate": 2.0449453133104474e-06, "loss": 0.15914039611816405, "step": 139505 }, { "epoch": 1.206301718100146, "grad_norm": 1.742963835705288, "learning_rate": 2.0447521820245803e-06, "loss": 0.05939407348632812, "step": 139510 }, { "epoch": 1.2063449516216893, "grad_norm": 42.14444058066805, "learning_rate": 2.0445590551442316e-06, "loss": 0.17737960815429688, "step": 139515 }, { "epoch": 1.2063881851432328, "grad_norm": 3.9616469572583877, "learning_rate": 2.0443659326702914e-06, "loss": 0.02843608856201172, "step": 139520 }, { "epoch": 1.206431418664776, "grad_norm": 2.69748776241272, "learning_rate": 2.0441728146036513e-06, "loss": 0.0647216796875, "step": 139525 }, { "epoch": 1.2064746521863192, "grad_norm": 1.6263718547537729, "learning_rate": 2.0439797009452023e-06, "loss": 0.0551727294921875, "step": 139530 }, { "epoch": 1.2065178857078624, "grad_norm": 21.50283317220181, "learning_rate": 2.0437865916958346e-06, "loss": 0.16129608154296876, "step": 139535 }, { "epoch": 1.2065611192294057, "grad_norm": 2.8805914228287204, "learning_rate": 2.0435934868564385e-06, "loss": 0.02361602783203125, "step": 139540 }, { "epoch": 1.206604352750949, "grad_norm": 14.056432453287899, "learning_rate": 2.043400386427904e-06, "loss": 0.12866439819335937, "step": 139545 }, { "epoch": 1.2066475862724921, "grad_norm": 0.34805912874822603, "learning_rate": 2.043207290411123e-06, "loss": 0.005389404296875, "step": 139550 }, { "epoch": 1.2066908197940356, "grad_norm": 2.0285069246869507, "learning_rate": 2.043014198806984e-06, "loss": 0.03259716033935547, "step": 139555 }, { "epoch": 1.2067340533155788, "grad_norm": 1.011370625784376, "learning_rate": 2.042821111616381e-06, "loss": 0.11862411499023437, "step": 139560 }, { "epoch": 1.206777286837122, "grad_norm": 1.0360881157431732, "learning_rate": 2.042628028840202e-06, "loss": 0.09553031921386719, "step": 139565 }, { "epoch": 1.2068205203586653, "grad_norm": 1.4337831678633857, "learning_rate": 2.0424349504793377e-06, "loss": 0.10296840667724609, "step": 139570 }, { "epoch": 1.2068637538802085, "grad_norm": 25.02270432317581, "learning_rate": 2.0422418765346785e-06, "loss": 0.0569732666015625, "step": 139575 }, { "epoch": 1.2069069874017517, "grad_norm": 1.7345847702469233, "learning_rate": 2.0420488070071153e-06, "loss": 0.181451416015625, "step": 139580 }, { "epoch": 1.2069502209232952, "grad_norm": 33.785672674640146, "learning_rate": 2.0418557418975383e-06, "loss": 0.10498046875, "step": 139585 }, { "epoch": 1.2069934544448384, "grad_norm": 2.350455511150478, "learning_rate": 2.041662681206838e-06, "loss": 0.10851860046386719, "step": 139590 }, { "epoch": 1.2070366879663816, "grad_norm": 0.5320858895632081, "learning_rate": 2.041469624935905e-06, "loss": 0.11904258728027343, "step": 139595 }, { "epoch": 1.2070799214879249, "grad_norm": 6.2268778470625, "learning_rate": 2.0412765730856294e-06, "loss": 0.13038253784179688, "step": 139600 }, { "epoch": 1.207123155009468, "grad_norm": 1.0146629593092227, "learning_rate": 2.0410835256569012e-06, "loss": 0.34444427490234375, "step": 139605 }, { "epoch": 1.2071663885310113, "grad_norm": 36.9294373144376, "learning_rate": 2.0408904826506113e-06, "loss": 0.21581611633300782, "step": 139610 }, { "epoch": 1.2072096220525546, "grad_norm": 8.448793743442774, "learning_rate": 2.0406974440676485e-06, "loss": 0.024158763885498046, "step": 139615 }, { "epoch": 1.207252855574098, "grad_norm": 0.12420131056500613, "learning_rate": 2.0405044099089057e-06, "loss": 0.027253246307373045, "step": 139620 }, { "epoch": 1.2072960890956412, "grad_norm": 0.7529377688797733, "learning_rate": 2.040311380175272e-06, "loss": 0.08290481567382812, "step": 139625 }, { "epoch": 1.2073393226171845, "grad_norm": 13.96020921929768, "learning_rate": 2.040118354867637e-06, "loss": 0.1175069808959961, "step": 139630 }, { "epoch": 1.2073825561387277, "grad_norm": 1.7162568001154255, "learning_rate": 2.0399253339868917e-06, "loss": 0.02520904541015625, "step": 139635 }, { "epoch": 1.207425789660271, "grad_norm": 0.18017791568840028, "learning_rate": 2.0397323175339255e-06, "loss": 0.08025665283203125, "step": 139640 }, { "epoch": 1.2074690231818144, "grad_norm": 1.1261241254493584, "learning_rate": 2.039539305509629e-06, "loss": 0.07297210693359375, "step": 139645 }, { "epoch": 1.2075122567033576, "grad_norm": 0.17728197463286488, "learning_rate": 2.039346297914892e-06, "loss": 0.017099761962890626, "step": 139650 }, { "epoch": 1.2075554902249008, "grad_norm": 13.735681785864282, "learning_rate": 2.039153294750605e-06, "loss": 0.1351837158203125, "step": 139655 }, { "epoch": 1.207598723746444, "grad_norm": 1.221940149558493, "learning_rate": 2.0389602960176584e-06, "loss": 0.040182876586914065, "step": 139660 }, { "epoch": 1.2076419572679873, "grad_norm": 0.0841153619994259, "learning_rate": 2.038767301716942e-06, "loss": 0.20501861572265626, "step": 139665 }, { "epoch": 1.2076851907895305, "grad_norm": 0.3882819599844353, "learning_rate": 2.0385743118493466e-06, "loss": 0.035833740234375, "step": 139670 }, { "epoch": 1.2077284243110737, "grad_norm": 0.4002253655246514, "learning_rate": 2.038381326415761e-06, "loss": 0.044652557373046874, "step": 139675 }, { "epoch": 1.207771657832617, "grad_norm": 0.2582203797354362, "learning_rate": 2.0381883454170746e-06, "loss": 0.030743408203125, "step": 139680 }, { "epoch": 1.2078148913541604, "grad_norm": 11.781702713000456, "learning_rate": 2.0379953688541797e-06, "loss": 0.13493213653564454, "step": 139685 }, { "epoch": 1.2078581248757037, "grad_norm": 3.9071061311516955, "learning_rate": 2.0378023967279647e-06, "loss": 0.33951416015625, "step": 139690 }, { "epoch": 1.2079013583972469, "grad_norm": 0.5797469008463965, "learning_rate": 2.037609429039321e-06, "loss": 0.015479660034179688, "step": 139695 }, { "epoch": 1.2079445919187901, "grad_norm": 4.23047793044427, "learning_rate": 2.037416465789137e-06, "loss": 0.009943771362304687, "step": 139700 }, { "epoch": 1.2079878254403333, "grad_norm": 11.456451427577377, "learning_rate": 2.0372235069783032e-06, "loss": 0.06753768920898437, "step": 139705 }, { "epoch": 1.2080310589618768, "grad_norm": 11.905771331265605, "learning_rate": 2.0370305526077085e-06, "loss": 0.06013507843017578, "step": 139710 }, { "epoch": 1.20807429248342, "grad_norm": 5.667336073366325, "learning_rate": 2.0368376026782455e-06, "loss": 0.12918777465820314, "step": 139715 }, { "epoch": 1.2081175260049632, "grad_norm": 0.9059056391861329, "learning_rate": 2.0366446571908007e-06, "loss": 0.0572601318359375, "step": 139720 }, { "epoch": 1.2081607595265065, "grad_norm": 6.612498318692961, "learning_rate": 2.036451716146267e-06, "loss": 0.07929763793945313, "step": 139725 }, { "epoch": 1.2082039930480497, "grad_norm": 1.1768635634297386, "learning_rate": 2.036258779545533e-06, "loss": 0.02359161376953125, "step": 139730 }, { "epoch": 1.208247226569593, "grad_norm": 0.6867440160427659, "learning_rate": 2.036065847389488e-06, "loss": 0.018752288818359376, "step": 139735 }, { "epoch": 1.2082904600911362, "grad_norm": 12.822357098407247, "learning_rate": 2.0358729196790214e-06, "loss": 0.2908821105957031, "step": 139740 }, { "epoch": 1.2083336936126796, "grad_norm": 5.148481816252109, "learning_rate": 2.035679996415024e-06, "loss": 0.025402641296386717, "step": 139745 }, { "epoch": 1.2083769271342228, "grad_norm": 21.539528955429347, "learning_rate": 2.035487077598385e-06, "loss": 0.10669975280761719, "step": 139750 }, { "epoch": 1.208420160655766, "grad_norm": 4.373920172955343, "learning_rate": 2.035294163229995e-06, "loss": 0.0509857177734375, "step": 139755 }, { "epoch": 1.2084633941773093, "grad_norm": 17.76037944012616, "learning_rate": 2.035101253310743e-06, "loss": 0.11336517333984375, "step": 139760 }, { "epoch": 1.2085066276988525, "grad_norm": 3.194555838510667, "learning_rate": 2.0349083478415193e-06, "loss": 0.02049560546875, "step": 139765 }, { "epoch": 1.2085498612203958, "grad_norm": 1.5655250310284758, "learning_rate": 2.0347154468232115e-06, "loss": 0.06271743774414062, "step": 139770 }, { "epoch": 1.2085930947419392, "grad_norm": 0.32450200128898854, "learning_rate": 2.0345225502567116e-06, "loss": 0.021880340576171876, "step": 139775 }, { "epoch": 1.2086363282634824, "grad_norm": 1.3565167559770759, "learning_rate": 2.034329658142908e-06, "loss": 0.18780364990234374, "step": 139780 }, { "epoch": 1.2086795617850257, "grad_norm": 18.072938644104493, "learning_rate": 2.0341367704826914e-06, "loss": 0.1752786636352539, "step": 139785 }, { "epoch": 1.208722795306569, "grad_norm": 1.4135827796598677, "learning_rate": 2.0339438872769503e-06, "loss": 0.15996856689453126, "step": 139790 }, { "epoch": 1.2087660288281121, "grad_norm": 2.4510227550103654, "learning_rate": 2.033751008526575e-06, "loss": 0.058963775634765625, "step": 139795 }, { "epoch": 1.2088092623496554, "grad_norm": 9.983784978169506, "learning_rate": 2.033558134232454e-06, "loss": 0.049652099609375, "step": 139800 }, { "epoch": 1.2088524958711986, "grad_norm": 0.47551464243961367, "learning_rate": 2.033365264395478e-06, "loss": 0.01348114013671875, "step": 139805 }, { "epoch": 1.208895729392742, "grad_norm": 0.2626725202723936, "learning_rate": 2.033172399016535e-06, "loss": 0.05123443603515625, "step": 139810 }, { "epoch": 1.2089389629142853, "grad_norm": 2.126596786178463, "learning_rate": 2.032979538096517e-06, "loss": 0.13270339965820313, "step": 139815 }, { "epoch": 1.2089821964358285, "grad_norm": 1.9324120459186036, "learning_rate": 2.0327866816363116e-06, "loss": 0.09095916748046876, "step": 139820 }, { "epoch": 1.2090254299573717, "grad_norm": 0.38417144833485944, "learning_rate": 2.0325938296368085e-06, "loss": 0.17640151977539062, "step": 139825 }, { "epoch": 1.209068663478915, "grad_norm": 9.493004403827257, "learning_rate": 2.0324009820988964e-06, "loss": 0.0954833984375, "step": 139830 }, { "epoch": 1.2091118970004582, "grad_norm": 4.674357486313514, "learning_rate": 2.032208139023466e-06, "loss": 0.06851272583007813, "step": 139835 }, { "epoch": 1.2091551305220016, "grad_norm": 4.976038107870917, "learning_rate": 2.032015300411405e-06, "loss": 0.10085372924804688, "step": 139840 }, { "epoch": 1.2091983640435449, "grad_norm": 2.5466724400635123, "learning_rate": 2.0318224662636053e-06, "loss": 0.04265022277832031, "step": 139845 }, { "epoch": 1.209241597565088, "grad_norm": 0.048478645791180296, "learning_rate": 2.031629636580955e-06, "loss": 0.09256515502929688, "step": 139850 }, { "epoch": 1.2092848310866313, "grad_norm": 4.288291555613859, "learning_rate": 2.0314368113643423e-06, "loss": 0.0386016845703125, "step": 139855 }, { "epoch": 1.2093280646081745, "grad_norm": 0.9797798977273501, "learning_rate": 2.0312439906146578e-06, "loss": 0.23282432556152344, "step": 139860 }, { "epoch": 1.2093712981297178, "grad_norm": 1.2878387292759501, "learning_rate": 2.031051174332791e-06, "loss": 0.15465240478515624, "step": 139865 }, { "epoch": 1.209414531651261, "grad_norm": 0.15031616729467315, "learning_rate": 2.030858362519629e-06, "loss": 0.1097900390625, "step": 139870 }, { "epoch": 1.2094577651728045, "grad_norm": 11.602121687138766, "learning_rate": 2.0306655551760642e-06, "loss": 0.07202262878417968, "step": 139875 }, { "epoch": 1.2095009986943477, "grad_norm": 14.060684229122735, "learning_rate": 2.030472752302984e-06, "loss": 0.16922836303710936, "step": 139880 }, { "epoch": 1.209544232215891, "grad_norm": 21.195008171560495, "learning_rate": 2.030279953901277e-06, "loss": 0.067041015625, "step": 139885 }, { "epoch": 1.2095874657374341, "grad_norm": 0.07591991960471828, "learning_rate": 2.030087159971834e-06, "loss": 0.024592208862304687, "step": 139890 }, { "epoch": 1.2096306992589774, "grad_norm": 4.224053635813416, "learning_rate": 2.029894370515543e-06, "loss": 0.11344947814941406, "step": 139895 }, { "epoch": 1.2096739327805208, "grad_norm": 15.200443484211348, "learning_rate": 2.029701585533294e-06, "loss": 0.0672271728515625, "step": 139900 }, { "epoch": 1.209717166302064, "grad_norm": 0.8504616321669202, "learning_rate": 2.029508805025974e-06, "loss": 0.026032257080078124, "step": 139905 }, { "epoch": 1.2097603998236073, "grad_norm": 0.25176745746191587, "learning_rate": 2.0293160289944746e-06, "loss": 0.038701629638671874, "step": 139910 }, { "epoch": 1.2098036333451505, "grad_norm": 15.252964250956019, "learning_rate": 2.029123257439684e-06, "loss": 0.035770225524902347, "step": 139915 }, { "epoch": 1.2098468668666937, "grad_norm": 0.3059590197995821, "learning_rate": 2.028930490362491e-06, "loss": 0.07645149230957031, "step": 139920 }, { "epoch": 1.209890100388237, "grad_norm": 1.4653887638393863, "learning_rate": 2.028737727763785e-06, "loss": 0.05410614013671875, "step": 139925 }, { "epoch": 1.2099333339097802, "grad_norm": 0.1366827138340963, "learning_rate": 2.0285449696444554e-06, "loss": 0.029379653930664062, "step": 139930 }, { "epoch": 1.2099765674313236, "grad_norm": 0.7297903554592856, "learning_rate": 2.0283522160053886e-06, "loss": 0.07010040283203126, "step": 139935 }, { "epoch": 1.2100198009528669, "grad_norm": 0.26422714658280694, "learning_rate": 2.028159466847477e-06, "loss": 0.12401924133300782, "step": 139940 }, { "epoch": 1.21006303447441, "grad_norm": 0.3305619539152582, "learning_rate": 2.0279667221716073e-06, "loss": 0.0452301025390625, "step": 139945 }, { "epoch": 1.2101062679959533, "grad_norm": 36.06707466811635, "learning_rate": 2.0277739819786697e-06, "loss": 0.10259323120117188, "step": 139950 }, { "epoch": 1.2101495015174966, "grad_norm": 22.093982386294524, "learning_rate": 2.027581246269553e-06, "loss": 0.048557281494140625, "step": 139955 }, { "epoch": 1.2101927350390398, "grad_norm": 26.037156003915452, "learning_rate": 2.0273885150451454e-06, "loss": 0.12795791625976563, "step": 139960 }, { "epoch": 1.2102359685605832, "grad_norm": 0.9933265294102277, "learning_rate": 2.0271957883063348e-06, "loss": 0.10067901611328126, "step": 139965 }, { "epoch": 1.2102792020821265, "grad_norm": 1.1539311866385595, "learning_rate": 2.0270030660540125e-06, "loss": 0.05173492431640625, "step": 139970 }, { "epoch": 1.2103224356036697, "grad_norm": 20.732652570586588, "learning_rate": 2.0268103482890657e-06, "loss": 0.1119659423828125, "step": 139975 }, { "epoch": 1.210365669125213, "grad_norm": 0.1676844032828477, "learning_rate": 2.026617635012384e-06, "loss": 0.024686813354492188, "step": 139980 }, { "epoch": 1.2104089026467562, "grad_norm": 13.395597998779333, "learning_rate": 2.0264249262248557e-06, "loss": 0.051910400390625, "step": 139985 }, { "epoch": 1.2104521361682994, "grad_norm": 0.3298458405831369, "learning_rate": 2.02623222192737e-06, "loss": 0.020125722885131835, "step": 139990 }, { "epoch": 1.2104953696898426, "grad_norm": 2.8687611303266674, "learning_rate": 2.026039522120814e-06, "loss": 0.009920310974121094, "step": 139995 }, { "epoch": 1.210538603211386, "grad_norm": 4.925465038426819, "learning_rate": 2.025846826806078e-06, "loss": 0.01595458984375, "step": 140000 }, { "epoch": 1.2105818367329293, "grad_norm": 8.056770540700754, "learning_rate": 2.0256541359840504e-06, "loss": 0.036773681640625, "step": 140005 }, { "epoch": 1.2106250702544725, "grad_norm": 0.735007858660802, "learning_rate": 2.0254614496556206e-06, "loss": 0.0482086181640625, "step": 140010 }, { "epoch": 1.2106683037760158, "grad_norm": 0.5229254645056007, "learning_rate": 2.0252687678216764e-06, "loss": 0.16441192626953124, "step": 140015 }, { "epoch": 1.210711537297559, "grad_norm": 0.5054654477393337, "learning_rate": 2.0250760904831067e-06, "loss": 0.029074478149414062, "step": 140020 }, { "epoch": 1.2107547708191022, "grad_norm": 20.43704119124159, "learning_rate": 2.0248834176407984e-06, "loss": 0.12162551879882813, "step": 140025 }, { "epoch": 1.2107980043406457, "grad_norm": 0.41474711825934024, "learning_rate": 2.0246907492956417e-06, "loss": 0.27179527282714844, "step": 140030 }, { "epoch": 1.210841237862189, "grad_norm": 0.08860701642613113, "learning_rate": 2.0244980854485264e-06, "loss": 0.011606216430664062, "step": 140035 }, { "epoch": 1.2108844713837321, "grad_norm": 3.2088505470768847, "learning_rate": 2.0243054261003393e-06, "loss": 0.10785846710205078, "step": 140040 }, { "epoch": 1.2109277049052753, "grad_norm": 1.3122105282547787, "learning_rate": 2.02411277125197e-06, "loss": 0.14800186157226564, "step": 140045 }, { "epoch": 1.2109709384268186, "grad_norm": 0.8060342532584741, "learning_rate": 2.023920120904306e-06, "loss": 0.013637542724609375, "step": 140050 }, { "epoch": 1.2110141719483618, "grad_norm": 1.5110129320186803, "learning_rate": 2.0237274750582357e-06, "loss": 0.06529541015625, "step": 140055 }, { "epoch": 1.211057405469905, "grad_norm": 17.775475951497587, "learning_rate": 2.0235348337146483e-06, "loss": 0.0378509521484375, "step": 140060 }, { "epoch": 1.2111006389914485, "grad_norm": 1.3397224497972928, "learning_rate": 2.0233421968744306e-06, "loss": 0.021779632568359374, "step": 140065 }, { "epoch": 1.2111438725129917, "grad_norm": 8.164723738289803, "learning_rate": 2.023149564538474e-06, "loss": 0.11655406951904297, "step": 140070 }, { "epoch": 1.211187106034535, "grad_norm": 0.6569561078861365, "learning_rate": 2.022956936707665e-06, "loss": 0.02468414306640625, "step": 140075 }, { "epoch": 1.2112303395560782, "grad_norm": 4.176216838755505, "learning_rate": 2.0227643133828923e-06, "loss": 0.054470062255859375, "step": 140080 }, { "epoch": 1.2112735730776214, "grad_norm": 0.0987429422357778, "learning_rate": 2.022571694565044e-06, "loss": 0.027890777587890624, "step": 140085 }, { "epoch": 1.2113168065991649, "grad_norm": 0.9115052143825515, "learning_rate": 2.0223790802550097e-06, "loss": 0.03125762939453125, "step": 140090 }, { "epoch": 1.211360040120708, "grad_norm": 5.807628494364213, "learning_rate": 2.0221864704536744e-06, "loss": 0.031757354736328125, "step": 140095 }, { "epoch": 1.2114032736422513, "grad_norm": 0.543267449670483, "learning_rate": 2.02199386516193e-06, "loss": 0.09321279525756836, "step": 140100 }, { "epoch": 1.2114465071637945, "grad_norm": 0.28642928777722504, "learning_rate": 2.0218012643806644e-06, "loss": 0.02277717590332031, "step": 140105 }, { "epoch": 1.2114897406853378, "grad_norm": 6.19992956981547, "learning_rate": 2.021608668110764e-06, "loss": 0.03958415985107422, "step": 140110 }, { "epoch": 1.211532974206881, "grad_norm": 73.60117789314782, "learning_rate": 2.021416076353118e-06, "loss": 0.2743492126464844, "step": 140115 }, { "epoch": 1.2115762077284242, "grad_norm": 1.5611149925076584, "learning_rate": 2.0212234891086152e-06, "loss": 0.02339591979980469, "step": 140120 }, { "epoch": 1.2116194412499675, "grad_norm": 2.2771751803030007, "learning_rate": 2.0210309063781412e-06, "loss": 0.047454833984375, "step": 140125 }, { "epoch": 1.211662674771511, "grad_norm": 0.2544447715490515, "learning_rate": 2.0208383281625883e-06, "loss": 0.05205726623535156, "step": 140130 }, { "epoch": 1.2117059082930541, "grad_norm": 0.45601727571953743, "learning_rate": 2.020645754462842e-06, "loss": 0.169305419921875, "step": 140135 }, { "epoch": 1.2117491418145974, "grad_norm": 13.759664723510317, "learning_rate": 2.0204531852797905e-06, "loss": 0.03459014892578125, "step": 140140 }, { "epoch": 1.2117923753361406, "grad_norm": 0.19039581203845649, "learning_rate": 2.020260620614323e-06, "loss": 0.3806610107421875, "step": 140145 }, { "epoch": 1.2118356088576838, "grad_norm": 0.5160610672413545, "learning_rate": 2.020068060467327e-06, "loss": 0.07267837524414063, "step": 140150 }, { "epoch": 1.2118788423792273, "grad_norm": 0.3620082143976579, "learning_rate": 2.0198755048396904e-06, "loss": 0.16048507690429686, "step": 140155 }, { "epoch": 1.2119220759007705, "grad_norm": 3.487039446887768, "learning_rate": 2.0196829537323e-06, "loss": 0.028572845458984374, "step": 140160 }, { "epoch": 1.2119653094223137, "grad_norm": 4.314570890093095, "learning_rate": 2.019490407146046e-06, "loss": 0.025070953369140624, "step": 140165 }, { "epoch": 1.212008542943857, "grad_norm": 0.1931163034122204, "learning_rate": 2.019297865081816e-06, "loss": 0.02334442138671875, "step": 140170 }, { "epoch": 1.2120517764654002, "grad_norm": 1.133673469467243, "learning_rate": 2.0191053275404975e-06, "loss": 0.08493919372558593, "step": 140175 }, { "epoch": 1.2120950099869434, "grad_norm": 3.2609125609082534, "learning_rate": 2.0189127945229788e-06, "loss": 0.1475383758544922, "step": 140180 }, { "epoch": 1.2121382435084866, "grad_norm": 0.7775598074776742, "learning_rate": 2.0187202660301475e-06, "loss": 0.04136962890625, "step": 140185 }, { "epoch": 1.21218147703003, "grad_norm": 4.383322721713326, "learning_rate": 2.018527742062891e-06, "loss": 0.08815765380859375, "step": 140190 }, { "epoch": 1.2122247105515733, "grad_norm": 3.9323521763349887, "learning_rate": 2.018335222622098e-06, "loss": 0.137860107421875, "step": 140195 }, { "epoch": 1.2122679440731166, "grad_norm": 0.8878368376615524, "learning_rate": 2.0181427077086564e-06, "loss": 0.025531005859375, "step": 140200 }, { "epoch": 1.2123111775946598, "grad_norm": 0.7349908842680041, "learning_rate": 2.017950197323454e-06, "loss": 0.1511077880859375, "step": 140205 }, { "epoch": 1.212354411116203, "grad_norm": 0.6868189211427367, "learning_rate": 2.017757691467379e-06, "loss": 0.09233102798461915, "step": 140210 }, { "epoch": 1.2123976446377462, "grad_norm": 15.288484181176237, "learning_rate": 2.0175651901413187e-06, "loss": 0.24751777648925782, "step": 140215 }, { "epoch": 1.2124408781592897, "grad_norm": 2.767399612783215, "learning_rate": 2.0173726933461594e-06, "loss": 0.0597930908203125, "step": 140220 }, { "epoch": 1.212484111680833, "grad_norm": 0.37522012064823645, "learning_rate": 2.0171802010827915e-06, "loss": 0.05294647216796875, "step": 140225 }, { "epoch": 1.2125273452023762, "grad_norm": 0.19576332500098542, "learning_rate": 2.016987713352101e-06, "loss": 0.08464813232421875, "step": 140230 }, { "epoch": 1.2125705787239194, "grad_norm": 9.127169230221387, "learning_rate": 2.0167952301549776e-06, "loss": 0.03025360107421875, "step": 140235 }, { "epoch": 1.2126138122454626, "grad_norm": 5.6463762376291875, "learning_rate": 2.0166027514923072e-06, "loss": 0.029411888122558592, "step": 140240 }, { "epoch": 1.2126570457670058, "grad_norm": 63.17359862810207, "learning_rate": 2.0164102773649782e-06, "loss": 0.1848682403564453, "step": 140245 }, { "epoch": 1.212700279288549, "grad_norm": 1.5398731983930085, "learning_rate": 2.0162178077738774e-06, "loss": 0.035726165771484374, "step": 140250 }, { "epoch": 1.2127435128100925, "grad_norm": 3.481164132089516, "learning_rate": 2.0160253427198925e-06, "loss": 0.021065521240234374, "step": 140255 }, { "epoch": 1.2127867463316357, "grad_norm": 1.7779612685989818, "learning_rate": 2.015832882203913e-06, "loss": 0.07366065979003907, "step": 140260 }, { "epoch": 1.212829979853179, "grad_norm": 0.20027654148665378, "learning_rate": 2.0156404262268254e-06, "loss": 0.008084869384765625, "step": 140265 }, { "epoch": 1.2128732133747222, "grad_norm": 0.25240793580355614, "learning_rate": 2.015447974789517e-06, "loss": 0.04245567321777344, "step": 140270 }, { "epoch": 1.2129164468962654, "grad_norm": 5.3479597889101305, "learning_rate": 2.0152555278928758e-06, "loss": 0.019360733032226563, "step": 140275 }, { "epoch": 1.2129596804178087, "grad_norm": 2.546025078136413, "learning_rate": 2.0150630855377882e-06, "loss": 0.013583660125732422, "step": 140280 }, { "epoch": 1.2130029139393521, "grad_norm": 1.0414856322113772, "learning_rate": 2.0148706477251423e-06, "loss": 0.03056182861328125, "step": 140285 }, { "epoch": 1.2130461474608953, "grad_norm": 1.672630795737818, "learning_rate": 2.014678214455827e-06, "loss": 0.17556915283203126, "step": 140290 }, { "epoch": 1.2130893809824386, "grad_norm": 1.3318081863462032, "learning_rate": 2.0144857857307285e-06, "loss": 0.039811897277832034, "step": 140295 }, { "epoch": 1.2131326145039818, "grad_norm": 4.007285066037913, "learning_rate": 2.0142933615507348e-06, "loss": 0.14546241760253906, "step": 140300 }, { "epoch": 1.213175848025525, "grad_norm": 2.8328345334552694, "learning_rate": 2.014100941916732e-06, "loss": 0.020847320556640625, "step": 140305 }, { "epoch": 1.2132190815470683, "grad_norm": 15.257200786457823, "learning_rate": 2.0139085268296096e-06, "loss": 0.03293819427490234, "step": 140310 }, { "epoch": 1.2132623150686115, "grad_norm": 2.3884909831970225, "learning_rate": 2.0137161162902523e-06, "loss": 0.07324399948120117, "step": 140315 }, { "epoch": 1.213305548590155, "grad_norm": 8.595202477497754, "learning_rate": 2.0135237102995508e-06, "loss": 0.11244354248046876, "step": 140320 }, { "epoch": 1.2133487821116982, "grad_norm": 5.493477874671657, "learning_rate": 2.0133313088583907e-06, "loss": 0.11007766723632813, "step": 140325 }, { "epoch": 1.2133920156332414, "grad_norm": 1.8511493181019836, "learning_rate": 2.0131389119676594e-06, "loss": 0.11615447998046875, "step": 140330 }, { "epoch": 1.2134352491547846, "grad_norm": 5.097957862001608, "learning_rate": 2.012946519628243e-06, "loss": 0.14271697998046876, "step": 140335 }, { "epoch": 1.2134784826763279, "grad_norm": 6.016849525942738, "learning_rate": 2.0127541318410315e-06, "loss": 0.036612319946289065, "step": 140340 }, { "epoch": 1.2135217161978713, "grad_norm": 9.832056532594775, "learning_rate": 2.0125617486069107e-06, "loss": 0.16987380981445313, "step": 140345 }, { "epoch": 1.2135649497194145, "grad_norm": 0.9820026219126622, "learning_rate": 2.0123693699267656e-06, "loss": 0.07818374633789063, "step": 140350 }, { "epoch": 1.2136081832409578, "grad_norm": 76.52589446850574, "learning_rate": 2.012176995801488e-06, "loss": 0.15225868225097655, "step": 140355 }, { "epoch": 1.213651416762501, "grad_norm": 0.09722349466059102, "learning_rate": 2.0119846262319624e-06, "loss": 0.33330307006835935, "step": 140360 }, { "epoch": 1.2136946502840442, "grad_norm": 2.828652792674765, "learning_rate": 2.011792261219076e-06, "loss": 0.01505584716796875, "step": 140365 }, { "epoch": 1.2137378838055874, "grad_norm": 3.5797469192327704, "learning_rate": 2.0115999007637167e-06, "loss": 0.0213165283203125, "step": 140370 }, { "epoch": 1.2137811173271307, "grad_norm": 6.919719094540412, "learning_rate": 2.0114075448667715e-06, "loss": 0.10519447326660156, "step": 140375 }, { "epoch": 1.213824350848674, "grad_norm": 1.6292155144468814, "learning_rate": 2.0112151935291257e-06, "loss": 0.011606407165527344, "step": 140380 }, { "epoch": 1.2138675843702174, "grad_norm": 0.6767407921337387, "learning_rate": 2.0110228467516697e-06, "loss": 0.038639068603515625, "step": 140385 }, { "epoch": 1.2139108178917606, "grad_norm": 42.712609651409736, "learning_rate": 2.0108305045352892e-06, "loss": 0.09156036376953125, "step": 140390 }, { "epoch": 1.2139540514133038, "grad_norm": 0.05978132416874409, "learning_rate": 2.0106381668808702e-06, "loss": 0.0653076171875, "step": 140395 }, { "epoch": 1.213997284934847, "grad_norm": 4.643424699177445, "learning_rate": 2.010445833789301e-06, "loss": 0.03949470520019531, "step": 140400 }, { "epoch": 1.2140405184563903, "grad_norm": 6.318203580586382, "learning_rate": 2.010253505261468e-06, "loss": 0.11029891967773438, "step": 140405 }, { "epoch": 1.2140837519779337, "grad_norm": 6.181507687176012, "learning_rate": 2.0100611812982587e-06, "loss": 0.07860870361328125, "step": 140410 }, { "epoch": 1.214126985499477, "grad_norm": 5.779845658203179, "learning_rate": 2.0098688619005583e-06, "loss": 0.016881561279296874, "step": 140415 }, { "epoch": 1.2141702190210202, "grad_norm": 2.090672327491244, "learning_rate": 2.0096765470692565e-06, "loss": 0.0474822998046875, "step": 140420 }, { "epoch": 1.2142134525425634, "grad_norm": 1.5856699688669602, "learning_rate": 2.0094842368052386e-06, "loss": 0.009302520751953125, "step": 140425 }, { "epoch": 1.2142566860641066, "grad_norm": 0.6300018423517066, "learning_rate": 2.0092919311093922e-06, "loss": 0.014987945556640625, "step": 140430 }, { "epoch": 1.2142999195856499, "grad_norm": 1.3540020705151137, "learning_rate": 2.009099629982604e-06, "loss": 0.11170158386230469, "step": 140435 }, { "epoch": 1.214343153107193, "grad_norm": 41.78240564632321, "learning_rate": 2.008907333425761e-06, "loss": 0.2362689971923828, "step": 140440 }, { "epoch": 1.2143863866287365, "grad_norm": 1.4408721209125297, "learning_rate": 2.0087150414397483e-06, "loss": 0.09886283874511718, "step": 140445 }, { "epoch": 1.2144296201502798, "grad_norm": 1.3898926386424633, "learning_rate": 2.0085227540254555e-06, "loss": 0.007867050170898438, "step": 140450 }, { "epoch": 1.214472853671823, "grad_norm": 1.7824363373594891, "learning_rate": 2.0083304711837676e-06, "loss": 0.012253189086914062, "step": 140455 }, { "epoch": 1.2145160871933662, "grad_norm": 1.3505278542039583, "learning_rate": 2.0081381929155722e-06, "loss": 0.022190093994140625, "step": 140460 }, { "epoch": 1.2145593207149095, "grad_norm": 1.6123376735437676, "learning_rate": 2.0079459192217563e-06, "loss": 0.02534942626953125, "step": 140465 }, { "epoch": 1.2146025542364527, "grad_norm": 61.081644132839166, "learning_rate": 2.007753650103206e-06, "loss": 0.17270050048828126, "step": 140470 }, { "epoch": 1.2146457877579961, "grad_norm": 0.34501647394974655, "learning_rate": 2.0075613855608066e-06, "loss": 0.08810272216796874, "step": 140475 }, { "epoch": 1.2146890212795394, "grad_norm": 9.07092764261592, "learning_rate": 2.0073691255954474e-06, "loss": 0.11444473266601562, "step": 140480 }, { "epoch": 1.2147322548010826, "grad_norm": 14.366109535073724, "learning_rate": 2.0071768702080145e-06, "loss": 0.06019859313964844, "step": 140485 }, { "epoch": 1.2147754883226258, "grad_norm": 0.6266883828379609, "learning_rate": 2.0069846193993945e-06, "loss": 0.035941696166992186, "step": 140490 }, { "epoch": 1.214818721844169, "grad_norm": 25.314517389003193, "learning_rate": 2.0067923731704736e-06, "loss": 0.05788497924804688, "step": 140495 }, { "epoch": 1.2148619553657123, "grad_norm": 3.038948417635599, "learning_rate": 2.0066001315221375e-06, "loss": 0.0774200439453125, "step": 140500 }, { "epoch": 1.2149051888872555, "grad_norm": 16.77429997557612, "learning_rate": 2.0064078944552747e-06, "loss": 0.10893764495849609, "step": 140505 }, { "epoch": 1.214948422408799, "grad_norm": 14.279389003690353, "learning_rate": 2.00621566197077e-06, "loss": 0.17122154235839843, "step": 140510 }, { "epoch": 1.2149916559303422, "grad_norm": 0.5148453322600505, "learning_rate": 2.0060234340695114e-06, "loss": 0.014269256591796875, "step": 140515 }, { "epoch": 1.2150348894518854, "grad_norm": 1.31819980080145, "learning_rate": 2.0058312107523853e-06, "loss": 0.25166854858398435, "step": 140520 }, { "epoch": 1.2150781229734287, "grad_norm": 12.643225687942868, "learning_rate": 2.0056389920202774e-06, "loss": 0.0477813720703125, "step": 140525 }, { "epoch": 1.2151213564949719, "grad_norm": 17.11381191663115, "learning_rate": 2.0054467778740745e-06, "loss": 0.08583984375, "step": 140530 }, { "epoch": 1.2151645900165151, "grad_norm": 1.8120019278563049, "learning_rate": 2.005254568314664e-06, "loss": 0.11251792907714844, "step": 140535 }, { "epoch": 1.2152078235380586, "grad_norm": 1.0247960992923035, "learning_rate": 2.0050623633429294e-06, "loss": 0.009865379333496094, "step": 140540 }, { "epoch": 1.2152510570596018, "grad_norm": 1.7577028083386586, "learning_rate": 2.004870162959761e-06, "loss": 0.0727935791015625, "step": 140545 }, { "epoch": 1.215294290581145, "grad_norm": 28.955403219510806, "learning_rate": 2.0046779671660437e-06, "loss": 0.17102203369140626, "step": 140550 }, { "epoch": 1.2153375241026882, "grad_norm": 7.203972578918077, "learning_rate": 2.0044857759626637e-06, "loss": 0.267987060546875, "step": 140555 }, { "epoch": 1.2153807576242315, "grad_norm": 1.1879595743513403, "learning_rate": 2.0042935893505064e-06, "loss": 0.13020763397216797, "step": 140560 }, { "epoch": 1.2154239911457747, "grad_norm": 0.12651274682367367, "learning_rate": 2.0041014073304594e-06, "loss": 0.009039211273193359, "step": 140565 }, { "epoch": 1.215467224667318, "grad_norm": 0.6629122198236199, "learning_rate": 2.003909229903408e-06, "loss": 0.020627593994140624, "step": 140570 }, { "epoch": 1.2155104581888614, "grad_norm": 0.26489253871805035, "learning_rate": 2.0037170570702405e-06, "loss": 0.23653488159179686, "step": 140575 }, { "epoch": 1.2155536917104046, "grad_norm": 33.262521222052385, "learning_rate": 2.003524888831842e-06, "loss": 0.040422821044921876, "step": 140580 }, { "epoch": 1.2155969252319478, "grad_norm": 2.2268378877643134, "learning_rate": 2.003332725189098e-06, "loss": 0.018187332153320312, "step": 140585 }, { "epoch": 1.215640158753491, "grad_norm": 1.5405494218553755, "learning_rate": 2.0031405661428952e-06, "loss": 0.10072650909423828, "step": 140590 }, { "epoch": 1.2156833922750343, "grad_norm": 14.915125521769255, "learning_rate": 2.002948411694121e-06, "loss": 0.0628936767578125, "step": 140595 }, { "epoch": 1.2157266257965778, "grad_norm": 10.513575528068936, "learning_rate": 2.00275626184366e-06, "loss": 0.11415786743164062, "step": 140600 }, { "epoch": 1.215769859318121, "grad_norm": 2.1652164268615706, "learning_rate": 2.002564116592398e-06, "loss": 0.23974990844726562, "step": 140605 }, { "epoch": 1.2158130928396642, "grad_norm": 29.170510990298887, "learning_rate": 2.002371975941223e-06, "loss": 0.1275686264038086, "step": 140610 }, { "epoch": 1.2158563263612074, "grad_norm": 0.26359539453416597, "learning_rate": 2.002179839891021e-06, "loss": 0.2494415283203125, "step": 140615 }, { "epoch": 1.2158995598827507, "grad_norm": 3.772735778512824, "learning_rate": 2.0019877084426763e-06, "loss": 0.0704376220703125, "step": 140620 }, { "epoch": 1.215942793404294, "grad_norm": 20.708733730158112, "learning_rate": 2.0017955815970773e-06, "loss": 0.135858154296875, "step": 140625 }, { "epoch": 1.2159860269258371, "grad_norm": 81.22541697474563, "learning_rate": 2.0016034593551084e-06, "loss": 0.1857015609741211, "step": 140630 }, { "epoch": 1.2160292604473806, "grad_norm": 0.8601814484965307, "learning_rate": 2.001411341717655e-06, "loss": 0.027973747253417967, "step": 140635 }, { "epoch": 1.2160724939689238, "grad_norm": 4.45436635013066, "learning_rate": 2.0012192286856053e-06, "loss": 0.08009681701660157, "step": 140640 }, { "epoch": 1.216115727490467, "grad_norm": 10.846256091800015, "learning_rate": 2.0010271202598444e-06, "loss": 0.05378589630126953, "step": 140645 }, { "epoch": 1.2161589610120103, "grad_norm": 30.79198952369164, "learning_rate": 2.0008350164412575e-06, "loss": 0.05342864990234375, "step": 140650 }, { "epoch": 1.2162021945335535, "grad_norm": 9.189307524374609, "learning_rate": 2.0006429172307323e-06, "loss": 0.1805511474609375, "step": 140655 }, { "epoch": 1.2162454280550967, "grad_norm": 6.461755484696611, "learning_rate": 2.0004508226291526e-06, "loss": 0.039585113525390625, "step": 140660 }, { "epoch": 1.2162886615766402, "grad_norm": 11.57021706958065, "learning_rate": 2.000258732637405e-06, "loss": 0.07118301391601563, "step": 140665 }, { "epoch": 1.2163318950981834, "grad_norm": 10.669210935246928, "learning_rate": 2.0000666472563765e-06, "loss": 0.03241806030273438, "step": 140670 }, { "epoch": 1.2163751286197266, "grad_norm": 25.664996149440704, "learning_rate": 1.999874566486953e-06, "loss": 0.14585533142089843, "step": 140675 }, { "epoch": 1.2164183621412699, "grad_norm": 0.5886992022724072, "learning_rate": 1.9996824903300183e-06, "loss": 0.025051116943359375, "step": 140680 }, { "epoch": 1.216461595662813, "grad_norm": 1.6736278213160216, "learning_rate": 1.9994904187864606e-06, "loss": 0.020003509521484376, "step": 140685 }, { "epoch": 1.2165048291843563, "grad_norm": 0.7154343427556892, "learning_rate": 1.999298351857165e-06, "loss": 0.04954376220703125, "step": 140690 }, { "epoch": 1.2165480627058995, "grad_norm": 0.5061933171160626, "learning_rate": 1.999106289543016e-06, "loss": 0.06869297027587891, "step": 140695 }, { "epoch": 1.216591296227443, "grad_norm": 0.7726596095773555, "learning_rate": 1.9989142318449e-06, "loss": 0.035506820678710936, "step": 140700 }, { "epoch": 1.2166345297489862, "grad_norm": 2.2709828103965872, "learning_rate": 1.9987221787637033e-06, "loss": 0.10700149536132812, "step": 140705 }, { "epoch": 1.2166777632705295, "grad_norm": 40.73598446175824, "learning_rate": 1.998530130300312e-06, "loss": 0.13417816162109375, "step": 140710 }, { "epoch": 1.2167209967920727, "grad_norm": 6.146863624732085, "learning_rate": 1.9983380864556113e-06, "loss": 0.07944564819335938, "step": 140715 }, { "epoch": 1.216764230313616, "grad_norm": 2.0955983408452075, "learning_rate": 1.9981460472304872e-06, "loss": 0.025618743896484376, "step": 140720 }, { "epoch": 1.2168074638351591, "grad_norm": 0.5083189423072064, "learning_rate": 1.997954012625824e-06, "loss": 0.25545425415039064, "step": 140725 }, { "epoch": 1.2168506973567026, "grad_norm": 0.6203307025335768, "learning_rate": 1.9977619826425085e-06, "loss": 0.06712074279785156, "step": 140730 }, { "epoch": 1.2168939308782458, "grad_norm": 8.59942253227895, "learning_rate": 1.997569957281426e-06, "loss": 0.03761215209960937, "step": 140735 }, { "epoch": 1.216937164399789, "grad_norm": 16.58247468220582, "learning_rate": 1.9973779365434633e-06, "loss": 0.06048927307128906, "step": 140740 }, { "epoch": 1.2169803979213323, "grad_norm": 1.1968608699053407, "learning_rate": 1.9971859204295044e-06, "loss": 0.030479049682617186, "step": 140745 }, { "epoch": 1.2170236314428755, "grad_norm": 0.27541815154361604, "learning_rate": 1.9969939089404357e-06, "loss": 0.04263916015625, "step": 140750 }, { "epoch": 1.2170668649644187, "grad_norm": 0.16985141017265337, "learning_rate": 1.9968019020771423e-06, "loss": 0.050848388671875, "step": 140755 }, { "epoch": 1.217110098485962, "grad_norm": 0.22522500874037185, "learning_rate": 1.99660989984051e-06, "loss": 0.08697357177734374, "step": 140760 }, { "epoch": 1.2171533320075054, "grad_norm": 4.318837069446647, "learning_rate": 1.9964179022314225e-06, "loss": 0.02510986328125, "step": 140765 }, { "epoch": 1.2171965655290486, "grad_norm": 0.8861892940568605, "learning_rate": 1.9962259092507687e-06, "loss": 0.0204254150390625, "step": 140770 }, { "epoch": 1.2172397990505919, "grad_norm": 44.296172079290514, "learning_rate": 1.9960339208994328e-06, "loss": 0.10777740478515625, "step": 140775 }, { "epoch": 1.217283032572135, "grad_norm": 3.980190525725364, "learning_rate": 1.995841937178299e-06, "loss": 0.03228912353515625, "step": 140780 }, { "epoch": 1.2173262660936783, "grad_norm": 6.968870230104541, "learning_rate": 1.995649958088253e-06, "loss": 0.07015571594238282, "step": 140785 }, { "epoch": 1.2173694996152216, "grad_norm": 0.6801394908984536, "learning_rate": 1.9954579836301814e-06, "loss": 0.07122879028320313, "step": 140790 }, { "epoch": 1.217412733136765, "grad_norm": 2.0976310654912558, "learning_rate": 1.995266013804968e-06, "loss": 0.03221397399902344, "step": 140795 }, { "epoch": 1.2174559666583082, "grad_norm": 0.8964285969848492, "learning_rate": 1.9950740486134997e-06, "loss": 0.10086536407470703, "step": 140800 }, { "epoch": 1.2174992001798515, "grad_norm": 2.162987941576405, "learning_rate": 1.994882088056661e-06, "loss": 0.05950241088867188, "step": 140805 }, { "epoch": 1.2175424337013947, "grad_norm": 14.447920950305704, "learning_rate": 1.9946901321353376e-06, "loss": 0.04850616455078125, "step": 140810 }, { "epoch": 1.217585667222938, "grad_norm": 1.5218360596434706, "learning_rate": 1.9944981808504138e-06, "loss": 0.1653350830078125, "step": 140815 }, { "epoch": 1.2176289007444812, "grad_norm": 0.9186204659951333, "learning_rate": 1.9943062342027755e-06, "loss": 0.155914306640625, "step": 140820 }, { "epoch": 1.2176721342660244, "grad_norm": 2.3369510323292673, "learning_rate": 1.9941142921933076e-06, "loss": 0.03065147399902344, "step": 140825 }, { "epoch": 1.2177153677875678, "grad_norm": 0.20548893412674232, "learning_rate": 1.9939223548228964e-06, "loss": 0.011295700073242187, "step": 140830 }, { "epoch": 1.217758601309111, "grad_norm": 12.022649240169303, "learning_rate": 1.9937304220924266e-06, "loss": 0.05782928466796875, "step": 140835 }, { "epoch": 1.2178018348306543, "grad_norm": 4.1680865938330065, "learning_rate": 1.9935384940027835e-06, "loss": 0.031083297729492188, "step": 140840 }, { "epoch": 1.2178450683521975, "grad_norm": 1.2684035931066044, "learning_rate": 1.993346570554851e-06, "loss": 0.06354827880859375, "step": 140845 }, { "epoch": 1.2178883018737408, "grad_norm": 4.25309071333854, "learning_rate": 1.993154651749516e-06, "loss": 0.05004119873046875, "step": 140850 }, { "epoch": 1.2179315353952842, "grad_norm": 0.42302315683706837, "learning_rate": 1.9929627375876626e-06, "loss": 0.09168777465820313, "step": 140855 }, { "epoch": 1.2179747689168274, "grad_norm": 3.1430414655917795, "learning_rate": 1.992770828070175e-06, "loss": 0.09715843200683594, "step": 140860 }, { "epoch": 1.2180180024383707, "grad_norm": 0.5936226171902683, "learning_rate": 1.9925789231979402e-06, "loss": 0.0022029876708984375, "step": 140865 }, { "epoch": 1.218061235959914, "grad_norm": 1.3988264842684999, "learning_rate": 1.9923870229718423e-06, "loss": 0.037530517578125, "step": 140870 }, { "epoch": 1.2181044694814571, "grad_norm": 2.8730676106236133, "learning_rate": 1.9921951273927666e-06, "loss": 0.07248802185058593, "step": 140875 }, { "epoch": 1.2181477030030003, "grad_norm": 0.18608987348993977, "learning_rate": 1.992003236461598e-06, "loss": 0.10708341598510743, "step": 140880 }, { "epoch": 1.2181909365245436, "grad_norm": 0.44150795310781427, "learning_rate": 1.991811350179221e-06, "loss": 0.0483856201171875, "step": 140885 }, { "epoch": 1.218234170046087, "grad_norm": 37.83128216776173, "learning_rate": 1.9916194685465205e-06, "loss": 0.25026473999023435, "step": 140890 }, { "epoch": 1.2182774035676303, "grad_norm": 0.6609855682248017, "learning_rate": 1.9914275915643826e-06, "loss": 0.007085037231445312, "step": 140895 }, { "epoch": 1.2183206370891735, "grad_norm": 0.8211611527252134, "learning_rate": 1.9912357192336915e-06, "loss": 0.051020050048828126, "step": 140900 }, { "epoch": 1.2183638706107167, "grad_norm": 10.505247697382226, "learning_rate": 1.991043851555332e-06, "loss": 0.2402069091796875, "step": 140905 }, { "epoch": 1.21840710413226, "grad_norm": 4.033384785230556, "learning_rate": 1.9908519885301894e-06, "loss": 0.047078704833984374, "step": 140910 }, { "epoch": 1.2184503376538032, "grad_norm": 26.475057702377796, "learning_rate": 1.9906601301591484e-06, "loss": 0.11803903579711914, "step": 140915 }, { "epoch": 1.2184935711753466, "grad_norm": 0.17408421321584316, "learning_rate": 1.9904682764430926e-06, "loss": 0.06175689697265625, "step": 140920 }, { "epoch": 1.2185368046968899, "grad_norm": 8.794245246132142, "learning_rate": 1.9902764273829093e-06, "loss": 0.13883514404296876, "step": 140925 }, { "epoch": 1.218580038218433, "grad_norm": 0.5634775445025958, "learning_rate": 1.9900845829794805e-06, "loss": 0.027320098876953126, "step": 140930 }, { "epoch": 1.2186232717399763, "grad_norm": 0.9324537723351829, "learning_rate": 1.9898927432336937e-06, "loss": 0.09393482208251953, "step": 140935 }, { "epoch": 1.2186665052615195, "grad_norm": 3.2151774182862067, "learning_rate": 1.9897009081464318e-06, "loss": 0.0838043212890625, "step": 140940 }, { "epoch": 1.2187097387830628, "grad_norm": 4.304573904890287, "learning_rate": 1.9895090777185804e-06, "loss": 0.015129852294921874, "step": 140945 }, { "epoch": 1.218752972304606, "grad_norm": 1.3305622471669631, "learning_rate": 1.9893172519510233e-06, "loss": 0.09127159118652343, "step": 140950 }, { "epoch": 1.2187962058261494, "grad_norm": 0.0877454612337358, "learning_rate": 1.9891254308446454e-06, "loss": 0.009015655517578125, "step": 140955 }, { "epoch": 1.2188394393476927, "grad_norm": 3.6016335262329164, "learning_rate": 1.9889336144003314e-06, "loss": 0.07487716674804687, "step": 140960 }, { "epoch": 1.218882672869236, "grad_norm": 2.3575450617534077, "learning_rate": 1.9887418026189676e-06, "loss": 0.098321533203125, "step": 140965 }, { "epoch": 1.2189259063907791, "grad_norm": 0.07945150488781115, "learning_rate": 1.988549995501437e-06, "loss": 0.32132530212402344, "step": 140970 }, { "epoch": 1.2189691399123224, "grad_norm": 0.17039686405221557, "learning_rate": 1.988358193048624e-06, "loss": 0.054683685302734375, "step": 140975 }, { "epoch": 1.2190123734338656, "grad_norm": 9.505497397550561, "learning_rate": 1.988166395261414e-06, "loss": 0.038896942138671876, "step": 140980 }, { "epoch": 1.219055606955409, "grad_norm": 3.0154399584596283, "learning_rate": 1.98797460214069e-06, "loss": 0.027663040161132812, "step": 140985 }, { "epoch": 1.2190988404769523, "grad_norm": 0.5332339561715662, "learning_rate": 1.987782813687338e-06, "loss": 0.17336082458496094, "step": 140990 }, { "epoch": 1.2191420739984955, "grad_norm": 0.991386437955399, "learning_rate": 1.987591029902243e-06, "loss": 0.04337158203125, "step": 140995 }, { "epoch": 1.2191853075200387, "grad_norm": 34.08485398013248, "learning_rate": 1.987399250786289e-06, "loss": 0.14418907165527345, "step": 141000 }, { "epoch": 1.219228541041582, "grad_norm": 1.26033169050509, "learning_rate": 1.98720747634036e-06, "loss": 0.1311309814453125, "step": 141005 }, { "epoch": 1.2192717745631252, "grad_norm": 8.44367455064434, "learning_rate": 1.98701570656534e-06, "loss": 0.074609375, "step": 141010 }, { "epoch": 1.2193150080846684, "grad_norm": 0.3885185882317903, "learning_rate": 1.986823941462114e-06, "loss": 0.27303614616394045, "step": 141015 }, { "epoch": 1.2193582416062119, "grad_norm": 0.48062105979331354, "learning_rate": 1.9866321810315667e-06, "loss": 0.16050262451171876, "step": 141020 }, { "epoch": 1.219401475127755, "grad_norm": 16.403195527995646, "learning_rate": 1.986440425274583e-06, "loss": 0.0389373779296875, "step": 141025 }, { "epoch": 1.2194447086492983, "grad_norm": 0.9861995282717301, "learning_rate": 1.986248674192046e-06, "loss": 0.02875823974609375, "step": 141030 }, { "epoch": 1.2194879421708416, "grad_norm": 10.620288423525555, "learning_rate": 1.9860569277848405e-06, "loss": 0.08197174072265626, "step": 141035 }, { "epoch": 1.2195311756923848, "grad_norm": 0.08487067940072823, "learning_rate": 1.9858651860538506e-06, "loss": 0.19883193969726562, "step": 141040 }, { "epoch": 1.2195744092139282, "grad_norm": 0.5858440841031135, "learning_rate": 1.985673448999961e-06, "loss": 0.009450912475585938, "step": 141045 }, { "epoch": 1.2196176427354715, "grad_norm": 9.81395073049504, "learning_rate": 1.985481716624055e-06, "loss": 0.0170562744140625, "step": 141050 }, { "epoch": 1.2196608762570147, "grad_norm": 7.2544653998525215, "learning_rate": 1.9852899889270187e-06, "loss": 0.03974132537841797, "step": 141055 }, { "epoch": 1.219704109778558, "grad_norm": 7.5543057531903655, "learning_rate": 1.9850982659097356e-06, "loss": 0.04426765441894531, "step": 141060 }, { "epoch": 1.2197473433001012, "grad_norm": 1.5796924157452692, "learning_rate": 1.9849065475730898e-06, "loss": 0.010412216186523438, "step": 141065 }, { "epoch": 1.2197905768216444, "grad_norm": 1.67674919062047, "learning_rate": 1.984714833917964e-06, "loss": 0.023188018798828126, "step": 141070 }, { "epoch": 1.2198338103431876, "grad_norm": 3.3416454839491214, "learning_rate": 1.9845231249452446e-06, "loss": 0.025777816772460938, "step": 141075 }, { "epoch": 1.2198770438647308, "grad_norm": 3.5253042442331557, "learning_rate": 1.9843314206558134e-06, "loss": 0.02974090576171875, "step": 141080 }, { "epoch": 1.2199202773862743, "grad_norm": 1.2163097121228181, "learning_rate": 1.9841397210505575e-06, "loss": 0.01330413818359375, "step": 141085 }, { "epoch": 1.2199635109078175, "grad_norm": 0.9233236327554285, "learning_rate": 1.9839480261303593e-06, "loss": 0.056557083129882814, "step": 141090 }, { "epoch": 1.2200067444293607, "grad_norm": 1.4069418996372294, "learning_rate": 1.9837563358961033e-06, "loss": 0.11871109008789063, "step": 141095 }, { "epoch": 1.220049977950904, "grad_norm": 34.14974629943967, "learning_rate": 1.9835646503486725e-06, "loss": 0.1964447021484375, "step": 141100 }, { "epoch": 1.2200932114724472, "grad_norm": 1.4548117646355416, "learning_rate": 1.983372969488952e-06, "loss": 0.029533767700195314, "step": 141105 }, { "epoch": 1.2201364449939907, "grad_norm": 1.2148897128292708, "learning_rate": 1.983181293317826e-06, "loss": 0.024575042724609374, "step": 141110 }, { "epoch": 1.2201796785155339, "grad_norm": 0.3652405780064848, "learning_rate": 1.9829896218361764e-06, "loss": 0.061452484130859374, "step": 141115 }, { "epoch": 1.2202229120370771, "grad_norm": 16.159377679082077, "learning_rate": 1.9827979550448904e-06, "loss": 0.1818115234375, "step": 141120 }, { "epoch": 1.2202661455586203, "grad_norm": 0.7609465882473255, "learning_rate": 1.9826062929448494e-06, "loss": 0.0451568603515625, "step": 141125 }, { "epoch": 1.2203093790801636, "grad_norm": 3.974706707275685, "learning_rate": 1.982414635536939e-06, "loss": 0.13760147094726563, "step": 141130 }, { "epoch": 1.2203526126017068, "grad_norm": 8.84411177697163, "learning_rate": 1.9822229828220425e-06, "loss": 0.067071533203125, "step": 141135 }, { "epoch": 1.22039584612325, "grad_norm": 9.072219096859802, "learning_rate": 1.9820313348010435e-06, "loss": 0.02883453369140625, "step": 141140 }, { "epoch": 1.2204390796447935, "grad_norm": 0.39213265464493724, "learning_rate": 1.981839691474825e-06, "loss": 0.05863037109375, "step": 141145 }, { "epoch": 1.2204823131663367, "grad_norm": 4.656818176020054, "learning_rate": 1.9816480528442725e-06, "loss": 0.08474884033203126, "step": 141150 }, { "epoch": 1.22052554668788, "grad_norm": 0.8858106689567573, "learning_rate": 1.981456418910269e-06, "loss": 0.0339752197265625, "step": 141155 }, { "epoch": 1.2205687802094232, "grad_norm": 5.38373767029048, "learning_rate": 1.9812647896736996e-06, "loss": 0.043585586547851565, "step": 141160 }, { "epoch": 1.2206120137309664, "grad_norm": 1.9614505497812635, "learning_rate": 1.9810731651354464e-06, "loss": 0.08838882446289062, "step": 141165 }, { "epoch": 1.2206552472525096, "grad_norm": 64.77456629546514, "learning_rate": 1.980881545296394e-06, "loss": 0.10036582946777343, "step": 141170 }, { "epoch": 1.220698480774053, "grad_norm": 0.5469054150960144, "learning_rate": 1.9806899301574245e-06, "loss": 0.05063304901123047, "step": 141175 }, { "epoch": 1.2207417142955963, "grad_norm": 0.2090521205057768, "learning_rate": 1.9804983197194243e-06, "loss": 0.00876026153564453, "step": 141180 }, { "epoch": 1.2207849478171395, "grad_norm": 3.264427427436359, "learning_rate": 1.9803067139832752e-06, "loss": 0.01843719482421875, "step": 141185 }, { "epoch": 1.2208281813386828, "grad_norm": 4.741449157270875, "learning_rate": 1.980115112949862e-06, "loss": 0.05986366271972656, "step": 141190 }, { "epoch": 1.220871414860226, "grad_norm": 2.8477319429912877, "learning_rate": 1.9799235166200676e-06, "loss": 0.02176361083984375, "step": 141195 }, { "epoch": 1.2209146483817692, "grad_norm": 0.5810458952944344, "learning_rate": 1.979731924994776e-06, "loss": 0.05415163040161133, "step": 141200 }, { "epoch": 1.2209578819033124, "grad_norm": 13.062079302658079, "learning_rate": 1.97954033807487e-06, "loss": 0.07268218994140625, "step": 141205 }, { "epoch": 1.221001115424856, "grad_norm": 0.4267174240388807, "learning_rate": 1.9793487558612336e-06, "loss": 0.0683349609375, "step": 141210 }, { "epoch": 1.2210443489463991, "grad_norm": 0.044846218774763934, "learning_rate": 1.9791571783547505e-06, "loss": 0.0645965576171875, "step": 141215 }, { "epoch": 1.2210875824679424, "grad_norm": 0.8817891872223484, "learning_rate": 1.978965605556305e-06, "loss": 0.1040679931640625, "step": 141220 }, { "epoch": 1.2211308159894856, "grad_norm": 0.10611081015261811, "learning_rate": 1.97877403746678e-06, "loss": 0.019805145263671876, "step": 141225 }, { "epoch": 1.2211740495110288, "grad_norm": 3.2551330025351173, "learning_rate": 1.978582474087059e-06, "loss": 0.05342559814453125, "step": 141230 }, { "epoch": 1.221217283032572, "grad_norm": 0.3763554539315313, "learning_rate": 1.9783909154180245e-06, "loss": 0.02349090576171875, "step": 141235 }, { "epoch": 1.2212605165541155, "grad_norm": 18.513978326948237, "learning_rate": 1.978199361460561e-06, "loss": 0.10657424926757812, "step": 141240 }, { "epoch": 1.2213037500756587, "grad_norm": 25.11170358205364, "learning_rate": 1.978007812215552e-06, "loss": 0.08030014038085938, "step": 141245 }, { "epoch": 1.221346983597202, "grad_norm": 17.461510126656105, "learning_rate": 1.9778162676838806e-06, "loss": 0.07957963943481446, "step": 141250 }, { "epoch": 1.2213902171187452, "grad_norm": 3.0174162834165204, "learning_rate": 1.9776247278664308e-06, "loss": 0.11644439697265625, "step": 141255 }, { "epoch": 1.2214334506402884, "grad_norm": 4.6928636128623085, "learning_rate": 1.9774331927640856e-06, "loss": 0.20832366943359376, "step": 141260 }, { "epoch": 1.2214766841618316, "grad_norm": 1.329154045719109, "learning_rate": 1.977241662377727e-06, "loss": 0.07120075225830078, "step": 141265 }, { "epoch": 1.2215199176833749, "grad_norm": 2.4054281551301115, "learning_rate": 1.97705013670824e-06, "loss": 0.031144142150878906, "step": 141270 }, { "epoch": 1.2215631512049183, "grad_norm": 26.59436695668375, "learning_rate": 1.9768586157565067e-06, "loss": 0.16597480773925782, "step": 141275 }, { "epoch": 1.2216063847264615, "grad_norm": 0.09369976764461306, "learning_rate": 1.976667099523412e-06, "loss": 0.2617940902709961, "step": 141280 }, { "epoch": 1.2216496182480048, "grad_norm": 15.085202993149494, "learning_rate": 1.9764755880098383e-06, "loss": 0.05022735595703125, "step": 141285 }, { "epoch": 1.221692851769548, "grad_norm": 5.006958151465451, "learning_rate": 1.9762840812166687e-06, "loss": 0.09387741088867188, "step": 141290 }, { "epoch": 1.2217360852910912, "grad_norm": 2.7513808992974567, "learning_rate": 1.9760925791447858e-06, "loss": 0.041020965576171874, "step": 141295 }, { "epoch": 1.2217793188126347, "grad_norm": 7.081677636905429, "learning_rate": 1.9759010817950743e-06, "loss": 0.0735321044921875, "step": 141300 }, { "epoch": 1.221822552334178, "grad_norm": 1.2043135925636603, "learning_rate": 1.975709589168415e-06, "loss": 0.15402755737304688, "step": 141305 }, { "epoch": 1.2218657858557211, "grad_norm": 11.212554905467863, "learning_rate": 1.975518101265694e-06, "loss": 0.023905181884765626, "step": 141310 }, { "epoch": 1.2219090193772644, "grad_norm": 3.0029982160716537, "learning_rate": 1.975326618087793e-06, "loss": 0.07681732177734375, "step": 141315 }, { "epoch": 1.2219522528988076, "grad_norm": 1.3992009095791915, "learning_rate": 1.975135139635595e-06, "loss": 0.057916259765625, "step": 141320 }, { "epoch": 1.2219954864203508, "grad_norm": 11.049989356829949, "learning_rate": 1.974943665909982e-06, "loss": 0.0363525390625, "step": 141325 }, { "epoch": 1.222038719941894, "grad_norm": 2.8652818490461307, "learning_rate": 1.97475219691184e-06, "loss": 0.01975555419921875, "step": 141330 }, { "epoch": 1.2220819534634373, "grad_norm": 0.3465826101022387, "learning_rate": 1.974560732642048e-06, "loss": 0.04587745666503906, "step": 141335 }, { "epoch": 1.2221251869849807, "grad_norm": 2.559741815745791, "learning_rate": 1.974369273101493e-06, "loss": 0.029764366149902344, "step": 141340 }, { "epoch": 1.222168420506524, "grad_norm": 1.6828484197975762, "learning_rate": 1.9741778182910565e-06, "loss": 0.027362060546875, "step": 141345 }, { "epoch": 1.2222116540280672, "grad_norm": 1.8803117151496367, "learning_rate": 1.9739863682116203e-06, "loss": 0.06016044616699219, "step": 141350 }, { "epoch": 1.2222548875496104, "grad_norm": 0.19372525726439188, "learning_rate": 1.973794922864069e-06, "loss": 0.01986217498779297, "step": 141355 }, { "epoch": 1.2222981210711537, "grad_norm": 2.1172201210182875, "learning_rate": 1.973603482249285e-06, "loss": 0.0448028564453125, "step": 141360 }, { "epoch": 1.222341354592697, "grad_norm": 0.6659410224019999, "learning_rate": 1.973412046368151e-06, "loss": 0.08628787994384765, "step": 141365 }, { "epoch": 1.2223845881142403, "grad_norm": 37.013459014408696, "learning_rate": 1.9732206152215486e-06, "loss": 0.11680011749267578, "step": 141370 }, { "epoch": 1.2224278216357836, "grad_norm": 1.6344850604643326, "learning_rate": 1.973029188810363e-06, "loss": 0.33595962524414064, "step": 141375 }, { "epoch": 1.2224710551573268, "grad_norm": 16.302117318775384, "learning_rate": 1.972837767135476e-06, "loss": 0.08929519653320313, "step": 141380 }, { "epoch": 1.22251428867887, "grad_norm": 0.3875525125020718, "learning_rate": 1.9726463501977705e-06, "loss": 0.03979969024658203, "step": 141385 }, { "epoch": 1.2225575222004132, "grad_norm": 0.7993692659737194, "learning_rate": 1.9724549379981293e-06, "loss": 0.08572921752929688, "step": 141390 }, { "epoch": 1.2226007557219565, "grad_norm": 14.08549524871495, "learning_rate": 1.9722635305374354e-06, "loss": 0.0342437744140625, "step": 141395 }, { "epoch": 1.2226439892435, "grad_norm": 1.8800561231921182, "learning_rate": 1.97207212781657e-06, "loss": 0.042234039306640624, "step": 141400 }, { "epoch": 1.2226872227650432, "grad_norm": 0.2617587395367098, "learning_rate": 1.9718807298364184e-06, "loss": 0.039813232421875, "step": 141405 }, { "epoch": 1.2227304562865864, "grad_norm": 0.6177416970568261, "learning_rate": 1.9716893365978612e-06, "loss": 0.027564620971679686, "step": 141410 }, { "epoch": 1.2227736898081296, "grad_norm": 0.6707868718485699, "learning_rate": 1.9714979481017827e-06, "loss": 0.0667938232421875, "step": 141415 }, { "epoch": 1.2228169233296728, "grad_norm": 3.389593970832987, "learning_rate": 1.971306564349065e-06, "loss": 0.07109527587890625, "step": 141420 }, { "epoch": 1.222860156851216, "grad_norm": 1.5816188169572625, "learning_rate": 1.9711151853405902e-06, "loss": 0.02935943603515625, "step": 141425 }, { "epoch": 1.2229033903727595, "grad_norm": 4.182108872647343, "learning_rate": 1.9709238110772402e-06, "loss": 0.03829269409179688, "step": 141430 }, { "epoch": 1.2229466238943028, "grad_norm": 7.785221425638135, "learning_rate": 1.9707324415599e-06, "loss": 0.0508331298828125, "step": 141435 }, { "epoch": 1.222989857415846, "grad_norm": 3.0408369026810416, "learning_rate": 1.97054107678945e-06, "loss": 0.04234771728515625, "step": 141440 }, { "epoch": 1.2230330909373892, "grad_norm": 2.2320414808362736, "learning_rate": 1.970349716766774e-06, "loss": 0.0706085205078125, "step": 141445 }, { "epoch": 1.2230763244589324, "grad_norm": 13.8196970589355, "learning_rate": 1.970158361492754e-06, "loss": 0.017993736267089843, "step": 141450 }, { "epoch": 1.2231195579804757, "grad_norm": 20.01938645487057, "learning_rate": 1.969967010968273e-06, "loss": 0.4075477600097656, "step": 141455 }, { "epoch": 1.223162791502019, "grad_norm": 9.02784259403716, "learning_rate": 1.9697756651942127e-06, "loss": 0.0502288818359375, "step": 141460 }, { "epoch": 1.2232060250235623, "grad_norm": 0.11519225414615952, "learning_rate": 1.9695843241714556e-06, "loss": 0.04036445617675781, "step": 141465 }, { "epoch": 1.2232492585451056, "grad_norm": 0.2836823338167552, "learning_rate": 1.969392987900885e-06, "loss": 0.019281005859375, "step": 141470 }, { "epoch": 1.2232924920666488, "grad_norm": 20.628679102293287, "learning_rate": 1.969201656383383e-06, "loss": 0.2187103271484375, "step": 141475 }, { "epoch": 1.223335725588192, "grad_norm": 39.14061962329286, "learning_rate": 1.9690103296198327e-06, "loss": 0.25757293701171874, "step": 141480 }, { "epoch": 1.2233789591097353, "grad_norm": 1.1177127662625956, "learning_rate": 1.968819007611115e-06, "loss": 0.01397705078125, "step": 141485 }, { "epoch": 1.2234221926312785, "grad_norm": 0.5040253747817167, "learning_rate": 1.9686276903581124e-06, "loss": 0.08429126739501953, "step": 141490 }, { "epoch": 1.223465426152822, "grad_norm": 3.375515693493452, "learning_rate": 1.9684363778617076e-06, "loss": 0.2268850326538086, "step": 141495 }, { "epoch": 1.2235086596743652, "grad_norm": 0.2528539066373272, "learning_rate": 1.968245070122783e-06, "loss": 0.09957256317138671, "step": 141500 }, { "epoch": 1.2235518931959084, "grad_norm": 8.915850077500755, "learning_rate": 1.9680537671422214e-06, "loss": 0.05065155029296875, "step": 141505 }, { "epoch": 1.2235951267174516, "grad_norm": 6.402657012977996, "learning_rate": 1.9678624689209053e-06, "loss": 0.07283859252929688, "step": 141510 }, { "epoch": 1.2236383602389949, "grad_norm": 0.24624127070640214, "learning_rate": 1.9676711754597157e-06, "loss": 0.06255512237548828, "step": 141515 }, { "epoch": 1.223681593760538, "grad_norm": 12.400238052725802, "learning_rate": 1.967479886759535e-06, "loss": 0.0648183822631836, "step": 141520 }, { "epoch": 1.2237248272820813, "grad_norm": 0.019675552004388346, "learning_rate": 1.9672886028212456e-06, "loss": 0.01966266632080078, "step": 141525 }, { "epoch": 1.2237680608036248, "grad_norm": 33.5517953261314, "learning_rate": 1.9670973236457312e-06, "loss": 0.311773681640625, "step": 141530 }, { "epoch": 1.223811294325168, "grad_norm": 0.11251546779578529, "learning_rate": 1.9669060492338726e-06, "loss": 0.03322649002075195, "step": 141535 }, { "epoch": 1.2238545278467112, "grad_norm": 2.629601841851703, "learning_rate": 1.9667147795865513e-06, "loss": 0.06572971343994141, "step": 141540 }, { "epoch": 1.2238977613682545, "grad_norm": 0.14104804103423824, "learning_rate": 1.9665235147046506e-06, "loss": 0.025978851318359374, "step": 141545 }, { "epoch": 1.2239409948897977, "grad_norm": 10.336733886191942, "learning_rate": 1.9663322545890517e-06, "loss": 0.05418872833251953, "step": 141550 }, { "epoch": 1.2239842284113411, "grad_norm": 0.3061164701812687, "learning_rate": 1.9661409992406377e-06, "loss": 0.010541915893554688, "step": 141555 }, { "epoch": 1.2240274619328844, "grad_norm": 0.9864297832590101, "learning_rate": 1.9659497486602886e-06, "loss": 0.007366943359375, "step": 141560 }, { "epoch": 1.2240706954544276, "grad_norm": 2.5445642479488844, "learning_rate": 1.965758502848889e-06, "loss": 0.3193855285644531, "step": 141565 }, { "epoch": 1.2241139289759708, "grad_norm": 25.411669565084157, "learning_rate": 1.96556726180732e-06, "loss": 0.052059364318847653, "step": 141570 }, { "epoch": 1.224157162497514, "grad_norm": 0.7820466856653389, "learning_rate": 1.965376025536463e-06, "loss": 0.03351287841796875, "step": 141575 }, { "epoch": 1.2242003960190573, "grad_norm": 0.16267199347212966, "learning_rate": 1.9651847940372004e-06, "loss": 0.015753555297851562, "step": 141580 }, { "epoch": 1.2242436295406005, "grad_norm": 48.71496290872564, "learning_rate": 1.9649935673104146e-06, "loss": 0.5827346801757812, "step": 141585 }, { "epoch": 1.224286863062144, "grad_norm": 0.09819346099455015, "learning_rate": 1.9648023453569856e-06, "loss": 0.026221466064453126, "step": 141590 }, { "epoch": 1.2243300965836872, "grad_norm": 13.996556937564273, "learning_rate": 1.9646111281777976e-06, "loss": 0.08394374847412109, "step": 141595 }, { "epoch": 1.2243733301052304, "grad_norm": 0.8068049001964728, "learning_rate": 1.9644199157737325e-06, "loss": 0.0862091064453125, "step": 141600 }, { "epoch": 1.2244165636267736, "grad_norm": 11.380233342596194, "learning_rate": 1.9642287081456695e-06, "loss": 0.0330596923828125, "step": 141605 }, { "epoch": 1.2244597971483169, "grad_norm": 2.06765030905103, "learning_rate": 1.9640375052944936e-06, "loss": 0.09171104431152344, "step": 141610 }, { "epoch": 1.22450303066986, "grad_norm": 26.806414563840825, "learning_rate": 1.963846307221085e-06, "loss": 0.08850746154785157, "step": 141615 }, { "epoch": 1.2245462641914036, "grad_norm": 2.3954824666354946, "learning_rate": 1.963655113926324e-06, "loss": 0.18984527587890626, "step": 141620 }, { "epoch": 1.2245894977129468, "grad_norm": 0.1481897073924637, "learning_rate": 1.963463925411096e-06, "loss": 0.44213104248046875, "step": 141625 }, { "epoch": 1.22463273123449, "grad_norm": 5.482788488050528, "learning_rate": 1.96327274167628e-06, "loss": 0.047878265380859375, "step": 141630 }, { "epoch": 1.2246759647560332, "grad_norm": 32.3226949180494, "learning_rate": 1.963081562722759e-06, "loss": 0.09999160766601563, "step": 141635 }, { "epoch": 1.2247191982775765, "grad_norm": 3.648594858447019, "learning_rate": 1.962890388551414e-06, "loss": 0.04212455749511719, "step": 141640 }, { "epoch": 1.2247624317991197, "grad_norm": 6.088917230394389, "learning_rate": 1.9626992191631268e-06, "loss": 0.030259323120117188, "step": 141645 }, { "epoch": 1.224805665320663, "grad_norm": 0.3673147707852706, "learning_rate": 1.9625080545587792e-06, "loss": 0.08230552673339844, "step": 141650 }, { "epoch": 1.2248488988422064, "grad_norm": 0.06600922428803518, "learning_rate": 1.962316894739252e-06, "loss": 0.031213760375976562, "step": 141655 }, { "epoch": 1.2248921323637496, "grad_norm": 58.2445652448892, "learning_rate": 1.9621257397054286e-06, "loss": 0.3099662780761719, "step": 141660 }, { "epoch": 1.2249353658852928, "grad_norm": 1.4045972602480732, "learning_rate": 1.9619345894581887e-06, "loss": 0.0330413818359375, "step": 141665 }, { "epoch": 1.224978599406836, "grad_norm": 1.594822163093147, "learning_rate": 1.961743443998416e-06, "loss": 0.09055557250976562, "step": 141670 }, { "epoch": 1.2250218329283793, "grad_norm": 27.741157742960485, "learning_rate": 1.96155230332699e-06, "loss": 0.4181205749511719, "step": 141675 }, { "epoch": 1.2250650664499225, "grad_norm": 2.2961594034655284, "learning_rate": 1.9613611674447933e-06, "loss": 0.05442047119140625, "step": 141680 }, { "epoch": 1.225108299971466, "grad_norm": 1.199428954952146, "learning_rate": 1.961170036352706e-06, "loss": 0.026751708984375, "step": 141685 }, { "epoch": 1.2251515334930092, "grad_norm": 3.368449495189946, "learning_rate": 1.960978910051612e-06, "loss": 0.07547988891601562, "step": 141690 }, { "epoch": 1.2251947670145524, "grad_norm": 16.05734005806096, "learning_rate": 1.960787788542391e-06, "loss": 0.014154815673828125, "step": 141695 }, { "epoch": 1.2252380005360957, "grad_norm": 7.629507861765745, "learning_rate": 1.960596671825925e-06, "loss": 0.06987380981445312, "step": 141700 }, { "epoch": 1.225281234057639, "grad_norm": 0.6260145024797374, "learning_rate": 1.9604055599030954e-06, "loss": 0.023308563232421874, "step": 141705 }, { "epoch": 1.2253244675791821, "grad_norm": 1.4369613058713568, "learning_rate": 1.960214452774784e-06, "loss": 0.08759498596191406, "step": 141710 }, { "epoch": 1.2253677011007253, "grad_norm": 0.023326488334107158, "learning_rate": 1.960023350441871e-06, "loss": 0.0601318359375, "step": 141715 }, { "epoch": 1.2254109346222688, "grad_norm": 3.3118929864287265, "learning_rate": 1.959832252905238e-06, "loss": 0.06385841369628906, "step": 141720 }, { "epoch": 1.225454168143812, "grad_norm": 5.409808299739851, "learning_rate": 1.9596411601657665e-06, "loss": 0.06621685028076171, "step": 141725 }, { "epoch": 1.2254974016653553, "grad_norm": 23.923152737870463, "learning_rate": 1.959450072224339e-06, "loss": 0.08630447387695313, "step": 141730 }, { "epoch": 1.2255406351868985, "grad_norm": 36.47594453997601, "learning_rate": 1.959258989081836e-06, "loss": 0.18338623046875, "step": 141735 }, { "epoch": 1.2255838687084417, "grad_norm": 0.2860692762278572, "learning_rate": 1.959067910739138e-06, "loss": 0.014100265502929688, "step": 141740 }, { "epoch": 1.2256271022299852, "grad_norm": 0.13385621644336512, "learning_rate": 1.958876837197127e-06, "loss": 0.015207672119140625, "step": 141745 }, { "epoch": 1.2256703357515284, "grad_norm": 2.2561877891219138, "learning_rate": 1.958685768456683e-06, "loss": 0.035799407958984376, "step": 141750 }, { "epoch": 1.2257135692730716, "grad_norm": 2.7049072122864257, "learning_rate": 1.9584947045186893e-06, "loss": 0.05569725036621094, "step": 141755 }, { "epoch": 1.2257568027946149, "grad_norm": 0.5740291522720294, "learning_rate": 1.9583036453840265e-06, "loss": 0.02374267578125, "step": 141760 }, { "epoch": 1.225800036316158, "grad_norm": 0.7339321167257313, "learning_rate": 1.958112591053575e-06, "loss": 0.059326171875, "step": 141765 }, { "epoch": 1.2258432698377013, "grad_norm": 4.0997649374167215, "learning_rate": 1.9579215415282153e-06, "loss": 0.12471771240234375, "step": 141770 }, { "epoch": 1.2258865033592445, "grad_norm": 8.650264872868245, "learning_rate": 1.9577304968088305e-06, "loss": 0.030484390258789063, "step": 141775 }, { "epoch": 1.2259297368807878, "grad_norm": 0.08251373350987902, "learning_rate": 1.957539456896299e-06, "loss": 0.17891082763671876, "step": 141780 }, { "epoch": 1.2259729704023312, "grad_norm": 12.73380707488708, "learning_rate": 1.9573484217915045e-06, "loss": 0.1637298583984375, "step": 141785 }, { "epoch": 1.2260162039238744, "grad_norm": 1.7420670799957019, "learning_rate": 1.9571573914953273e-06, "loss": 0.03592987060546875, "step": 141790 }, { "epoch": 1.2260594374454177, "grad_norm": 52.993744183830124, "learning_rate": 1.9569663660086476e-06, "loss": 0.13873291015625, "step": 141795 }, { "epoch": 1.226102670966961, "grad_norm": 0.3686422520158693, "learning_rate": 1.9567753453323464e-06, "loss": 0.023756790161132812, "step": 141800 }, { "epoch": 1.2261459044885041, "grad_norm": 3.989052404713771, "learning_rate": 1.956584329467306e-06, "loss": 0.027332878112792967, "step": 141805 }, { "epoch": 1.2261891380100476, "grad_norm": 2.6866544162534236, "learning_rate": 1.9563933184144063e-06, "loss": 0.03429412841796875, "step": 141810 }, { "epoch": 1.2262323715315908, "grad_norm": 0.6143902488707137, "learning_rate": 1.956202312174527e-06, "loss": 0.014387893676757812, "step": 141815 }, { "epoch": 1.226275605053134, "grad_norm": 91.761426216043, "learning_rate": 1.956011310748552e-06, "loss": 0.153839111328125, "step": 141820 }, { "epoch": 1.2263188385746773, "grad_norm": 6.17193602657931, "learning_rate": 1.9558203141373606e-06, "loss": 0.031018447875976563, "step": 141825 }, { "epoch": 1.2263620720962205, "grad_norm": 1.1519385761323893, "learning_rate": 1.9556293223418322e-06, "loss": 0.07052898406982422, "step": 141830 }, { "epoch": 1.2264053056177637, "grad_norm": 1.7168799009103857, "learning_rate": 1.9554383353628505e-06, "loss": 0.063751220703125, "step": 141835 }, { "epoch": 1.226448539139307, "grad_norm": 2.8775298795044533, "learning_rate": 1.955247353201295e-06, "loss": 0.1118408203125, "step": 141840 }, { "epoch": 1.2264917726608504, "grad_norm": 1.4805296531404135, "learning_rate": 1.9550563758580448e-06, "loss": 0.16792144775390624, "step": 141845 }, { "epoch": 1.2265350061823936, "grad_norm": 1.752089927663871, "learning_rate": 1.9548654033339835e-06, "loss": 0.01363067626953125, "step": 141850 }, { "epoch": 1.2265782397039369, "grad_norm": 0.15593629356856856, "learning_rate": 1.954674435629991e-06, "loss": 0.011732101440429688, "step": 141855 }, { "epoch": 1.22662147322548, "grad_norm": 0.6912584554463735, "learning_rate": 1.9544834727469465e-06, "loss": 0.25066299438476564, "step": 141860 }, { "epoch": 1.2266647067470233, "grad_norm": 1.0228353128403256, "learning_rate": 1.9542925146857327e-06, "loss": 0.01352996826171875, "step": 141865 }, { "epoch": 1.2267079402685666, "grad_norm": 2.6428011734367454, "learning_rate": 1.9541015614472297e-06, "loss": 0.079144287109375, "step": 141870 }, { "epoch": 1.22675117379011, "grad_norm": 1.424841116836834, "learning_rate": 1.953910613032316e-06, "loss": 0.014725494384765624, "step": 141875 }, { "epoch": 1.2267944073116532, "grad_norm": 2.9380961243595176, "learning_rate": 1.953719669441876e-06, "loss": 0.018619537353515625, "step": 141880 }, { "epoch": 1.2268376408331965, "grad_norm": 7.176952785256763, "learning_rate": 1.9535287306767876e-06, "loss": 0.03397674560546875, "step": 141885 }, { "epoch": 1.2268808743547397, "grad_norm": 4.6520219409126, "learning_rate": 1.953337796737932e-06, "loss": 0.0126129150390625, "step": 141890 }, { "epoch": 1.226924107876283, "grad_norm": 0.8659932167272933, "learning_rate": 1.953146867626191e-06, "loss": 0.14862747192382814, "step": 141895 }, { "epoch": 1.2269673413978262, "grad_norm": 31.742443716226493, "learning_rate": 1.952955943342444e-06, "loss": 0.46925601959228513, "step": 141900 }, { "epoch": 1.2270105749193694, "grad_norm": 10.822393774190111, "learning_rate": 1.952765023887572e-06, "loss": 0.029027557373046874, "step": 141905 }, { "epoch": 1.2270538084409128, "grad_norm": 3.8990926375680908, "learning_rate": 1.952574109262453e-06, "loss": 0.03000030517578125, "step": 141910 }, { "epoch": 1.227097041962456, "grad_norm": 2.5554514039294625, "learning_rate": 1.9523831994679716e-06, "loss": 0.007462310791015625, "step": 141915 }, { "epoch": 1.2271402754839993, "grad_norm": 7.67404392084459, "learning_rate": 1.9521922945050056e-06, "loss": 0.015459632873535157, "step": 141920 }, { "epoch": 1.2271835090055425, "grad_norm": 3.2801310702786823, "learning_rate": 1.9520013943744363e-06, "loss": 0.02836456298828125, "step": 141925 }, { "epoch": 1.2272267425270857, "grad_norm": 1.4859748226312588, "learning_rate": 1.9518104990771445e-06, "loss": 0.11550064086914062, "step": 141930 }, { "epoch": 1.227269976048629, "grad_norm": 4.670482494800213, "learning_rate": 1.95161960861401e-06, "loss": 0.0199737548828125, "step": 141935 }, { "epoch": 1.2273132095701724, "grad_norm": 75.42351776144325, "learning_rate": 1.951428722985912e-06, "loss": 0.19140777587890626, "step": 141940 }, { "epoch": 1.2273564430917157, "grad_norm": 0.427109401108819, "learning_rate": 1.951237842193733e-06, "loss": 0.05164680480957031, "step": 141945 }, { "epoch": 1.2273996766132589, "grad_norm": 3.027877326198787, "learning_rate": 1.951046966238352e-06, "loss": 0.23973388671875, "step": 141950 }, { "epoch": 1.2274429101348021, "grad_norm": 19.222447874821686, "learning_rate": 1.9508560951206504e-06, "loss": 0.3660064697265625, "step": 141955 }, { "epoch": 1.2274861436563453, "grad_norm": 0.5214809608208292, "learning_rate": 1.9506652288415076e-06, "loss": 0.22655487060546875, "step": 141960 }, { "epoch": 1.2275293771778886, "grad_norm": 1.2244977053905635, "learning_rate": 1.950474367401804e-06, "loss": 0.4125926971435547, "step": 141965 }, { "epoch": 1.2275726106994318, "grad_norm": 6.557180441700041, "learning_rate": 1.950283510802419e-06, "loss": 0.03220367431640625, "step": 141970 }, { "epoch": 1.2276158442209752, "grad_norm": 5.306045754435493, "learning_rate": 1.9500926590442337e-06, "loss": 0.1262460708618164, "step": 141975 }, { "epoch": 1.2276590777425185, "grad_norm": 1.6225569085477511, "learning_rate": 1.9499018121281294e-06, "loss": 0.0165374755859375, "step": 141980 }, { "epoch": 1.2277023112640617, "grad_norm": 1.8933764847771237, "learning_rate": 1.949710970054985e-06, "loss": 0.13110694885253907, "step": 141985 }, { "epoch": 1.227745544785605, "grad_norm": 0.3799809552834536, "learning_rate": 1.9495201328256813e-06, "loss": 0.010330963134765624, "step": 141990 }, { "epoch": 1.2277887783071482, "grad_norm": 1.8405730890143235, "learning_rate": 1.9493293004410967e-06, "loss": 0.04759902954101562, "step": 141995 }, { "epoch": 1.2278320118286916, "grad_norm": 30.49163752667784, "learning_rate": 1.9491384729021133e-06, "loss": 0.09658050537109375, "step": 142000 }, { "epoch": 1.2278752453502348, "grad_norm": 0.6524412471661107, "learning_rate": 1.9489476502096093e-06, "loss": 0.006306838989257812, "step": 142005 }, { "epoch": 1.227918478871778, "grad_norm": 1.133888912667103, "learning_rate": 1.948756832364467e-06, "loss": 0.007554244995117187, "step": 142010 }, { "epoch": 1.2279617123933213, "grad_norm": 18.86137533369474, "learning_rate": 1.9485660193675652e-06, "loss": 0.033736801147460936, "step": 142015 }, { "epoch": 1.2280049459148645, "grad_norm": 0.8160126691864864, "learning_rate": 1.948375211219784e-06, "loss": 0.00887603759765625, "step": 142020 }, { "epoch": 1.2280481794364078, "grad_norm": 0.46641991970596036, "learning_rate": 1.9481844079220027e-06, "loss": 0.29064102172851564, "step": 142025 }, { "epoch": 1.228091412957951, "grad_norm": 3.6513583090855284, "learning_rate": 1.9479936094751027e-06, "loss": 0.012630462646484375, "step": 142030 }, { "epoch": 1.2281346464794942, "grad_norm": 9.635913711416752, "learning_rate": 1.947802815879962e-06, "loss": 0.009549331665039063, "step": 142035 }, { "epoch": 1.2281778800010377, "grad_norm": 7.954615420564412, "learning_rate": 1.947612027137463e-06, "loss": 0.1813385009765625, "step": 142040 }, { "epoch": 1.228221113522581, "grad_norm": 2.8238756299891, "learning_rate": 1.947421243248484e-06, "loss": 0.0306243896484375, "step": 142045 }, { "epoch": 1.2282643470441241, "grad_norm": 34.75335169320606, "learning_rate": 1.9472304642139065e-06, "loss": 0.1481729507446289, "step": 142050 }, { "epoch": 1.2283075805656674, "grad_norm": 0.17765009503241527, "learning_rate": 1.947039690034607e-06, "loss": 0.05313796997070312, "step": 142055 }, { "epoch": 1.2283508140872106, "grad_norm": 0.5515326793654541, "learning_rate": 1.9468489207114684e-06, "loss": 0.12201385498046875, "step": 142060 }, { "epoch": 1.228394047608754, "grad_norm": 0.3358512482675164, "learning_rate": 1.9466581562453704e-06, "loss": 0.0129486083984375, "step": 142065 }, { "epoch": 1.2284372811302973, "grad_norm": 4.158300688848214, "learning_rate": 1.9464673966371892e-06, "loss": 0.09827423095703125, "step": 142070 }, { "epoch": 1.2284805146518405, "grad_norm": 13.947026446245244, "learning_rate": 1.9462766418878097e-06, "loss": 0.06237010955810547, "step": 142075 }, { "epoch": 1.2285237481733837, "grad_norm": 0.2795680311764937, "learning_rate": 1.946085891998109e-06, "loss": 0.088214111328125, "step": 142080 }, { "epoch": 1.228566981694927, "grad_norm": 12.737103784233936, "learning_rate": 1.9458951469689657e-06, "loss": 0.17442779541015624, "step": 142085 }, { "epoch": 1.2286102152164702, "grad_norm": 0.8161265035721368, "learning_rate": 1.945704406801262e-06, "loss": 0.008161163330078125, "step": 142090 }, { "epoch": 1.2286534487380134, "grad_norm": 0.21286614641142598, "learning_rate": 1.9455136714958764e-06, "loss": 0.2035400390625, "step": 142095 }, { "epoch": 1.2286966822595569, "grad_norm": 0.9798112264087768, "learning_rate": 1.945322941053688e-06, "loss": 0.04263381958007813, "step": 142100 }, { "epoch": 1.2287399157811, "grad_norm": 36.94046431429217, "learning_rate": 1.9451322154755767e-06, "loss": 0.04554824829101563, "step": 142105 }, { "epoch": 1.2287831493026433, "grad_norm": 1.3275390547505446, "learning_rate": 1.944941494762424e-06, "loss": 0.06568183898925781, "step": 142110 }, { "epoch": 1.2288263828241865, "grad_norm": 17.621037250127028, "learning_rate": 1.944750778915106e-06, "loss": 0.09929428100585938, "step": 142115 }, { "epoch": 1.2288696163457298, "grad_norm": 0.3605714728824701, "learning_rate": 1.9445600679345053e-06, "loss": 0.08280181884765625, "step": 142120 }, { "epoch": 1.228912849867273, "grad_norm": 5.421763314622781, "learning_rate": 1.9443693618215005e-06, "loss": 0.22733535766601562, "step": 142125 }, { "epoch": 1.2289560833888165, "grad_norm": 0.23832892628720387, "learning_rate": 1.9441786605769697e-06, "loss": 0.02812042236328125, "step": 142130 }, { "epoch": 1.2289993169103597, "grad_norm": 1.1295632652458854, "learning_rate": 1.9439879642017945e-06, "loss": 0.08198089599609375, "step": 142135 }, { "epoch": 1.229042550431903, "grad_norm": 6.141111474249231, "learning_rate": 1.9437972726968538e-06, "loss": 0.09229965209960937, "step": 142140 }, { "epoch": 1.2290857839534461, "grad_norm": 9.354959894904605, "learning_rate": 1.943606586063026e-06, "loss": 0.21178665161132812, "step": 142145 }, { "epoch": 1.2291290174749894, "grad_norm": 57.5926473402886, "learning_rate": 1.9434159043011918e-06, "loss": 0.22279319763183594, "step": 142150 }, { "epoch": 1.2291722509965326, "grad_norm": 7.50253827081952, "learning_rate": 1.9432252274122306e-06, "loss": 0.09940547943115234, "step": 142155 }, { "epoch": 1.2292154845180758, "grad_norm": 7.272877078050375, "learning_rate": 1.9430345553970204e-06, "loss": 0.1498180389404297, "step": 142160 }, { "epoch": 1.2292587180396193, "grad_norm": 18.540946025586162, "learning_rate": 1.942843888256441e-06, "loss": 0.19371376037597657, "step": 142165 }, { "epoch": 1.2293019515611625, "grad_norm": 2.0780524666956888, "learning_rate": 1.942653225991372e-06, "loss": 0.021961212158203125, "step": 142170 }, { "epoch": 1.2293451850827057, "grad_norm": 1.5866647900186424, "learning_rate": 1.9424625686026942e-06, "loss": 0.03804597854614258, "step": 142175 }, { "epoch": 1.229388418604249, "grad_norm": 7.668395680374919, "learning_rate": 1.9422719160912854e-06, "loss": 0.023297119140625, "step": 142180 }, { "epoch": 1.2294316521257922, "grad_norm": 0.40198776887517945, "learning_rate": 1.9420812684580253e-06, "loss": 0.06810302734375, "step": 142185 }, { "epoch": 1.2294748856473354, "grad_norm": 1.9734716371124872, "learning_rate": 1.9418906257037922e-06, "loss": 0.22179756164550782, "step": 142190 }, { "epoch": 1.2295181191688789, "grad_norm": 1.150387843679567, "learning_rate": 1.9416999878294655e-06, "loss": 0.091363525390625, "step": 142195 }, { "epoch": 1.229561352690422, "grad_norm": 1.4281222133868159, "learning_rate": 1.941509354835925e-06, "loss": 0.069085693359375, "step": 142200 }, { "epoch": 1.2296045862119653, "grad_norm": 0.3729833439321357, "learning_rate": 1.941318726724051e-06, "loss": 0.024740982055664062, "step": 142205 }, { "epoch": 1.2296478197335086, "grad_norm": 13.754853725668832, "learning_rate": 1.941128103494721e-06, "loss": 0.06112804412841797, "step": 142210 }, { "epoch": 1.2296910532550518, "grad_norm": 15.682625366628903, "learning_rate": 1.940937485148815e-06, "loss": 0.10788192749023437, "step": 142215 }, { "epoch": 1.229734286776595, "grad_norm": 5.313988068813317, "learning_rate": 1.940746871687211e-06, "loss": 0.0261993408203125, "step": 142220 }, { "epoch": 1.2297775202981382, "grad_norm": 1.8622949692085244, "learning_rate": 1.940556263110789e-06, "loss": 0.0360443115234375, "step": 142225 }, { "epoch": 1.2298207538196817, "grad_norm": 21.762027112980558, "learning_rate": 1.9403656594204275e-06, "loss": 0.19442577362060548, "step": 142230 }, { "epoch": 1.229863987341225, "grad_norm": 0.6681400305044509, "learning_rate": 1.940175060617007e-06, "loss": 0.015998077392578126, "step": 142235 }, { "epoch": 1.2299072208627682, "grad_norm": 10.9043772613308, "learning_rate": 1.939984466701405e-06, "loss": 0.2494537353515625, "step": 142240 }, { "epoch": 1.2299504543843114, "grad_norm": 12.873438394140969, "learning_rate": 1.9397938776745014e-06, "loss": 0.039302825927734375, "step": 142245 }, { "epoch": 1.2299936879058546, "grad_norm": 3.9595796386826647, "learning_rate": 1.939603293537174e-06, "loss": 0.06496353149414062, "step": 142250 }, { "epoch": 1.230036921427398, "grad_norm": 3.3744558120071977, "learning_rate": 1.939412714290303e-06, "loss": 0.05602531433105469, "step": 142255 }, { "epoch": 1.2300801549489413, "grad_norm": 1.1040836276027435, "learning_rate": 1.939222139934766e-06, "loss": 0.0289825439453125, "step": 142260 }, { "epoch": 1.2301233884704845, "grad_norm": 4.6618575787848995, "learning_rate": 1.9390315704714432e-06, "loss": 0.038354110717773435, "step": 142265 }, { "epoch": 1.2301666219920278, "grad_norm": 5.925455179006508, "learning_rate": 1.938841005901214e-06, "loss": 0.05278911590576172, "step": 142270 }, { "epoch": 1.230209855513571, "grad_norm": 4.512165608510845, "learning_rate": 1.9386504462249556e-06, "loss": 0.04849395751953125, "step": 142275 }, { "epoch": 1.2302530890351142, "grad_norm": 1.8041166598210756, "learning_rate": 1.938459891443547e-06, "loss": 0.008130645751953125, "step": 142280 }, { "epoch": 1.2302963225566574, "grad_norm": 2.8952708478889346, "learning_rate": 1.938269341557869e-06, "loss": 0.22964067459106446, "step": 142285 }, { "epoch": 1.2303395560782007, "grad_norm": 0.9654344065770558, "learning_rate": 1.9380787965687965e-06, "loss": 0.03388195037841797, "step": 142290 }, { "epoch": 1.2303827895997441, "grad_norm": 12.763305454818724, "learning_rate": 1.9378882564772127e-06, "loss": 0.06087827682495117, "step": 142295 }, { "epoch": 1.2304260231212873, "grad_norm": 3.050367702331255, "learning_rate": 1.9376977212839947e-06, "loss": 0.024265289306640625, "step": 142300 }, { "epoch": 1.2304692566428306, "grad_norm": 2.6028288883349284, "learning_rate": 1.9375071909900207e-06, "loss": 0.09874267578125, "step": 142305 }, { "epoch": 1.2305124901643738, "grad_norm": 2.630669262360216, "learning_rate": 1.9373166655961687e-06, "loss": 0.016601181030273436, "step": 142310 }, { "epoch": 1.230555723685917, "grad_norm": 7.591537339263706, "learning_rate": 1.937126145103319e-06, "loss": 0.06159515380859375, "step": 142315 }, { "epoch": 1.2305989572074605, "grad_norm": 1.162603016423457, "learning_rate": 1.9369356295123482e-06, "loss": 0.12739028930664062, "step": 142320 }, { "epoch": 1.2306421907290037, "grad_norm": 2.223414803696329, "learning_rate": 1.9367451188241385e-06, "loss": 0.14281005859375, "step": 142325 }, { "epoch": 1.230685424250547, "grad_norm": 1.0646471012349856, "learning_rate": 1.9365546130395656e-06, "loss": 0.10317726135253906, "step": 142330 }, { "epoch": 1.2307286577720902, "grad_norm": 1.5583630160700739, "learning_rate": 1.936364112159509e-06, "loss": 0.205145263671875, "step": 142335 }, { "epoch": 1.2307718912936334, "grad_norm": 12.473676858596557, "learning_rate": 1.9361736161848465e-06, "loss": 0.05632705688476562, "step": 142340 }, { "epoch": 1.2308151248151766, "grad_norm": 41.87785633636031, "learning_rate": 1.935983125116458e-06, "loss": 0.11290264129638672, "step": 142345 }, { "epoch": 1.2308583583367199, "grad_norm": 8.265962187585462, "learning_rate": 1.9357926389552214e-06, "loss": 0.03184700012207031, "step": 142350 }, { "epoch": 1.2309015918582633, "grad_norm": 0.15736383942452736, "learning_rate": 1.935602157702014e-06, "loss": 0.2282886505126953, "step": 142355 }, { "epoch": 1.2309448253798065, "grad_norm": 0.34822452334158105, "learning_rate": 1.935411681357716e-06, "loss": 0.0185028076171875, "step": 142360 }, { "epoch": 1.2309880589013498, "grad_norm": 3.2865412056490024, "learning_rate": 1.9352212099232056e-06, "loss": 0.08550605773925782, "step": 142365 }, { "epoch": 1.231031292422893, "grad_norm": 32.15254517307176, "learning_rate": 1.9350307433993603e-06, "loss": 0.10815200805664063, "step": 142370 }, { "epoch": 1.2310745259444362, "grad_norm": 4.4702513903518275, "learning_rate": 1.93484028178706e-06, "loss": 0.052861785888671874, "step": 142375 }, { "epoch": 1.2311177594659795, "grad_norm": 1.5929665048367447, "learning_rate": 1.9346498250871816e-06, "loss": 0.05435333251953125, "step": 142380 }, { "epoch": 1.231160992987523, "grad_norm": 0.46718675218658984, "learning_rate": 1.9344593733006037e-06, "loss": 0.06658592224121093, "step": 142385 }, { "epoch": 1.2312042265090661, "grad_norm": 4.61635277686688, "learning_rate": 1.9342689264282052e-06, "loss": 0.13100357055664064, "step": 142390 }, { "epoch": 1.2312474600306094, "grad_norm": 0.6081852106681834, "learning_rate": 1.9340784844708645e-06, "loss": 0.020755577087402343, "step": 142395 }, { "epoch": 1.2312906935521526, "grad_norm": 0.08004929057748851, "learning_rate": 1.9338880474294594e-06, "loss": 0.015282058715820312, "step": 142400 }, { "epoch": 1.2313339270736958, "grad_norm": 0.8438630853298844, "learning_rate": 1.9336976153048698e-06, "loss": 0.080645751953125, "step": 142405 }, { "epoch": 1.231377160595239, "grad_norm": 0.9166508534069491, "learning_rate": 1.9335071880979712e-06, "loss": 0.05642547607421875, "step": 142410 }, { "epoch": 1.2314203941167823, "grad_norm": 0.4966765642158706, "learning_rate": 1.9333167658096436e-06, "loss": 0.04098968505859375, "step": 142415 }, { "epoch": 1.2314636276383257, "grad_norm": 5.072692010636066, "learning_rate": 1.933126348440764e-06, "loss": 0.03463687896728516, "step": 142420 }, { "epoch": 1.231506861159869, "grad_norm": 0.25720589139218153, "learning_rate": 1.9329359359922116e-06, "loss": 0.11161651611328124, "step": 142425 }, { "epoch": 1.2315500946814122, "grad_norm": 14.62313752231993, "learning_rate": 1.9327455284648654e-06, "loss": 0.079705810546875, "step": 142430 }, { "epoch": 1.2315933282029554, "grad_norm": 2.606131858785517, "learning_rate": 1.932555125859602e-06, "loss": 0.02387542724609375, "step": 142435 }, { "epoch": 1.2316365617244986, "grad_norm": 0.263374689676342, "learning_rate": 1.9323647281773004e-06, "loss": 0.0412445068359375, "step": 142440 }, { "epoch": 1.2316797952460419, "grad_norm": 20.840911793021732, "learning_rate": 1.9321743354188375e-06, "loss": 0.10381317138671875, "step": 142445 }, { "epoch": 1.2317230287675853, "grad_norm": 5.056250665312897, "learning_rate": 1.9319839475850924e-06, "loss": 0.28743820190429686, "step": 142450 }, { "epoch": 1.2317662622891286, "grad_norm": 47.296852765412325, "learning_rate": 1.9317935646769426e-06, "loss": 0.4834281921386719, "step": 142455 }, { "epoch": 1.2318094958106718, "grad_norm": 0.43184900620229744, "learning_rate": 1.9316031866952673e-06, "loss": 0.0196014404296875, "step": 142460 }, { "epoch": 1.231852729332215, "grad_norm": 4.383774286514058, "learning_rate": 1.931412813640944e-06, "loss": 0.030133819580078124, "step": 142465 }, { "epoch": 1.2318959628537582, "grad_norm": 0.496076707182999, "learning_rate": 1.93122244551485e-06, "loss": 0.1264495849609375, "step": 142470 }, { "epoch": 1.2319391963753015, "grad_norm": 2.537254829631447, "learning_rate": 1.9310320823178634e-06, "loss": 0.008286380767822265, "step": 142475 }, { "epoch": 1.2319824298968447, "grad_norm": 0.44476893238109716, "learning_rate": 1.930841724050862e-06, "loss": 0.042150115966796874, "step": 142480 }, { "epoch": 1.2320256634183882, "grad_norm": 3.3199717581112815, "learning_rate": 1.9306513707147247e-06, "loss": 0.0845489501953125, "step": 142485 }, { "epoch": 1.2320688969399314, "grad_norm": 19.56957744313675, "learning_rate": 1.9304610223103286e-06, "loss": 0.1307830810546875, "step": 142490 }, { "epoch": 1.2321121304614746, "grad_norm": 0.9803552194924932, "learning_rate": 1.9302706788385526e-06, "loss": 0.057706260681152345, "step": 142495 }, { "epoch": 1.2321553639830178, "grad_norm": 7.36396877714143, "learning_rate": 1.930080340300273e-06, "loss": 0.03757476806640625, "step": 142500 }, { "epoch": 1.232198597504561, "grad_norm": 2.5863899214657615, "learning_rate": 1.929890006696368e-06, "loss": 0.1580474853515625, "step": 142505 }, { "epoch": 1.2322418310261045, "grad_norm": 23.180596665290118, "learning_rate": 1.929699678027716e-06, "loss": 0.2073760986328125, "step": 142510 }, { "epoch": 1.2322850645476477, "grad_norm": 1.6193939350026936, "learning_rate": 1.9295093542951937e-06, "loss": 0.058214664459228516, "step": 142515 }, { "epoch": 1.232328298069191, "grad_norm": 40.64472825829692, "learning_rate": 1.929319035499681e-06, "loss": 0.26802825927734375, "step": 142520 }, { "epoch": 1.2323715315907342, "grad_norm": 0.20784841255321873, "learning_rate": 1.929128721642054e-06, "loss": 0.009431838989257812, "step": 142525 }, { "epoch": 1.2324147651122774, "grad_norm": 11.907869271998345, "learning_rate": 1.9289384127231907e-06, "loss": 0.15542526245117189, "step": 142530 }, { "epoch": 1.2324579986338207, "grad_norm": 5.07433676255754, "learning_rate": 1.9287481087439684e-06, "loss": 0.03411102294921875, "step": 142535 }, { "epoch": 1.232501232155364, "grad_norm": 7.791576095108344, "learning_rate": 1.9285578097052657e-06, "loss": 0.07027740478515625, "step": 142540 }, { "epoch": 1.2325444656769073, "grad_norm": 0.36694210008640016, "learning_rate": 1.9283675156079584e-06, "loss": 0.022472190856933593, "step": 142545 }, { "epoch": 1.2325876991984506, "grad_norm": 5.71409702704505, "learning_rate": 1.9281772264529265e-06, "loss": 0.08476943969726562, "step": 142550 }, { "epoch": 1.2326309327199938, "grad_norm": 3.1073517231231587, "learning_rate": 1.927986942241047e-06, "loss": 0.040692138671875, "step": 142555 }, { "epoch": 1.232674166241537, "grad_norm": 2.476553020025535, "learning_rate": 1.9277966629731968e-06, "loss": 0.0668004035949707, "step": 142560 }, { "epoch": 1.2327173997630803, "grad_norm": 1.9371055272628364, "learning_rate": 1.9276063886502526e-06, "loss": 0.01571044921875, "step": 142565 }, { "epoch": 1.2327606332846235, "grad_norm": 0.019165462650829204, "learning_rate": 1.927416119273094e-06, "loss": 0.1113351821899414, "step": 142570 }, { "epoch": 1.232803866806167, "grad_norm": 0.5075999026010586, "learning_rate": 1.9272258548425965e-06, "loss": 0.1089630126953125, "step": 142575 }, { "epoch": 1.2328471003277102, "grad_norm": 1.130963609100029, "learning_rate": 1.9270355953596396e-06, "loss": 0.07379684448242188, "step": 142580 }, { "epoch": 1.2328903338492534, "grad_norm": 51.07245836085987, "learning_rate": 1.9268453408250995e-06, "loss": 0.29778213500976564, "step": 142585 }, { "epoch": 1.2329335673707966, "grad_norm": 1.1533834596749666, "learning_rate": 1.926655091239854e-06, "loss": 0.07809562683105468, "step": 142590 }, { "epoch": 1.2329768008923399, "grad_norm": 3.365656594838892, "learning_rate": 1.9264648466047797e-06, "loss": 0.053707122802734375, "step": 142595 }, { "epoch": 1.233020034413883, "grad_norm": 5.574519671443565, "learning_rate": 1.926274606920755e-06, "loss": 0.17262802124023438, "step": 142600 }, { "epoch": 1.2330632679354263, "grad_norm": 3.0066602445230206, "learning_rate": 1.9260843721886572e-06, "loss": 0.0407470703125, "step": 142605 }, { "epoch": 1.2331065014569698, "grad_norm": 1.6699358029970108, "learning_rate": 1.9258941424093628e-06, "loss": 0.02908477783203125, "step": 142610 }, { "epoch": 1.233149734978513, "grad_norm": 1.2821840546578225, "learning_rate": 1.92570391758375e-06, "loss": 0.14065685272216796, "step": 142615 }, { "epoch": 1.2331929685000562, "grad_norm": 18.205437072090486, "learning_rate": 1.9255136977126957e-06, "loss": 0.4187164306640625, "step": 142620 }, { "epoch": 1.2332362020215994, "grad_norm": 19.837664579385795, "learning_rate": 1.9253234827970778e-06, "loss": 0.1685455322265625, "step": 142625 }, { "epoch": 1.2332794355431427, "grad_norm": 1.7832267838568863, "learning_rate": 1.9251332728377728e-06, "loss": 0.031041717529296874, "step": 142630 }, { "epoch": 1.233322669064686, "grad_norm": 23.780726869531364, "learning_rate": 1.924943067835658e-06, "loss": 0.2851871490478516, "step": 142635 }, { "epoch": 1.2333659025862294, "grad_norm": 0.7877955263991568, "learning_rate": 1.9247528677916103e-06, "loss": 0.08541355133056641, "step": 142640 }, { "epoch": 1.2334091361077726, "grad_norm": 5.302132968906419, "learning_rate": 1.9245626727065084e-06, "loss": 0.12291717529296875, "step": 142645 }, { "epoch": 1.2334523696293158, "grad_norm": 0.29259904406964937, "learning_rate": 1.9243724825812273e-06, "loss": 0.025078678131103517, "step": 142650 }, { "epoch": 1.233495603150859, "grad_norm": 0.026232867928271574, "learning_rate": 1.924182297416646e-06, "loss": 0.20483856201171874, "step": 142655 }, { "epoch": 1.2335388366724023, "grad_norm": 18.048429374276452, "learning_rate": 1.9239921172136414e-06, "loss": 0.09673919677734374, "step": 142660 }, { "epoch": 1.2335820701939455, "grad_norm": 0.06360285459087366, "learning_rate": 1.9238019419730895e-06, "loss": 0.22188682556152345, "step": 142665 }, { "epoch": 1.2336253037154887, "grad_norm": 0.5241983325662093, "learning_rate": 1.923611771695867e-06, "loss": 0.04133949279785156, "step": 142670 }, { "epoch": 1.2336685372370322, "grad_norm": 1.1729682638109313, "learning_rate": 1.9234216063828533e-06, "loss": 0.05676956176757812, "step": 142675 }, { "epoch": 1.2337117707585754, "grad_norm": 1.5900587331805913, "learning_rate": 1.9232314460349236e-06, "loss": 0.0890869140625, "step": 142680 }, { "epoch": 1.2337550042801186, "grad_norm": 3.5742243016013147, "learning_rate": 1.923041290652955e-06, "loss": 0.13617401123046874, "step": 142685 }, { "epoch": 1.2337982378016619, "grad_norm": 18.285170589145476, "learning_rate": 1.9228511402378253e-06, "loss": 0.06651153564453124, "step": 142690 }, { "epoch": 1.233841471323205, "grad_norm": 2.3403490911354194, "learning_rate": 1.9226609947904113e-06, "loss": 0.05496063232421875, "step": 142695 }, { "epoch": 1.2338847048447485, "grad_norm": 3.0451075330808814, "learning_rate": 1.922470854311589e-06, "loss": 0.02832489013671875, "step": 142700 }, { "epoch": 1.2339279383662918, "grad_norm": 0.20473561522048517, "learning_rate": 1.922280718802235e-06, "loss": 0.05734100341796875, "step": 142705 }, { "epoch": 1.233971171887835, "grad_norm": 1.9959819851348062, "learning_rate": 1.922090588263228e-06, "loss": 0.05711631774902344, "step": 142710 }, { "epoch": 1.2340144054093782, "grad_norm": 14.157520625239533, "learning_rate": 1.921900462695444e-06, "loss": 0.133111572265625, "step": 142715 }, { "epoch": 1.2340576389309215, "grad_norm": 8.567510328120857, "learning_rate": 1.9217103420997604e-06, "loss": 0.041162872314453126, "step": 142720 }, { "epoch": 1.2341008724524647, "grad_norm": 20.960524159528784, "learning_rate": 1.921520226477053e-06, "loss": 0.13998641967773437, "step": 142725 }, { "epoch": 1.234144105974008, "grad_norm": 4.040847169242854, "learning_rate": 1.921330115828198e-06, "loss": 0.10480785369873047, "step": 142730 }, { "epoch": 1.2341873394955512, "grad_norm": 0.17603703014989042, "learning_rate": 1.9211400101540742e-06, "loss": 0.035495758056640625, "step": 142735 }, { "epoch": 1.2342305730170946, "grad_norm": 14.982834103179009, "learning_rate": 1.9209499094555565e-06, "loss": 0.028510665893554686, "step": 142740 }, { "epoch": 1.2342738065386378, "grad_norm": 0.3730273245086078, "learning_rate": 1.920759813733524e-06, "loss": 0.15658912658691407, "step": 142745 }, { "epoch": 1.234317040060181, "grad_norm": 0.8186330172888645, "learning_rate": 1.9205697229888508e-06, "loss": 0.046105575561523435, "step": 142750 }, { "epoch": 1.2343602735817243, "grad_norm": 11.94332533373595, "learning_rate": 1.9203796372224156e-06, "loss": 0.05816574096679687, "step": 142755 }, { "epoch": 1.2344035071032675, "grad_norm": 46.79783172041802, "learning_rate": 1.920189556435093e-06, "loss": 0.15037307739257813, "step": 142760 }, { "epoch": 1.234446740624811, "grad_norm": 25.138064434012914, "learning_rate": 1.919999480627761e-06, "loss": 0.07322845458984376, "step": 142765 }, { "epoch": 1.2344899741463542, "grad_norm": 1.810161923587747, "learning_rate": 1.9198094098012952e-06, "loss": 0.10954303741455078, "step": 142770 }, { "epoch": 1.2345332076678974, "grad_norm": 1.451298928578177, "learning_rate": 1.9196193439565744e-06, "loss": 0.03373394012451172, "step": 142775 }, { "epoch": 1.2345764411894407, "grad_norm": 0.5754064711914979, "learning_rate": 1.919429283094473e-06, "loss": 0.0306488037109375, "step": 142780 }, { "epoch": 1.2346196747109839, "grad_norm": 0.13897463765480847, "learning_rate": 1.9192392272158696e-06, "loss": 0.07941474914550781, "step": 142785 }, { "epoch": 1.2346629082325271, "grad_norm": 0.8603144259289398, "learning_rate": 1.9190491763216373e-06, "loss": 0.08370132446289062, "step": 142790 }, { "epoch": 1.2347061417540703, "grad_norm": 0.220093277214049, "learning_rate": 1.918859130412656e-06, "loss": 0.07904739379882812, "step": 142795 }, { "epoch": 1.2347493752756138, "grad_norm": 3.140770987589429, "learning_rate": 1.9186690894897997e-06, "loss": 0.34026641845703126, "step": 142800 }, { "epoch": 1.234792608797157, "grad_norm": 4.267425274236561, "learning_rate": 1.918479053553947e-06, "loss": 0.044411849975585935, "step": 142805 }, { "epoch": 1.2348358423187002, "grad_norm": 0.7303871900000303, "learning_rate": 1.9182890226059734e-06, "loss": 0.041857147216796876, "step": 142810 }, { "epoch": 1.2348790758402435, "grad_norm": 2.4056909405868705, "learning_rate": 1.9180989966467555e-06, "loss": 0.07380142211914062, "step": 142815 }, { "epoch": 1.2349223093617867, "grad_norm": 3.445840386664883, "learning_rate": 1.917908975677168e-06, "loss": 0.25376358032226565, "step": 142820 }, { "epoch": 1.23496554288333, "grad_norm": 1.4720773694260654, "learning_rate": 1.9177189596980904e-06, "loss": 0.08349609375, "step": 142825 }, { "epoch": 1.2350087764048734, "grad_norm": 56.914713833402196, "learning_rate": 1.9175289487103954e-06, "loss": 0.5753616333007813, "step": 142830 }, { "epoch": 1.2350520099264166, "grad_norm": 0.7153233890858457, "learning_rate": 1.917338942714963e-06, "loss": 0.09868545532226562, "step": 142835 }, { "epoch": 1.2350952434479598, "grad_norm": 1.2629396801905832, "learning_rate": 1.9171489417126675e-06, "loss": 0.028816986083984374, "step": 142840 }, { "epoch": 1.235138476969503, "grad_norm": 61.23556146638765, "learning_rate": 1.916958945704384e-06, "loss": 0.10850677490234376, "step": 142845 }, { "epoch": 1.2351817104910463, "grad_norm": 8.107949120111721, "learning_rate": 1.916768954690992e-06, "loss": 0.19218215942382813, "step": 142850 }, { "epoch": 1.2352249440125895, "grad_norm": 5.767020559715554, "learning_rate": 1.916578968673365e-06, "loss": 0.03443851470947266, "step": 142855 }, { "epoch": 1.2352681775341328, "grad_norm": 1.834838642484595, "learning_rate": 1.9163889876523805e-06, "loss": 0.0230255126953125, "step": 142860 }, { "epoch": 1.2353114110556762, "grad_norm": 4.771434280752644, "learning_rate": 1.9161990116289125e-06, "loss": 0.08104248046875, "step": 142865 }, { "epoch": 1.2353546445772194, "grad_norm": 9.937365510977303, "learning_rate": 1.9160090406038406e-06, "loss": 0.04472427368164063, "step": 142870 }, { "epoch": 1.2353978780987627, "grad_norm": 53.464231845904685, "learning_rate": 1.9158190745780385e-06, "loss": 0.25654144287109376, "step": 142875 }, { "epoch": 1.235441111620306, "grad_norm": 17.653281461380857, "learning_rate": 1.9156291135523836e-06, "loss": 0.10853118896484375, "step": 142880 }, { "epoch": 1.2354843451418491, "grad_norm": 2.424011850228174, "learning_rate": 1.9154391575277515e-06, "loss": 0.012957382202148437, "step": 142885 }, { "epoch": 1.2355275786633924, "grad_norm": 2.3081182796480384, "learning_rate": 1.9152492065050183e-06, "loss": 0.3414447784423828, "step": 142890 }, { "epoch": 1.2355708121849358, "grad_norm": 15.950053970432416, "learning_rate": 1.9150592604850586e-06, "loss": 0.17956466674804689, "step": 142895 }, { "epoch": 1.235614045706479, "grad_norm": 0.5175195873554176, "learning_rate": 1.914869319468751e-06, "loss": 0.053469276428222655, "step": 142900 }, { "epoch": 1.2356572792280223, "grad_norm": 1.3037543707964785, "learning_rate": 1.914679383456969e-06, "loss": 0.1099334716796875, "step": 142905 }, { "epoch": 1.2357005127495655, "grad_norm": 0.1760676526541262, "learning_rate": 1.914489452450591e-06, "loss": 0.018971633911132813, "step": 142910 }, { "epoch": 1.2357437462711087, "grad_norm": 8.765708542446912, "learning_rate": 1.9142995264504913e-06, "loss": 0.06974277496337891, "step": 142915 }, { "epoch": 1.235786979792652, "grad_norm": 3.591374330964381, "learning_rate": 1.914109605457547e-06, "loss": 0.1843416213989258, "step": 142920 }, { "epoch": 1.2358302133141952, "grad_norm": 18.15093289032299, "learning_rate": 1.9139196894726314e-06, "loss": 0.08673858642578125, "step": 142925 }, { "epoch": 1.2358734468357386, "grad_norm": 7.124964822861518, "learning_rate": 1.9137297784966237e-06, "loss": 0.05061264038085937, "step": 142930 }, { "epoch": 1.2359166803572819, "grad_norm": 12.784237167980537, "learning_rate": 1.9135398725303974e-06, "loss": 0.0582977294921875, "step": 142935 }, { "epoch": 1.235959913878825, "grad_norm": 34.48795387633885, "learning_rate": 1.91334997157483e-06, "loss": 0.09928245544433593, "step": 142940 }, { "epoch": 1.2360031474003683, "grad_norm": 1.2130567440345692, "learning_rate": 1.9131600756307967e-06, "loss": 0.048760986328125, "step": 142945 }, { "epoch": 1.2360463809219115, "grad_norm": 1.1589810225773485, "learning_rate": 1.9129701846991727e-06, "loss": 0.08131484985351563, "step": 142950 }, { "epoch": 1.236089614443455, "grad_norm": 10.874033293707969, "learning_rate": 1.912780298780834e-06, "loss": 0.05353469848632812, "step": 142955 }, { "epoch": 1.2361328479649982, "grad_norm": 15.816093004664866, "learning_rate": 1.9125904178766558e-06, "loss": 0.10434646606445312, "step": 142960 }, { "epoch": 1.2361760814865415, "grad_norm": 7.3558767620560195, "learning_rate": 1.912400541987515e-06, "loss": 0.011876678466796875, "step": 142965 }, { "epoch": 1.2362193150080847, "grad_norm": 3.500259858119039, "learning_rate": 1.9122106711142876e-06, "loss": 0.113006591796875, "step": 142970 }, { "epoch": 1.236262548529628, "grad_norm": 1.9276941258845703, "learning_rate": 1.912020805257848e-06, "loss": 0.015765380859375, "step": 142975 }, { "epoch": 1.2363057820511711, "grad_norm": 0.41130086748985334, "learning_rate": 1.9118309444190726e-06, "loss": 0.07094879150390625, "step": 142980 }, { "epoch": 1.2363490155727144, "grad_norm": 0.18090963328194565, "learning_rate": 1.9116410885988362e-06, "loss": 0.04552459716796875, "step": 142985 }, { "epoch": 1.2363922490942576, "grad_norm": 0.16030079673676914, "learning_rate": 1.9114512377980146e-06, "loss": 0.025131988525390624, "step": 142990 }, { "epoch": 1.236435482615801, "grad_norm": 1.820197658280974, "learning_rate": 1.9112613920174836e-06, "loss": 0.02677764892578125, "step": 142995 }, { "epoch": 1.2364787161373443, "grad_norm": 22.92266360311679, "learning_rate": 1.9110715512581198e-06, "loss": 0.078619384765625, "step": 143000 }, { "epoch": 1.2365219496588875, "grad_norm": 1.9281709709067432, "learning_rate": 1.910881715520798e-06, "loss": 0.06346321105957031, "step": 143005 }, { "epoch": 1.2365651831804307, "grad_norm": 6.8125874413788905, "learning_rate": 1.910691884806393e-06, "loss": 0.0947998046875, "step": 143010 }, { "epoch": 1.236608416701974, "grad_norm": 1.1439195994553153, "learning_rate": 1.9105020591157806e-06, "loss": 0.028301239013671875, "step": 143015 }, { "epoch": 1.2366516502235174, "grad_norm": 29.94925784221545, "learning_rate": 1.910312238449837e-06, "loss": 0.07046222686767578, "step": 143020 }, { "epoch": 1.2366948837450606, "grad_norm": 1.0980361612679446, "learning_rate": 1.9101224228094352e-06, "loss": 0.03512725830078125, "step": 143025 }, { "epoch": 1.2367381172666039, "grad_norm": 33.19217326616614, "learning_rate": 1.9099326121954544e-06, "loss": 0.12358894348144531, "step": 143030 }, { "epoch": 1.236781350788147, "grad_norm": 0.7940066876737757, "learning_rate": 1.909742806608768e-06, "loss": 0.014510726928710938, "step": 143035 }, { "epoch": 1.2368245843096903, "grad_norm": 4.513145902840142, "learning_rate": 1.9095530060502505e-06, "loss": 0.1081207275390625, "step": 143040 }, { "epoch": 1.2368678178312336, "grad_norm": 0.6481562702636621, "learning_rate": 1.909363210520779e-06, "loss": 0.018684005737304686, "step": 143045 }, { "epoch": 1.2369110513527768, "grad_norm": 34.53278746499122, "learning_rate": 1.9091734200212283e-06, "loss": 0.11235275268554687, "step": 143050 }, { "epoch": 1.2369542848743202, "grad_norm": 8.557666824142528, "learning_rate": 1.9089836345524717e-06, "loss": 0.03141326904296875, "step": 143055 }, { "epoch": 1.2369975183958635, "grad_norm": 0.8230975883746433, "learning_rate": 1.9087938541153877e-06, "loss": 0.08739395141601562, "step": 143060 }, { "epoch": 1.2370407519174067, "grad_norm": 0.777741002871124, "learning_rate": 1.9086040787108495e-06, "loss": 0.04695339202880859, "step": 143065 }, { "epoch": 1.23708398543895, "grad_norm": 1.9526735314004664, "learning_rate": 1.9084143083397327e-06, "loss": 0.2071929931640625, "step": 143070 }, { "epoch": 1.2371272189604932, "grad_norm": 53.334182169913994, "learning_rate": 1.9082245430029134e-06, "loss": 0.15526199340820312, "step": 143075 }, { "epoch": 1.2371704524820364, "grad_norm": 7.396188476215052, "learning_rate": 1.9080347827012657e-06, "loss": 0.05125846862792969, "step": 143080 }, { "epoch": 1.2372136860035798, "grad_norm": 0.22178016056331915, "learning_rate": 1.9078450274356643e-06, "loss": 0.047955322265625, "step": 143085 }, { "epoch": 1.237256919525123, "grad_norm": 7.2270631044722276, "learning_rate": 1.907655277206986e-06, "loss": 0.04174346923828125, "step": 143090 }, { "epoch": 1.2373001530466663, "grad_norm": 45.12200440489969, "learning_rate": 1.907465532016105e-06, "loss": 0.21199073791503906, "step": 143095 }, { "epoch": 1.2373433865682095, "grad_norm": 0.3096393353183172, "learning_rate": 1.9072757918638958e-06, "loss": 0.00604400634765625, "step": 143100 }, { "epoch": 1.2373866200897528, "grad_norm": 6.529680726084898, "learning_rate": 1.907086056751235e-06, "loss": 0.04432258605957031, "step": 143105 }, { "epoch": 1.237429853611296, "grad_norm": 5.363918674728279, "learning_rate": 1.9068963266789966e-06, "loss": 0.0948577880859375, "step": 143110 }, { "epoch": 1.2374730871328392, "grad_norm": 2.7479880050001357, "learning_rate": 1.9067066016480554e-06, "loss": 0.018445587158203124, "step": 143115 }, { "epoch": 1.2375163206543827, "grad_norm": 0.5687551783408805, "learning_rate": 1.9065168816592858e-06, "loss": 0.034508132934570314, "step": 143120 }, { "epoch": 1.237559554175926, "grad_norm": 1.370453257212978, "learning_rate": 1.9063271667135653e-06, "loss": 0.052941513061523435, "step": 143125 }, { "epoch": 1.2376027876974691, "grad_norm": 9.522421205012431, "learning_rate": 1.9061374568117663e-06, "loss": 0.032332229614257815, "step": 143130 }, { "epoch": 1.2376460212190123, "grad_norm": 8.643493238026124, "learning_rate": 1.905947751954765e-06, "loss": 0.15357599258422852, "step": 143135 }, { "epoch": 1.2376892547405556, "grad_norm": 3.3156797347635973, "learning_rate": 1.9057580521434363e-06, "loss": 0.018090534210205077, "step": 143140 }, { "epoch": 1.2377324882620988, "grad_norm": 0.14893556045319106, "learning_rate": 1.905568357378655e-06, "loss": 0.018974685668945314, "step": 143145 }, { "epoch": 1.2377757217836423, "grad_norm": 45.701928322487525, "learning_rate": 1.9053786676612945e-06, "loss": 0.15746536254882812, "step": 143150 }, { "epoch": 1.2378189553051855, "grad_norm": 0.39518729568427996, "learning_rate": 1.905188982992232e-06, "loss": 0.06882781982421875, "step": 143155 }, { "epoch": 1.2378621888267287, "grad_norm": 0.22658614709931366, "learning_rate": 1.90499930337234e-06, "loss": 0.03095588684082031, "step": 143160 }, { "epoch": 1.237905422348272, "grad_norm": 5.250667379578956, "learning_rate": 1.9048096288024957e-06, "loss": 0.05928993225097656, "step": 143165 }, { "epoch": 1.2379486558698152, "grad_norm": 47.47054223239794, "learning_rate": 1.9046199592835727e-06, "loss": 0.12648468017578124, "step": 143170 }, { "epoch": 1.2379918893913584, "grad_norm": 1.5303109700366286, "learning_rate": 1.9044302948164452e-06, "loss": 0.017400550842285156, "step": 143175 }, { "epoch": 1.2380351229129016, "grad_norm": 0.9227727360566425, "learning_rate": 1.9042406354019875e-06, "loss": 0.010143280029296875, "step": 143180 }, { "epoch": 1.238078356434445, "grad_norm": 0.24295510583710359, "learning_rate": 1.9040509810410763e-06, "loss": 0.02966156005859375, "step": 143185 }, { "epoch": 1.2381215899559883, "grad_norm": 0.09039403607345094, "learning_rate": 1.9038613317345846e-06, "loss": 0.004591560363769532, "step": 143190 }, { "epoch": 1.2381648234775315, "grad_norm": 1.8985277345162894, "learning_rate": 1.9036716874833877e-06, "loss": 0.05774688720703125, "step": 143195 }, { "epoch": 1.2382080569990748, "grad_norm": 4.709403663669914, "learning_rate": 1.9034820482883605e-06, "loss": 0.035076904296875, "step": 143200 }, { "epoch": 1.238251290520618, "grad_norm": 11.169138284121061, "learning_rate": 1.9032924141503772e-06, "loss": 0.16695098876953124, "step": 143205 }, { "epoch": 1.2382945240421614, "grad_norm": 0.5568603952000074, "learning_rate": 1.903102785070312e-06, "loss": 0.04531784057617187, "step": 143210 }, { "epoch": 1.2383377575637047, "grad_norm": 1.3944414101754925, "learning_rate": 1.902913161049039e-06, "loss": 0.3005378723144531, "step": 143215 }, { "epoch": 1.238380991085248, "grad_norm": 64.93554145426103, "learning_rate": 1.9027235420874344e-06, "loss": 0.11128005981445313, "step": 143220 }, { "epoch": 1.2384242246067911, "grad_norm": 17.34038160460681, "learning_rate": 1.902533928186372e-06, "loss": 0.0372650146484375, "step": 143225 }, { "epoch": 1.2384674581283344, "grad_norm": 2.014826240512035, "learning_rate": 1.9023443193467265e-06, "loss": 0.0366973876953125, "step": 143230 }, { "epoch": 1.2385106916498776, "grad_norm": 15.927271805926976, "learning_rate": 1.902154715569372e-06, "loss": 0.0634840965270996, "step": 143235 }, { "epoch": 1.2385539251714208, "grad_norm": 56.8306828557471, "learning_rate": 1.9019651168551823e-06, "loss": 0.16240921020507812, "step": 143240 }, { "epoch": 1.2385971586929643, "grad_norm": 0.5742021299063178, "learning_rate": 1.9017755232050315e-06, "loss": 0.05001983642578125, "step": 143245 }, { "epoch": 1.2386403922145075, "grad_norm": 0.9437039296066638, "learning_rate": 1.9015859346197966e-06, "loss": 0.05907306671142578, "step": 143250 }, { "epoch": 1.2386836257360507, "grad_norm": 0.12636683270578872, "learning_rate": 1.90139635110035e-06, "loss": 0.021602630615234375, "step": 143255 }, { "epoch": 1.238726859257594, "grad_norm": 7.650661521856913, "learning_rate": 1.9012067726475665e-06, "loss": 0.047467803955078124, "step": 143260 }, { "epoch": 1.2387700927791372, "grad_norm": 10.628183480211362, "learning_rate": 1.9010171992623194e-06, "loss": 0.16553802490234376, "step": 143265 }, { "epoch": 1.2388133263006804, "grad_norm": 0.6879247897797021, "learning_rate": 1.9008276309454852e-06, "loss": 0.007814407348632812, "step": 143270 }, { "epoch": 1.2388565598222239, "grad_norm": 0.6354011374173878, "learning_rate": 1.9006380676979352e-06, "loss": 0.13331680297851561, "step": 143275 }, { "epoch": 1.238899793343767, "grad_norm": 4.105921390599568, "learning_rate": 1.9004485095205464e-06, "loss": 0.10109481811523438, "step": 143280 }, { "epoch": 1.2389430268653103, "grad_norm": 1.0721934027233155, "learning_rate": 1.900258956414192e-06, "loss": 0.07307968139648438, "step": 143285 }, { "epoch": 1.2389862603868536, "grad_norm": 0.32380606038962756, "learning_rate": 1.900069408379746e-06, "loss": 0.13631725311279297, "step": 143290 }, { "epoch": 1.2390294939083968, "grad_norm": 12.959986457059811, "learning_rate": 1.8998798654180823e-06, "loss": 0.03935165405273437, "step": 143295 }, { "epoch": 1.23907272742994, "grad_norm": 0.04497292994285954, "learning_rate": 1.8996903275300757e-06, "loss": 0.04040946960449219, "step": 143300 }, { "epoch": 1.2391159609514832, "grad_norm": 0.6920260225295026, "learning_rate": 1.8995007947166006e-06, "loss": 0.27779693603515626, "step": 143305 }, { "epoch": 1.2391591944730267, "grad_norm": 1.7087830240226645, "learning_rate": 1.8993112669785288e-06, "loss": 0.0286865234375, "step": 143310 }, { "epoch": 1.23920242799457, "grad_norm": 3.374402707555425, "learning_rate": 1.8991217443167379e-06, "loss": 0.0456634521484375, "step": 143315 }, { "epoch": 1.2392456615161132, "grad_norm": 0.4677065526922594, "learning_rate": 1.8989322267321e-06, "loss": 0.11620407104492188, "step": 143320 }, { "epoch": 1.2392888950376564, "grad_norm": 2.4909492657224446, "learning_rate": 1.8987427142254887e-06, "loss": 0.03823089599609375, "step": 143325 }, { "epoch": 1.2393321285591996, "grad_norm": 8.410893962092056, "learning_rate": 1.8985532067977795e-06, "loss": 0.0823089599609375, "step": 143330 }, { "epoch": 1.2393753620807428, "grad_norm": 0.6979308361690426, "learning_rate": 1.8983637044498454e-06, "loss": 0.04820938110351562, "step": 143335 }, { "epoch": 1.2394185956022863, "grad_norm": 8.617708388145212, "learning_rate": 1.8981742071825596e-06, "loss": 0.028202056884765625, "step": 143340 }, { "epoch": 1.2394618291238295, "grad_norm": 5.798715022020914, "learning_rate": 1.897984714996798e-06, "loss": 0.0929718017578125, "step": 143345 }, { "epoch": 1.2395050626453727, "grad_norm": 8.690576675390934, "learning_rate": 1.8977952278934336e-06, "loss": 0.11996421813964844, "step": 143350 }, { "epoch": 1.239548296166916, "grad_norm": 36.04004220178086, "learning_rate": 1.8976057458733393e-06, "loss": 0.11317253112792969, "step": 143355 }, { "epoch": 1.2395915296884592, "grad_norm": 7.851018251403636, "learning_rate": 1.8974162689373908e-06, "loss": 0.09623603820800782, "step": 143360 }, { "epoch": 1.2396347632100024, "grad_norm": 5.643270359877789, "learning_rate": 1.8972267970864614e-06, "loss": 0.030129432678222656, "step": 143365 }, { "epoch": 1.2396779967315457, "grad_norm": 1.1419685053198403, "learning_rate": 1.897037330321424e-06, "loss": 0.05033721923828125, "step": 143370 }, { "epoch": 1.2397212302530891, "grad_norm": 0.6816082881054926, "learning_rate": 1.896847868643152e-06, "loss": 0.07419281005859375, "step": 143375 }, { "epoch": 1.2397644637746323, "grad_norm": 0.21236093643743711, "learning_rate": 1.8966584120525212e-06, "loss": 0.014006423950195312, "step": 143380 }, { "epoch": 1.2398076972961756, "grad_norm": 43.13041597211839, "learning_rate": 1.8964689605504037e-06, "loss": 0.147357177734375, "step": 143385 }, { "epoch": 1.2398509308177188, "grad_norm": 0.42768237677281684, "learning_rate": 1.8962795141376746e-06, "loss": 0.1158294677734375, "step": 143390 }, { "epoch": 1.239894164339262, "grad_norm": 8.218653204350767, "learning_rate": 1.8960900728152069e-06, "loss": 0.14360618591308594, "step": 143395 }, { "epoch": 1.2399373978608053, "grad_norm": 42.144238954804216, "learning_rate": 1.8959006365838741e-06, "loss": 0.05202789306640625, "step": 143400 }, { "epoch": 1.2399806313823487, "grad_norm": 52.90951183852001, "learning_rate": 1.895711205444549e-06, "loss": 0.27698097229003904, "step": 143405 }, { "epoch": 1.240023864903892, "grad_norm": 8.520937126189787, "learning_rate": 1.8955217793981074e-06, "loss": 0.03588943481445313, "step": 143410 }, { "epoch": 1.2400670984254352, "grad_norm": 1.0672845851746013, "learning_rate": 1.895332358445421e-06, "loss": 0.1862579345703125, "step": 143415 }, { "epoch": 1.2401103319469784, "grad_norm": 1.7862576789361386, "learning_rate": 1.8951429425873648e-06, "loss": 0.0205352783203125, "step": 143420 }, { "epoch": 1.2401535654685216, "grad_norm": 2.8244969009967873, "learning_rate": 1.8949535318248118e-06, "loss": 0.040465927124023436, "step": 143425 }, { "epoch": 1.2401967989900649, "grad_norm": 8.550240109895665, "learning_rate": 1.8947641261586354e-06, "loss": 0.0358673095703125, "step": 143430 }, { "epoch": 1.240240032511608, "grad_norm": 37.417220647474274, "learning_rate": 1.8945747255897079e-06, "loss": 0.26737518310546876, "step": 143435 }, { "epoch": 1.2402832660331515, "grad_norm": 8.49878883729995, "learning_rate": 1.8943853301189047e-06, "loss": 0.036810302734375, "step": 143440 }, { "epoch": 1.2403264995546948, "grad_norm": 25.53576625710215, "learning_rate": 1.8941959397470994e-06, "loss": 0.078619384765625, "step": 143445 }, { "epoch": 1.240369733076238, "grad_norm": 3.6112843204018557, "learning_rate": 1.8940065544751646e-06, "loss": 0.07130279541015624, "step": 143450 }, { "epoch": 1.2404129665977812, "grad_norm": 5.36274114007043, "learning_rate": 1.8938171743039743e-06, "loss": 0.06190338134765625, "step": 143455 }, { "epoch": 1.2404562001193244, "grad_norm": 1.1703294763506464, "learning_rate": 1.893627799234401e-06, "loss": 0.22936859130859374, "step": 143460 }, { "epoch": 1.240499433640868, "grad_norm": 5.995301868389271, "learning_rate": 1.8934384292673178e-06, "loss": 0.02570934295654297, "step": 143465 }, { "epoch": 1.2405426671624111, "grad_norm": 25.020519307212016, "learning_rate": 1.8932490644035983e-06, "loss": 0.06538772583007812, "step": 143470 }, { "epoch": 1.2405859006839544, "grad_norm": 2.772625372240683, "learning_rate": 1.8930597046441178e-06, "loss": 0.084576416015625, "step": 143475 }, { "epoch": 1.2406291342054976, "grad_norm": 64.8672295040512, "learning_rate": 1.8928703499897477e-06, "loss": 0.5895271301269531, "step": 143480 }, { "epoch": 1.2406723677270408, "grad_norm": 0.17929370085258786, "learning_rate": 1.892681000441362e-06, "loss": 0.09712791442871094, "step": 143485 }, { "epoch": 1.240715601248584, "grad_norm": 33.68243992771336, "learning_rate": 1.8924916559998332e-06, "loss": 0.1020721435546875, "step": 143490 }, { "epoch": 1.2407588347701273, "grad_norm": 2.042310447096182, "learning_rate": 1.8923023166660351e-06, "loss": 0.058945083618164064, "step": 143495 }, { "epoch": 1.2408020682916707, "grad_norm": 2.2651711949760513, "learning_rate": 1.8921129824408401e-06, "loss": 0.017352294921875, "step": 143500 }, { "epoch": 1.240845301813214, "grad_norm": 0.17277093278264985, "learning_rate": 1.8919236533251234e-06, "loss": 0.01066274642944336, "step": 143505 }, { "epoch": 1.2408885353347572, "grad_norm": 2.1036224063985713, "learning_rate": 1.8917343293197568e-06, "loss": 0.024105596542358398, "step": 143510 }, { "epoch": 1.2409317688563004, "grad_norm": 0.6087853327425193, "learning_rate": 1.8915450104256135e-06, "loss": 0.12548980712890626, "step": 143515 }, { "epoch": 1.2409750023778436, "grad_norm": 13.281428387516517, "learning_rate": 1.8913556966435661e-06, "loss": 0.03268146514892578, "step": 143520 }, { "epoch": 1.2410182358993869, "grad_norm": 4.827155462458667, "learning_rate": 1.8911663879744887e-06, "loss": 0.06502685546875, "step": 143525 }, { "epoch": 1.2410614694209303, "grad_norm": 0.5982717816080295, "learning_rate": 1.8909770844192526e-06, "loss": 0.027030181884765626, "step": 143530 }, { "epoch": 1.2411047029424735, "grad_norm": 7.928662691019856, "learning_rate": 1.8907877859787339e-06, "loss": 0.034102249145507815, "step": 143535 }, { "epoch": 1.2411479364640168, "grad_norm": 8.234167040622818, "learning_rate": 1.8905984926538035e-06, "loss": 0.12369403839111329, "step": 143540 }, { "epoch": 1.24119116998556, "grad_norm": 20.036558971216184, "learning_rate": 1.8904092044453348e-06, "loss": 0.0517852783203125, "step": 143545 }, { "epoch": 1.2412344035071032, "grad_norm": 1.1114367184490896, "learning_rate": 1.8902199213542007e-06, "loss": 0.14737777709960936, "step": 143550 }, { "epoch": 1.2412776370286465, "grad_norm": 3.5775910949094607, "learning_rate": 1.890030643381274e-06, "loss": 0.0212432861328125, "step": 143555 }, { "epoch": 1.2413208705501897, "grad_norm": 1.9614961637051092, "learning_rate": 1.8898413705274288e-06, "loss": 0.1644012451171875, "step": 143560 }, { "epoch": 1.2413641040717331, "grad_norm": 7.427323099248902, "learning_rate": 1.8896521027935354e-06, "loss": 0.02719879150390625, "step": 143565 }, { "epoch": 1.2414073375932764, "grad_norm": 48.13425579011451, "learning_rate": 1.8894628401804696e-06, "loss": 0.4635166168212891, "step": 143570 }, { "epoch": 1.2414505711148196, "grad_norm": 5.013516560170218, "learning_rate": 1.8892735826891036e-06, "loss": 0.22915430068969728, "step": 143575 }, { "epoch": 1.2414938046363628, "grad_norm": 5.751382770031492, "learning_rate": 1.8890843303203086e-06, "loss": 0.057088279724121095, "step": 143580 }, { "epoch": 1.241537038157906, "grad_norm": 24.44359893671614, "learning_rate": 1.8888950830749591e-06, "loss": 0.018301010131835938, "step": 143585 }, { "epoch": 1.2415802716794493, "grad_norm": 5.02440807484314, "learning_rate": 1.8887058409539275e-06, "loss": 0.023302459716796876, "step": 143590 }, { "epoch": 1.2416235052009927, "grad_norm": 0.12184546678889256, "learning_rate": 1.888516603958085e-06, "loss": 0.08732643127441406, "step": 143595 }, { "epoch": 1.241666738722536, "grad_norm": 37.79647975047895, "learning_rate": 1.888327372088307e-06, "loss": 0.3332550048828125, "step": 143600 }, { "epoch": 1.2417099722440792, "grad_norm": 23.640759392809578, "learning_rate": 1.8881381453454649e-06, "loss": 0.028176116943359374, "step": 143605 }, { "epoch": 1.2417532057656224, "grad_norm": 0.7621019532012718, "learning_rate": 1.8879489237304309e-06, "loss": 0.10695838928222656, "step": 143610 }, { "epoch": 1.2417964392871657, "grad_norm": 39.41356124263443, "learning_rate": 1.8877597072440785e-06, "loss": 0.14443702697753907, "step": 143615 }, { "epoch": 1.2418396728087089, "grad_norm": 10.924461110312325, "learning_rate": 1.8875704958872803e-06, "loss": 0.049747467041015625, "step": 143620 }, { "epoch": 1.2418829063302521, "grad_norm": 39.638256873438365, "learning_rate": 1.8873812896609073e-06, "loss": 0.10734214782714843, "step": 143625 }, { "epoch": 1.2419261398517956, "grad_norm": 13.751466679379668, "learning_rate": 1.887192088565835e-06, "loss": 0.160308837890625, "step": 143630 }, { "epoch": 1.2419693733733388, "grad_norm": 2.240833159657777, "learning_rate": 1.8870028926029342e-06, "loss": 0.014458847045898438, "step": 143635 }, { "epoch": 1.242012606894882, "grad_norm": 0.41829108809466214, "learning_rate": 1.8868137017730771e-06, "loss": 0.007550811767578125, "step": 143640 }, { "epoch": 1.2420558404164252, "grad_norm": 1.0735522423247292, "learning_rate": 1.8866245160771379e-06, "loss": 0.03605976104736328, "step": 143645 }, { "epoch": 1.2420990739379685, "grad_norm": 2.6291553401886354, "learning_rate": 1.8864353355159873e-06, "loss": 0.06992950439453124, "step": 143650 }, { "epoch": 1.242142307459512, "grad_norm": 6.387424432049275, "learning_rate": 1.886246160090499e-06, "loss": 0.06845130920410156, "step": 143655 }, { "epoch": 1.2421855409810552, "grad_norm": 1.4047579230142218, "learning_rate": 1.8860569898015437e-06, "loss": 0.054827880859375, "step": 143660 }, { "epoch": 1.2422287745025984, "grad_norm": 12.680956369569603, "learning_rate": 1.885867824649996e-06, "loss": 0.07448101043701172, "step": 143665 }, { "epoch": 1.2422720080241416, "grad_norm": 13.022078687932089, "learning_rate": 1.8856786646367275e-06, "loss": 0.03747215270996094, "step": 143670 }, { "epoch": 1.2423152415456848, "grad_norm": 2.631881401872414, "learning_rate": 1.8854895097626108e-06, "loss": 0.07952232360839843, "step": 143675 }, { "epoch": 1.242358475067228, "grad_norm": 0.904686665229055, "learning_rate": 1.8853003600285183e-06, "loss": 0.037939453125, "step": 143680 }, { "epoch": 1.2424017085887713, "grad_norm": 0.03878953658229397, "learning_rate": 1.8851112154353217e-06, "loss": 0.0202972412109375, "step": 143685 }, { "epoch": 1.2424449421103145, "grad_norm": 4.748393332696233, "learning_rate": 1.8849220759838926e-06, "loss": 0.23018722534179686, "step": 143690 }, { "epoch": 1.242488175631858, "grad_norm": 45.05743696427436, "learning_rate": 1.8847329416751046e-06, "loss": 0.419781494140625, "step": 143695 }, { "epoch": 1.2425314091534012, "grad_norm": 1.881260891086071, "learning_rate": 1.8845438125098308e-06, "loss": 0.07849884033203125, "step": 143700 }, { "epoch": 1.2425746426749444, "grad_norm": 44.23563327867372, "learning_rate": 1.8843546884889424e-06, "loss": 0.09010677337646485, "step": 143705 }, { "epoch": 1.2426178761964877, "grad_norm": 0.08371473415516419, "learning_rate": 1.8841655696133114e-06, "loss": 0.01313018798828125, "step": 143710 }, { "epoch": 1.242661109718031, "grad_norm": 36.45799807506287, "learning_rate": 1.8839764558838093e-06, "loss": 0.1647632598876953, "step": 143715 }, { "epoch": 1.2427043432395743, "grad_norm": 2.2921393937082617, "learning_rate": 1.8837873473013104e-06, "loss": 0.03376693725585937, "step": 143720 }, { "epoch": 1.2427475767611176, "grad_norm": 2.7629200417573445, "learning_rate": 1.8835982438666836e-06, "loss": 0.03014984130859375, "step": 143725 }, { "epoch": 1.2427908102826608, "grad_norm": 0.5146262681231736, "learning_rate": 1.883409145580805e-06, "loss": 0.018761444091796874, "step": 143730 }, { "epoch": 1.242834043804204, "grad_norm": 0.17032246179079413, "learning_rate": 1.8832200524445446e-06, "loss": 0.05326995849609375, "step": 143735 }, { "epoch": 1.2428772773257473, "grad_norm": 1.3655083377922943, "learning_rate": 1.8830309644587741e-06, "loss": 0.0262176513671875, "step": 143740 }, { "epoch": 1.2429205108472905, "grad_norm": 0.2959778132494384, "learning_rate": 1.8828418816243664e-06, "loss": 0.058415985107421874, "step": 143745 }, { "epoch": 1.2429637443688337, "grad_norm": 0.9513460965719539, "learning_rate": 1.8826528039421933e-06, "loss": 0.026078033447265624, "step": 143750 }, { "epoch": 1.2430069778903772, "grad_norm": 3.443497869288242, "learning_rate": 1.8824637314131255e-06, "loss": 0.16895523071289062, "step": 143755 }, { "epoch": 1.2430502114119204, "grad_norm": 4.872624057329323, "learning_rate": 1.8822746640380375e-06, "loss": 0.06632614135742188, "step": 143760 }, { "epoch": 1.2430934449334636, "grad_norm": 0.36222463916400605, "learning_rate": 1.8820856018178e-06, "loss": 0.0941497802734375, "step": 143765 }, { "epoch": 1.2431366784550069, "grad_norm": 30.04435694613381, "learning_rate": 1.8818965447532851e-06, "loss": 0.09407272338867187, "step": 143770 }, { "epoch": 1.24317991197655, "grad_norm": 18.384870184018176, "learning_rate": 1.8817074928453634e-06, "loss": 0.07657318115234375, "step": 143775 }, { "epoch": 1.2432231454980933, "grad_norm": 11.860387208953739, "learning_rate": 1.881518446094909e-06, "loss": 0.19822654724121094, "step": 143780 }, { "epoch": 1.2432663790196368, "grad_norm": 2.284665524878711, "learning_rate": 1.8813294045027914e-06, "loss": 0.05929489135742187, "step": 143785 }, { "epoch": 1.24330961254118, "grad_norm": 1.8038422933168083, "learning_rate": 1.8811403680698849e-06, "loss": 0.04516677856445313, "step": 143790 }, { "epoch": 1.2433528460627232, "grad_norm": 0.523056636248364, "learning_rate": 1.8809513367970605e-06, "loss": 0.19168529510498047, "step": 143795 }, { "epoch": 1.2433960795842665, "grad_norm": 5.172480832152145, "learning_rate": 1.880762310685189e-06, "loss": 0.05449061393737793, "step": 143800 }, { "epoch": 1.2434393131058097, "grad_norm": 24.23826926599471, "learning_rate": 1.8805732897351429e-06, "loss": 0.08302001953125, "step": 143805 }, { "epoch": 1.243482546627353, "grad_norm": 0.037040124343646966, "learning_rate": 1.880384273947794e-06, "loss": 0.061688995361328124, "step": 143810 }, { "epoch": 1.2435257801488961, "grad_norm": 0.2725004314716907, "learning_rate": 1.8801952633240141e-06, "loss": 0.06634407043457032, "step": 143815 }, { "epoch": 1.2435690136704396, "grad_norm": 13.72316052871594, "learning_rate": 1.8800062578646732e-06, "loss": 0.16647796630859374, "step": 143820 }, { "epoch": 1.2436122471919828, "grad_norm": 2.1679667024466696, "learning_rate": 1.8798172575706461e-06, "loss": 0.1501697540283203, "step": 143825 }, { "epoch": 1.243655480713526, "grad_norm": 1.8156975207631554, "learning_rate": 1.8796282624428026e-06, "loss": 0.477532958984375, "step": 143830 }, { "epoch": 1.2436987142350693, "grad_norm": 0.3789364809254862, "learning_rate": 1.879439272482014e-06, "loss": 0.024506378173828124, "step": 143835 }, { "epoch": 1.2437419477566125, "grad_norm": 18.634981921996598, "learning_rate": 1.8792502876891531e-06, "loss": 0.23850555419921876, "step": 143840 }, { "epoch": 1.2437851812781557, "grad_norm": 0.11508498323024575, "learning_rate": 1.8790613080650909e-06, "loss": 0.24790077209472655, "step": 143845 }, { "epoch": 1.2438284147996992, "grad_norm": 1.6179917044853076, "learning_rate": 1.8788723336106974e-06, "loss": 0.01568603515625, "step": 143850 }, { "epoch": 1.2438716483212424, "grad_norm": 15.49459833293878, "learning_rate": 1.8786833643268473e-06, "loss": 0.11102294921875, "step": 143855 }, { "epoch": 1.2439148818427856, "grad_norm": 0.3289994906940508, "learning_rate": 1.8784944002144102e-06, "loss": 0.027715301513671874, "step": 143860 }, { "epoch": 1.2439581153643289, "grad_norm": 49.22051007994051, "learning_rate": 1.8783054412742568e-06, "loss": 0.3217903137207031, "step": 143865 }, { "epoch": 1.244001348885872, "grad_norm": 2.4419195399679836, "learning_rate": 1.8781164875072608e-06, "loss": 0.08742084503173828, "step": 143870 }, { "epoch": 1.2440445824074153, "grad_norm": 9.43393847999811, "learning_rate": 1.8779275389142924e-06, "loss": 0.05340576171875, "step": 143875 }, { "epoch": 1.2440878159289586, "grad_norm": 1.0336246043375898, "learning_rate": 1.8777385954962213e-06, "loss": 0.03250694274902344, "step": 143880 }, { "epoch": 1.244131049450502, "grad_norm": 11.854296114942708, "learning_rate": 1.8775496572539225e-06, "loss": 0.046830368041992185, "step": 143885 }, { "epoch": 1.2441742829720452, "grad_norm": 0.7068959539548281, "learning_rate": 1.8773607241882646e-06, "loss": 0.12617721557617187, "step": 143890 }, { "epoch": 1.2442175164935885, "grad_norm": 1.6131166252447184, "learning_rate": 1.8771717963001203e-06, "loss": 0.03668270111083984, "step": 143895 }, { "epoch": 1.2442607500151317, "grad_norm": 0.23578118668246026, "learning_rate": 1.8769828735903608e-06, "loss": 0.009657669067382812, "step": 143900 }, { "epoch": 1.244303983536675, "grad_norm": 3.8196791701438646, "learning_rate": 1.876793956059857e-06, "loss": 0.029259109497070314, "step": 143905 }, { "epoch": 1.2443472170582184, "grad_norm": 1.1451819604334335, "learning_rate": 1.8766050437094792e-06, "loss": 0.011932182312011718, "step": 143910 }, { "epoch": 1.2443904505797616, "grad_norm": 0.9301359345749889, "learning_rate": 1.8764161365401e-06, "loss": 0.008312225341796875, "step": 143915 }, { "epoch": 1.2444336841013048, "grad_norm": 1.5154773171250677, "learning_rate": 1.8762272345525904e-06, "loss": 0.07428665161132812, "step": 143920 }, { "epoch": 1.244476917622848, "grad_norm": 3.5148304633599925, "learning_rate": 1.8760383377478216e-06, "loss": 0.24384117126464844, "step": 143925 }, { "epoch": 1.2445201511443913, "grad_norm": 7.265822858515902, "learning_rate": 1.875849446126665e-06, "loss": 0.04693737030029297, "step": 143930 }, { "epoch": 1.2445633846659345, "grad_norm": 3.371459768014417, "learning_rate": 1.8756605596899916e-06, "loss": 0.05079193115234375, "step": 143935 }, { "epoch": 1.2446066181874778, "grad_norm": 37.07913318443366, "learning_rate": 1.8754716784386718e-06, "loss": 0.11187248229980469, "step": 143940 }, { "epoch": 1.244649851709021, "grad_norm": 1.4731268830405457, "learning_rate": 1.875282802373577e-06, "loss": 0.06718330383300782, "step": 143945 }, { "epoch": 1.2446930852305644, "grad_norm": 3.5332489990910827, "learning_rate": 1.8750939314955784e-06, "loss": 0.06933708190917968, "step": 143950 }, { "epoch": 1.2447363187521077, "grad_norm": 44.38955133495396, "learning_rate": 1.8749050658055481e-06, "loss": 0.22741012573242186, "step": 143955 }, { "epoch": 1.244779552273651, "grad_norm": 2.7072193449743187, "learning_rate": 1.874716205304356e-06, "loss": 0.0731201171875, "step": 143960 }, { "epoch": 1.2448227857951941, "grad_norm": 1.1228517860027605, "learning_rate": 1.8745273499928732e-06, "loss": 0.16268558502197267, "step": 143965 }, { "epoch": 1.2448660193167373, "grad_norm": 5.775689185342435, "learning_rate": 1.8743384998719707e-06, "loss": 0.081072998046875, "step": 143970 }, { "epoch": 1.2449092528382808, "grad_norm": 2.7206825279673943, "learning_rate": 1.8741496549425193e-06, "loss": 0.096636962890625, "step": 143975 }, { "epoch": 1.244952486359824, "grad_norm": 1.7996239850805429, "learning_rate": 1.8739608152053905e-06, "loss": 0.028188705444335938, "step": 143980 }, { "epoch": 1.2449957198813673, "grad_norm": 1.6744546994816236, "learning_rate": 1.8737719806614548e-06, "loss": 0.1083292007446289, "step": 143985 }, { "epoch": 1.2450389534029105, "grad_norm": 33.495942698800526, "learning_rate": 1.8735831513115837e-06, "loss": 0.09314117431640626, "step": 143990 }, { "epoch": 1.2450821869244537, "grad_norm": 10.943367983267368, "learning_rate": 1.8733943271566477e-06, "loss": 0.08991012573242188, "step": 143995 }, { "epoch": 1.245125420445997, "grad_norm": 0.169994506808726, "learning_rate": 1.8732055081975166e-06, "loss": 0.16624069213867188, "step": 144000 }, { "epoch": 1.2451686539675402, "grad_norm": 28.279318509028638, "learning_rate": 1.873016694435063e-06, "loss": 0.19463653564453126, "step": 144005 }, { "epoch": 1.2452118874890836, "grad_norm": 18.397170375213012, "learning_rate": 1.8728278858701554e-06, "loss": 0.18786392211914063, "step": 144010 }, { "epoch": 1.2452551210106269, "grad_norm": 6.6051132491899285, "learning_rate": 1.8726390825036674e-06, "loss": 0.013541412353515626, "step": 144015 }, { "epoch": 1.24529835453217, "grad_norm": 0.35734360331394477, "learning_rate": 1.8724502843364683e-06, "loss": 0.04355144500732422, "step": 144020 }, { "epoch": 1.2453415880537133, "grad_norm": 41.090522432870394, "learning_rate": 1.8722614913694288e-06, "loss": 0.1020315170288086, "step": 144025 }, { "epoch": 1.2453848215752565, "grad_norm": 2.105768628848896, "learning_rate": 1.8720727036034189e-06, "loss": 0.048274803161621097, "step": 144030 }, { "epoch": 1.2454280550967998, "grad_norm": 6.311362135710157, "learning_rate": 1.8718839210393108e-06, "loss": 0.0622894287109375, "step": 144035 }, { "epoch": 1.2454712886183432, "grad_norm": 11.225493090795531, "learning_rate": 1.8716951436779733e-06, "loss": 0.2508522033691406, "step": 144040 }, { "epoch": 1.2455145221398864, "grad_norm": 0.4960187536494859, "learning_rate": 1.8715063715202791e-06, "loss": 0.06763381958007812, "step": 144045 }, { "epoch": 1.2455577556614297, "grad_norm": 2.7038827460033388, "learning_rate": 1.871317604567098e-06, "loss": 0.09311141967773437, "step": 144050 }, { "epoch": 1.245600989182973, "grad_norm": 4.297042568261167, "learning_rate": 1.8711288428193e-06, "loss": 0.054691314697265625, "step": 144055 }, { "epoch": 1.2456442227045161, "grad_norm": 26.902590483138056, "learning_rate": 1.8709400862777558e-06, "loss": 0.10619430541992188, "step": 144060 }, { "epoch": 1.2456874562260594, "grad_norm": 0.4796947318528786, "learning_rate": 1.8707513349433364e-06, "loss": 0.04661216735839844, "step": 144065 }, { "epoch": 1.2457306897476026, "grad_norm": 10.08677741393925, "learning_rate": 1.8705625888169122e-06, "loss": 0.05618581771850586, "step": 144070 }, { "epoch": 1.245773923269146, "grad_norm": 26.185478381088572, "learning_rate": 1.870373847899352e-06, "loss": 0.1158172607421875, "step": 144075 }, { "epoch": 1.2458171567906893, "grad_norm": 16.80024733220045, "learning_rate": 1.87018511219153e-06, "loss": 0.028565406799316406, "step": 144080 }, { "epoch": 1.2458603903122325, "grad_norm": 3.0473600179694933, "learning_rate": 1.869996381694313e-06, "loss": 0.4326957702636719, "step": 144085 }, { "epoch": 1.2459036238337757, "grad_norm": 3.8066526974480728, "learning_rate": 1.8698076564085739e-06, "loss": 0.05582504272460938, "step": 144090 }, { "epoch": 1.245946857355319, "grad_norm": 44.513270360477534, "learning_rate": 1.8696189363351815e-06, "loss": 0.081488037109375, "step": 144095 }, { "epoch": 1.2459900908768622, "grad_norm": 40.30478709578383, "learning_rate": 1.8694302214750073e-06, "loss": 0.09736099243164062, "step": 144100 }, { "epoch": 1.2460333243984056, "grad_norm": 4.141780904221181, "learning_rate": 1.869241511828919e-06, "loss": 0.05194168090820313, "step": 144105 }, { "epoch": 1.2460765579199489, "grad_norm": 9.908246573867912, "learning_rate": 1.8690528073977906e-06, "loss": 0.03110198974609375, "step": 144110 }, { "epoch": 1.246119791441492, "grad_norm": 0.7582074261465299, "learning_rate": 1.8688641081824903e-06, "loss": 0.0440155029296875, "step": 144115 }, { "epoch": 1.2461630249630353, "grad_norm": 0.8598824558478956, "learning_rate": 1.8686754141838891e-06, "loss": 0.047365760803222655, "step": 144120 }, { "epoch": 1.2462062584845786, "grad_norm": 2.4654383061303404, "learning_rate": 1.8684867254028573e-06, "loss": 0.029864501953125, "step": 144125 }, { "epoch": 1.2462494920061218, "grad_norm": 0.20156750524298161, "learning_rate": 1.8682980418402645e-06, "loss": 0.025095367431640626, "step": 144130 }, { "epoch": 1.246292725527665, "grad_norm": 0.06400922333529556, "learning_rate": 1.86810936349698e-06, "loss": 0.17047157287597656, "step": 144135 }, { "epoch": 1.2463359590492085, "grad_norm": 2.5454740613154887, "learning_rate": 1.867920690373876e-06, "loss": 0.022336864471435548, "step": 144140 }, { "epoch": 1.2463791925707517, "grad_norm": 10.047089568488227, "learning_rate": 1.8677320224718213e-06, "loss": 0.2017181396484375, "step": 144145 }, { "epoch": 1.246422426092295, "grad_norm": 2.8728328309729534, "learning_rate": 1.8675433597916874e-06, "loss": 0.1191314697265625, "step": 144150 }, { "epoch": 1.2464656596138382, "grad_norm": 6.3895368705147515, "learning_rate": 1.867354702334343e-06, "loss": 0.01596832275390625, "step": 144155 }, { "epoch": 1.2465088931353814, "grad_norm": 1.1063562645887872, "learning_rate": 1.867166050100659e-06, "loss": 0.09647808074951172, "step": 144160 }, { "epoch": 1.2465521266569248, "grad_norm": 0.1856221475987971, "learning_rate": 1.8669774030915043e-06, "loss": 0.03148345947265625, "step": 144165 }, { "epoch": 1.246595360178468, "grad_norm": 12.072398002665905, "learning_rate": 1.8667887613077494e-06, "loss": 0.029401016235351563, "step": 144170 }, { "epoch": 1.2466385937000113, "grad_norm": 0.8213270534786775, "learning_rate": 1.866600124750265e-06, "loss": 0.10009841918945313, "step": 144175 }, { "epoch": 1.2466818272215545, "grad_norm": 0.8976214501356068, "learning_rate": 1.8664114934199214e-06, "loss": 0.0061130523681640625, "step": 144180 }, { "epoch": 1.2467250607430977, "grad_norm": 0.49151236505575574, "learning_rate": 1.8662228673175874e-06, "loss": 0.04033737182617188, "step": 144185 }, { "epoch": 1.246768294264641, "grad_norm": 0.3558771822933691, "learning_rate": 1.8660342464441336e-06, "loss": 0.004428863525390625, "step": 144190 }, { "epoch": 1.2468115277861842, "grad_norm": 29.169573566671193, "learning_rate": 1.8658456308004293e-06, "loss": 0.12291374206542968, "step": 144195 }, { "epoch": 1.2468547613077277, "grad_norm": 23.646793222256626, "learning_rate": 1.8656570203873443e-06, "loss": 0.057360076904296876, "step": 144200 }, { "epoch": 1.2468979948292709, "grad_norm": 2.031596155828425, "learning_rate": 1.865468415205749e-06, "loss": 0.30225143432617185, "step": 144205 }, { "epoch": 1.2469412283508141, "grad_norm": 0.9187968065877381, "learning_rate": 1.8652798152565139e-06, "loss": 0.017114639282226562, "step": 144210 }, { "epoch": 1.2469844618723573, "grad_norm": 8.27838272681348, "learning_rate": 1.8650912205405077e-06, "loss": 0.058658599853515625, "step": 144215 }, { "epoch": 1.2470276953939006, "grad_norm": 7.841593447216027, "learning_rate": 1.8649026310586007e-06, "loss": 0.0695648193359375, "step": 144220 }, { "epoch": 1.2470709289154438, "grad_norm": 18.3324122640714, "learning_rate": 1.8647140468116617e-06, "loss": 0.08863334655761719, "step": 144225 }, { "epoch": 1.2471141624369873, "grad_norm": 4.234010413482124, "learning_rate": 1.8645254678005616e-06, "loss": 0.0881439208984375, "step": 144230 }, { "epoch": 1.2471573959585305, "grad_norm": 1.2550960580889388, "learning_rate": 1.8643368940261694e-06, "loss": 0.2966911315917969, "step": 144235 }, { "epoch": 1.2472006294800737, "grad_norm": 12.807847532758055, "learning_rate": 1.8641483254893559e-06, "loss": 0.11475601196289062, "step": 144240 }, { "epoch": 1.247243863001617, "grad_norm": 0.7973657446511742, "learning_rate": 1.8639597621909898e-06, "loss": 0.16352195739746095, "step": 144245 }, { "epoch": 1.2472870965231602, "grad_norm": 13.035881285256796, "learning_rate": 1.8637712041319408e-06, "loss": 0.12618408203125, "step": 144250 }, { "epoch": 1.2473303300447034, "grad_norm": 0.07570206657974812, "learning_rate": 1.8635826513130783e-06, "loss": 0.04315395355224609, "step": 144255 }, { "epoch": 1.2473735635662466, "grad_norm": 24.804658425633892, "learning_rate": 1.8633941037352726e-06, "loss": 0.087115478515625, "step": 144260 }, { "epoch": 1.24741679708779, "grad_norm": 13.565013967701901, "learning_rate": 1.8632055613993917e-06, "loss": 0.03441963195800781, "step": 144265 }, { "epoch": 1.2474600306093333, "grad_norm": 1.4278626620305537, "learning_rate": 1.8630170243063074e-06, "loss": 0.11042709350585937, "step": 144270 }, { "epoch": 1.2475032641308765, "grad_norm": 5.897854357856947, "learning_rate": 1.8628284924568883e-06, "loss": 0.04324951171875, "step": 144275 }, { "epoch": 1.2475464976524198, "grad_norm": 0.850836510502972, "learning_rate": 1.8626399658520035e-06, "loss": 0.04083099365234375, "step": 144280 }, { "epoch": 1.247589731173963, "grad_norm": 2.4489003651608776, "learning_rate": 1.8624514444925221e-06, "loss": 0.031259918212890626, "step": 144285 }, { "epoch": 1.2476329646955062, "grad_norm": 1.2053568142421207, "learning_rate": 1.8622629283793148e-06, "loss": 0.033477783203125, "step": 144290 }, { "epoch": 1.2476761982170497, "grad_norm": 1.7157067463996647, "learning_rate": 1.8620744175132492e-06, "loss": 0.07240524291992187, "step": 144295 }, { "epoch": 1.247719431738593, "grad_norm": 0.7315289925705435, "learning_rate": 1.861885911895197e-06, "loss": 0.03455410003662109, "step": 144300 }, { "epoch": 1.2477626652601361, "grad_norm": 5.299317132203822, "learning_rate": 1.8616974115260264e-06, "loss": 0.04497451782226562, "step": 144305 }, { "epoch": 1.2478058987816794, "grad_norm": 1.5081673040259604, "learning_rate": 1.8615089164066063e-06, "loss": 0.027400970458984375, "step": 144310 }, { "epoch": 1.2478491323032226, "grad_norm": 1.0299399281718575, "learning_rate": 1.8613204265378067e-06, "loss": 0.035494232177734376, "step": 144315 }, { "epoch": 1.2478923658247658, "grad_norm": 0.6625682951793682, "learning_rate": 1.8611319419204972e-06, "loss": 0.07549057006835938, "step": 144320 }, { "epoch": 1.247935599346309, "grad_norm": 0.9770973641070813, "learning_rate": 1.8609434625555462e-06, "loss": 0.25223846435546876, "step": 144325 }, { "epoch": 1.2479788328678525, "grad_norm": 1.6120120838812784, "learning_rate": 1.860754988443822e-06, "loss": 0.015488433837890624, "step": 144330 }, { "epoch": 1.2480220663893957, "grad_norm": 2.2413493903902966, "learning_rate": 1.8605665195861966e-06, "loss": 0.13843116760253907, "step": 144335 }, { "epoch": 1.248065299910939, "grad_norm": 28.842042172019173, "learning_rate": 1.860378055983537e-06, "loss": 0.12743091583251953, "step": 144340 }, { "epoch": 1.2481085334324822, "grad_norm": 51.480308787186644, "learning_rate": 1.8601895976367139e-06, "loss": 0.066046142578125, "step": 144345 }, { "epoch": 1.2481517669540254, "grad_norm": 0.42348509425088926, "learning_rate": 1.8600011445465955e-06, "loss": 0.048523330688476564, "step": 144350 }, { "epoch": 1.2481950004755689, "grad_norm": 0.9318824637659576, "learning_rate": 1.8598126967140512e-06, "loss": 0.03381805419921875, "step": 144355 }, { "epoch": 1.248238233997112, "grad_norm": 1.0319362875114242, "learning_rate": 1.8596242541399485e-06, "loss": 0.04691314697265625, "step": 144360 }, { "epoch": 1.2482814675186553, "grad_norm": 0.23873289306319972, "learning_rate": 1.8594358168251594e-06, "loss": 0.01849517822265625, "step": 144365 }, { "epoch": 1.2483247010401985, "grad_norm": 3.5237733322093505, "learning_rate": 1.859247384770551e-06, "loss": 0.06623306274414062, "step": 144370 }, { "epoch": 1.2483679345617418, "grad_norm": 2.0900105694520787, "learning_rate": 1.8590589579769934e-06, "loss": 0.038958740234375, "step": 144375 }, { "epoch": 1.248411168083285, "grad_norm": 33.54407449150413, "learning_rate": 1.858870536445355e-06, "loss": 0.32868194580078125, "step": 144380 }, { "epoch": 1.2484544016048282, "grad_norm": 4.88835236561313, "learning_rate": 1.8586821201765048e-06, "loss": 0.052962875366210936, "step": 144385 }, { "epoch": 1.2484976351263715, "grad_norm": 0.1802561123894682, "learning_rate": 1.8584937091713105e-06, "loss": 0.22855682373046876, "step": 144390 }, { "epoch": 1.248540868647915, "grad_norm": 7.569006932092989, "learning_rate": 1.858305303430644e-06, "loss": 0.0816802978515625, "step": 144395 }, { "epoch": 1.2485841021694581, "grad_norm": 4.1615802554404615, "learning_rate": 1.8581169029553716e-06, "loss": 0.0896066665649414, "step": 144400 }, { "epoch": 1.2486273356910014, "grad_norm": 0.14371128068051553, "learning_rate": 1.8579285077463637e-06, "loss": 0.0316436767578125, "step": 144405 }, { "epoch": 1.2486705692125446, "grad_norm": 16.490481692972203, "learning_rate": 1.857740117804489e-06, "loss": 0.07532157897949218, "step": 144410 }, { "epoch": 1.2487138027340878, "grad_norm": 0.806867626775556, "learning_rate": 1.8575517331306156e-06, "loss": 0.01005706787109375, "step": 144415 }, { "epoch": 1.2487570362556313, "grad_norm": 4.586638773049821, "learning_rate": 1.8573633537256122e-06, "loss": 0.1294321060180664, "step": 144420 }, { "epoch": 1.2488002697771745, "grad_norm": 2.2398119328579926, "learning_rate": 1.8571749795903478e-06, "loss": 0.13186492919921874, "step": 144425 }, { "epoch": 1.2488435032987177, "grad_norm": 4.540463617333346, "learning_rate": 1.8569866107256918e-06, "loss": 0.06487054824829101, "step": 144430 }, { "epoch": 1.248886736820261, "grad_norm": 26.363822093836536, "learning_rate": 1.8567982471325128e-06, "loss": 0.075909423828125, "step": 144435 }, { "epoch": 1.2489299703418042, "grad_norm": 0.9631082973547425, "learning_rate": 1.8566098888116795e-06, "loss": 0.10284881591796875, "step": 144440 }, { "epoch": 1.2489732038633474, "grad_norm": 0.36315893415506023, "learning_rate": 1.8564215357640601e-06, "loss": 0.07418098449707031, "step": 144445 }, { "epoch": 1.2490164373848907, "grad_norm": 0.4425052910234189, "learning_rate": 1.8562331879905232e-06, "loss": 0.015264892578125, "step": 144450 }, { "epoch": 1.249059670906434, "grad_norm": 27.240822971389605, "learning_rate": 1.8560448454919381e-06, "loss": 0.04448890686035156, "step": 144455 }, { "epoch": 1.2491029044279773, "grad_norm": 1.4198388528468084, "learning_rate": 1.8558565082691723e-06, "loss": 0.034723663330078126, "step": 144460 }, { "epoch": 1.2491461379495206, "grad_norm": 0.6981265021585314, "learning_rate": 1.8556681763230965e-06, "loss": 0.08846626281738282, "step": 144465 }, { "epoch": 1.2491893714710638, "grad_norm": 114.96248093984038, "learning_rate": 1.8554798496545772e-06, "loss": 0.12362747192382813, "step": 144470 }, { "epoch": 1.249232604992607, "grad_norm": 0.9139932227568247, "learning_rate": 1.8552915282644844e-06, "loss": 0.020003509521484376, "step": 144475 }, { "epoch": 1.2492758385141502, "grad_norm": 20.136258691530774, "learning_rate": 1.8551032121536853e-06, "loss": 0.09676132202148438, "step": 144480 }, { "epoch": 1.2493190720356937, "grad_norm": 0.6149549355964333, "learning_rate": 1.8549149013230477e-06, "loss": 0.07790336608886719, "step": 144485 }, { "epoch": 1.249362305557237, "grad_norm": 2.1376155255361864, "learning_rate": 1.8547265957734432e-06, "loss": 0.04598731994628906, "step": 144490 }, { "epoch": 1.2494055390787802, "grad_norm": 35.80900732387384, "learning_rate": 1.8545382955057385e-06, "loss": 0.062459564208984374, "step": 144495 }, { "epoch": 1.2494487726003234, "grad_norm": 0.2577321493165733, "learning_rate": 1.8543500005208017e-06, "loss": 0.15926971435546874, "step": 144500 }, { "epoch": 1.2494920061218666, "grad_norm": 10.274179750482512, "learning_rate": 1.8541617108195014e-06, "loss": 0.08188743591308593, "step": 144505 }, { "epoch": 1.2495352396434098, "grad_norm": 0.03305898067116628, "learning_rate": 1.8539734264027056e-06, "loss": 0.021856117248535156, "step": 144510 }, { "epoch": 1.249578473164953, "grad_norm": 1.4411043967315098, "learning_rate": 1.853785147271284e-06, "loss": 0.006368446350097656, "step": 144515 }, { "epoch": 1.2496217066864965, "grad_norm": 14.67489427871742, "learning_rate": 1.8535968734261026e-06, "loss": 0.08130950927734375, "step": 144520 }, { "epoch": 1.2496649402080398, "grad_norm": 10.764248629142514, "learning_rate": 1.853408604868032e-06, "loss": 0.043284034729003905, "step": 144525 }, { "epoch": 1.249708173729583, "grad_norm": 0.08382057343313794, "learning_rate": 1.85322034159794e-06, "loss": 0.054981231689453125, "step": 144530 }, { "epoch": 1.2497514072511262, "grad_norm": 2.430260282739557, "learning_rate": 1.8530320836166935e-06, "loss": 0.015727996826171875, "step": 144535 }, { "epoch": 1.2497946407726694, "grad_norm": 1.8263494218075498, "learning_rate": 1.8528438309251625e-06, "loss": 0.027157974243164063, "step": 144540 }, { "epoch": 1.2498378742942127, "grad_norm": 0.5634605573636431, "learning_rate": 1.8526555835242145e-06, "loss": 0.07698078155517578, "step": 144545 }, { "epoch": 1.2498811078157561, "grad_norm": 23.139880838195154, "learning_rate": 1.852467341414716e-06, "loss": 0.10675506591796875, "step": 144550 }, { "epoch": 1.2499243413372993, "grad_norm": 0.6385220636485158, "learning_rate": 1.8522791045975382e-06, "loss": 0.0957906723022461, "step": 144555 }, { "epoch": 1.2499675748588426, "grad_norm": 0.06630120822431722, "learning_rate": 1.8520908730735477e-06, "loss": 0.054715728759765624, "step": 144560 }, { "epoch": 1.2500108083803858, "grad_norm": 7.3473193006747275, "learning_rate": 1.8519026468436123e-06, "loss": 0.045513916015625, "step": 144565 }, { "epoch": 1.250054041901929, "grad_norm": 0.1986087653593587, "learning_rate": 1.8517144259086007e-06, "loss": 0.0558990478515625, "step": 144570 }, { "epoch": 1.2500972754234723, "grad_norm": 2.4502490588648542, "learning_rate": 1.8515262102693808e-06, "loss": 0.14252166748046874, "step": 144575 }, { "epoch": 1.2501405089450155, "grad_norm": 0.26169813195406705, "learning_rate": 1.8513379999268193e-06, "loss": 0.07008819580078125, "step": 144580 }, { "epoch": 1.250183742466559, "grad_norm": 66.90979287962335, "learning_rate": 1.8511497948817867e-06, "loss": 0.1047607421875, "step": 144585 }, { "epoch": 1.2502269759881022, "grad_norm": 2.4256454409857593, "learning_rate": 1.8509615951351497e-06, "loss": 0.02422027587890625, "step": 144590 }, { "epoch": 1.2502702095096454, "grad_norm": 1.8199491089319226, "learning_rate": 1.8507734006877755e-06, "loss": 0.07796859741210938, "step": 144595 }, { "epoch": 1.2503134430311886, "grad_norm": 10.722258838439997, "learning_rate": 1.8505852115405338e-06, "loss": 0.06685638427734375, "step": 144600 }, { "epoch": 1.2503566765527319, "grad_norm": 33.65273161488361, "learning_rate": 1.8503970276942911e-06, "loss": 0.31460227966308596, "step": 144605 }, { "epoch": 1.2503999100742753, "grad_norm": 1.7376142425171897, "learning_rate": 1.8502088491499163e-06, "loss": 0.01306610107421875, "step": 144610 }, { "epoch": 1.2504431435958185, "grad_norm": 7.12714499138025, "learning_rate": 1.850020675908275e-06, "loss": 0.1157745361328125, "step": 144615 }, { "epoch": 1.2504863771173618, "grad_norm": 4.450230246642004, "learning_rate": 1.8498325079702385e-06, "loss": 0.08044548034667968, "step": 144620 }, { "epoch": 1.250529610638905, "grad_norm": 12.416674053850624, "learning_rate": 1.8496443453366715e-06, "loss": 0.10916252136230468, "step": 144625 }, { "epoch": 1.2505728441604482, "grad_norm": 5.761936417729242, "learning_rate": 1.8494561880084442e-06, "loss": 0.2720973968505859, "step": 144630 }, { "epoch": 1.2506160776819915, "grad_norm": 1.035750065959412, "learning_rate": 1.8492680359864237e-06, "loss": 0.20843029022216797, "step": 144635 }, { "epoch": 1.2506593112035347, "grad_norm": 0.6833553217499875, "learning_rate": 1.8490798892714764e-06, "loss": 0.023195457458496094, "step": 144640 }, { "epoch": 1.250702544725078, "grad_norm": 0.5915752600617009, "learning_rate": 1.84889174786447e-06, "loss": 0.1284393310546875, "step": 144645 }, { "epoch": 1.2507457782466214, "grad_norm": 0.8715174104442939, "learning_rate": 1.848703611766275e-06, "loss": 0.03132781982421875, "step": 144650 }, { "epoch": 1.2507890117681646, "grad_norm": 0.11453923364572523, "learning_rate": 1.848515480977756e-06, "loss": 0.012430191040039062, "step": 144655 }, { "epoch": 1.2508322452897078, "grad_norm": 12.533684509531579, "learning_rate": 1.8483273554997824e-06, "loss": 0.15328006744384765, "step": 144660 }, { "epoch": 1.250875478811251, "grad_norm": 1.27917641415255, "learning_rate": 1.8481392353332218e-06, "loss": 0.08342742919921875, "step": 144665 }, { "epoch": 1.2509187123327943, "grad_norm": 5.296924579018619, "learning_rate": 1.847951120478941e-06, "loss": 0.42342147827148435, "step": 144670 }, { "epoch": 1.2509619458543377, "grad_norm": 2.243601466675744, "learning_rate": 1.8477630109378071e-06, "loss": 0.1531665802001953, "step": 144675 }, { "epoch": 1.251005179375881, "grad_norm": 1.3610461580732174, "learning_rate": 1.8475749067106883e-06, "loss": 0.08317794799804687, "step": 144680 }, { "epoch": 1.2510484128974242, "grad_norm": 0.6243052372727773, "learning_rate": 1.8473868077984526e-06, "loss": 0.09234695434570313, "step": 144685 }, { "epoch": 1.2510916464189674, "grad_norm": 3.3569781334194286, "learning_rate": 1.847198714201967e-06, "loss": 0.014739418029785156, "step": 144690 }, { "epoch": 1.2511348799405106, "grad_norm": 0.4032569699641144, "learning_rate": 1.8470106259221e-06, "loss": 0.0396575927734375, "step": 144695 }, { "epoch": 1.2511781134620539, "grad_norm": 10.981377304286944, "learning_rate": 1.8468225429597174e-06, "loss": 0.026558303833007814, "step": 144700 }, { "epoch": 1.251221346983597, "grad_norm": 0.13837570766029392, "learning_rate": 1.8466344653156873e-06, "loss": 0.06451168060302734, "step": 144705 }, { "epoch": 1.2512645805051403, "grad_norm": 6.325486228845072, "learning_rate": 1.846446392990876e-06, "loss": 0.10819511413574219, "step": 144710 }, { "epoch": 1.2513078140266838, "grad_norm": 54.8921842465737, "learning_rate": 1.8462583259861534e-06, "loss": 0.21709098815917968, "step": 144715 }, { "epoch": 1.251351047548227, "grad_norm": 7.062142806173703, "learning_rate": 1.8460702643023856e-06, "loss": 0.0764739990234375, "step": 144720 }, { "epoch": 1.2513942810697702, "grad_norm": 2.3759482138259065, "learning_rate": 1.8458822079404396e-06, "loss": 0.033098602294921876, "step": 144725 }, { "epoch": 1.2514375145913135, "grad_norm": 9.645595770485855, "learning_rate": 1.8456941569011829e-06, "loss": 0.058854293823242185, "step": 144730 }, { "epoch": 1.251480748112857, "grad_norm": 0.1756513483217183, "learning_rate": 1.8455061111854822e-06, "loss": 0.059710693359375, "step": 144735 }, { "epoch": 1.2515239816344002, "grad_norm": 3.1321013034940544, "learning_rate": 1.8453180707942044e-06, "loss": 0.029777145385742186, "step": 144740 }, { "epoch": 1.2515672151559434, "grad_norm": 0.024636748728085356, "learning_rate": 1.8451300357282193e-06, "loss": 0.04353752136230469, "step": 144745 }, { "epoch": 1.2516104486774866, "grad_norm": 17.23184977281755, "learning_rate": 1.8449420059883923e-06, "loss": 0.06480846405029297, "step": 144750 }, { "epoch": 1.2516536821990298, "grad_norm": 5.035079340555492, "learning_rate": 1.8447539815755906e-06, "loss": 0.04695205688476563, "step": 144755 }, { "epoch": 1.251696915720573, "grad_norm": 0.22076287426952565, "learning_rate": 1.844565962490681e-06, "loss": 0.0190155029296875, "step": 144760 }, { "epoch": 1.2517401492421163, "grad_norm": 5.78774907989541, "learning_rate": 1.8443779487345314e-06, "loss": 0.046550750732421875, "step": 144765 }, { "epoch": 1.2517833827636595, "grad_norm": 0.09090381254693883, "learning_rate": 1.8441899403080087e-06, "loss": 0.02401275634765625, "step": 144770 }, { "epoch": 1.251826616285203, "grad_norm": 3.6136925228038255, "learning_rate": 1.8440019372119789e-06, "loss": 0.06851348876953126, "step": 144775 }, { "epoch": 1.2518698498067462, "grad_norm": 0.6842572938556206, "learning_rate": 1.8438139394473112e-06, "loss": 0.057161712646484376, "step": 144780 }, { "epoch": 1.2519130833282894, "grad_norm": 1.765108317763091, "learning_rate": 1.8436259470148712e-06, "loss": 0.0246917724609375, "step": 144785 }, { "epoch": 1.2519563168498327, "grad_norm": 2.377409514896901, "learning_rate": 1.8434379599155257e-06, "loss": 0.021483993530273436, "step": 144790 }, { "epoch": 1.251999550371376, "grad_norm": 11.499154063459905, "learning_rate": 1.8432499781501425e-06, "loss": 0.071099853515625, "step": 144795 }, { "epoch": 1.2520427838929193, "grad_norm": 0.3027914627539151, "learning_rate": 1.8430620017195887e-06, "loss": 0.05674037933349609, "step": 144800 }, { "epoch": 1.2520860174144626, "grad_norm": 4.420132602471957, "learning_rate": 1.842874030624729e-06, "loss": 0.017681884765625, "step": 144805 }, { "epoch": 1.2521292509360058, "grad_norm": 0.05580177513247806, "learning_rate": 1.8426860648664336e-06, "loss": 0.029229736328125, "step": 144810 }, { "epoch": 1.252172484457549, "grad_norm": 17.110864260534054, "learning_rate": 1.8424981044455675e-06, "loss": 0.060874557495117186, "step": 144815 }, { "epoch": 1.2522157179790923, "grad_norm": 3.1109413782463684, "learning_rate": 1.8423101493629974e-06, "loss": 0.020877838134765625, "step": 144820 }, { "epoch": 1.2522589515006355, "grad_norm": 33.18175276513681, "learning_rate": 1.8421221996195913e-06, "loss": 0.14734649658203125, "step": 144825 }, { "epoch": 1.2523021850221787, "grad_norm": 0.6117050603594052, "learning_rate": 1.8419342552162153e-06, "loss": 0.011572265625, "step": 144830 }, { "epoch": 1.252345418543722, "grad_norm": 0.5432280703666628, "learning_rate": 1.8417463161537347e-06, "loss": 0.11010971069335937, "step": 144835 }, { "epoch": 1.2523886520652654, "grad_norm": 0.9249043824167156, "learning_rate": 1.841558382433019e-06, "loss": 0.014890289306640625, "step": 144840 }, { "epoch": 1.2524318855868086, "grad_norm": 14.227890030426547, "learning_rate": 1.841370454054934e-06, "loss": 0.0704071044921875, "step": 144845 }, { "epoch": 1.2524751191083519, "grad_norm": 1.337429750127262, "learning_rate": 1.8411825310203452e-06, "loss": 0.012320709228515626, "step": 144850 }, { "epoch": 1.252518352629895, "grad_norm": 7.06268156742184, "learning_rate": 1.840994613330121e-06, "loss": 0.05848541259765625, "step": 144855 }, { "epoch": 1.2525615861514383, "grad_norm": 26.62604257480157, "learning_rate": 1.8408067009851272e-06, "loss": 0.15652923583984374, "step": 144860 }, { "epoch": 1.2526048196729818, "grad_norm": 7.892502242697045, "learning_rate": 1.8406187939862304e-06, "loss": 0.3135875701904297, "step": 144865 }, { "epoch": 1.252648053194525, "grad_norm": 13.401080680714587, "learning_rate": 1.8404308923342958e-06, "loss": 0.05611763000488281, "step": 144870 }, { "epoch": 1.2526912867160682, "grad_norm": 0.653715058896821, "learning_rate": 1.8402429960301928e-06, "loss": 0.050969696044921874, "step": 144875 }, { "epoch": 1.2527345202376114, "grad_norm": 4.774180035728409, "learning_rate": 1.8400551050747865e-06, "loss": 0.05746612548828125, "step": 144880 }, { "epoch": 1.2527777537591547, "grad_norm": 16.14018137084361, "learning_rate": 1.8398672194689436e-06, "loss": 0.28812923431396487, "step": 144885 }, { "epoch": 1.252820987280698, "grad_norm": 3.4483120863104766, "learning_rate": 1.839679339213531e-06, "loss": 0.015146636962890625, "step": 144890 }, { "epoch": 1.2528642208022411, "grad_norm": 1.031142350407182, "learning_rate": 1.8394914643094142e-06, "loss": 0.0757720947265625, "step": 144895 }, { "epoch": 1.2529074543237844, "grad_norm": 0.6912533607517974, "learning_rate": 1.8393035947574597e-06, "loss": 0.039571380615234374, "step": 144900 }, { "epoch": 1.2529506878453278, "grad_norm": 1.9634421007916545, "learning_rate": 1.839115730558535e-06, "loss": 0.0340728759765625, "step": 144905 }, { "epoch": 1.252993921366871, "grad_norm": 7.115724238187684, "learning_rate": 1.838927871713506e-06, "loss": 0.0336639404296875, "step": 144910 }, { "epoch": 1.2530371548884143, "grad_norm": 2.1593996143888035, "learning_rate": 1.8387400182232393e-06, "loss": 0.033367919921875, "step": 144915 }, { "epoch": 1.2530803884099575, "grad_norm": 2.2982257015367646, "learning_rate": 1.838552170088601e-06, "loss": 0.04677276611328125, "step": 144920 }, { "epoch": 1.2531236219315007, "grad_norm": 7.168824201884611, "learning_rate": 1.8383643273104574e-06, "loss": 0.0390380859375, "step": 144925 }, { "epoch": 1.2531668554530442, "grad_norm": 14.291234761515948, "learning_rate": 1.8381764898896738e-06, "loss": 0.11727981567382813, "step": 144930 }, { "epoch": 1.2532100889745874, "grad_norm": 17.52654450453024, "learning_rate": 1.8379886578271182e-06, "loss": 0.29922027587890626, "step": 144935 }, { "epoch": 1.2532533224961306, "grad_norm": 3.742633403432674, "learning_rate": 1.8378008311236568e-06, "loss": 0.038202667236328126, "step": 144940 }, { "epoch": 1.2532965560176739, "grad_norm": 17.054134674032653, "learning_rate": 1.8376130097801553e-06, "loss": 0.16960983276367186, "step": 144945 }, { "epoch": 1.253339789539217, "grad_norm": 0.4061608898679595, "learning_rate": 1.8374251937974798e-06, "loss": 0.091107177734375, "step": 144950 }, { "epoch": 1.2533830230607603, "grad_norm": 0.14741373636091917, "learning_rate": 1.8372373831764962e-06, "loss": 0.11493682861328125, "step": 144955 }, { "epoch": 1.2534262565823036, "grad_norm": 0.15480532009049572, "learning_rate": 1.8370495779180716e-06, "loss": 0.024454498291015626, "step": 144960 }, { "epoch": 1.2534694901038468, "grad_norm": 29.243799870040824, "learning_rate": 1.83686177802307e-06, "loss": 0.1297637939453125, "step": 144965 }, { "epoch": 1.2535127236253902, "grad_norm": 13.333403237437137, "learning_rate": 1.8366739834923607e-06, "loss": 0.07460174560546876, "step": 144970 }, { "epoch": 1.2535559571469335, "grad_norm": 25.405121355689666, "learning_rate": 1.8364861943268084e-06, "loss": 0.17673225402832032, "step": 144975 }, { "epoch": 1.2535991906684767, "grad_norm": 2.5568510659326873, "learning_rate": 1.8362984105272783e-06, "loss": 0.015276336669921875, "step": 144980 }, { "epoch": 1.25364242419002, "grad_norm": 4.720173969075239, "learning_rate": 1.836110632094637e-06, "loss": 0.025723934173583984, "step": 144985 }, { "epoch": 1.2536856577115634, "grad_norm": 0.5690550583366424, "learning_rate": 1.835922859029751e-06, "loss": 0.021776199340820312, "step": 144990 }, { "epoch": 1.2537288912331066, "grad_norm": 24.66711086488925, "learning_rate": 1.8357350913334847e-06, "loss": 0.11648025512695312, "step": 144995 }, { "epoch": 1.2537721247546498, "grad_norm": 0.495220021242697, "learning_rate": 1.835547329006707e-06, "loss": 0.09894180297851562, "step": 145000 }, { "epoch": 1.253815358276193, "grad_norm": 0.9477553584189375, "learning_rate": 1.8353595720502815e-06, "loss": 0.04881439208984375, "step": 145005 }, { "epoch": 1.2538585917977363, "grad_norm": 0.2586721537223803, "learning_rate": 1.835171820465075e-06, "loss": 0.03430328369140625, "step": 145010 }, { "epoch": 1.2539018253192795, "grad_norm": 11.66273673578511, "learning_rate": 1.8349840742519525e-06, "loss": 0.06890478134155273, "step": 145015 }, { "epoch": 1.2539450588408227, "grad_norm": 14.29598845243818, "learning_rate": 1.8347963334117808e-06, "loss": 0.07486686706542969, "step": 145020 }, { "epoch": 1.253988292362366, "grad_norm": 0.4867685923818132, "learning_rate": 1.8346085979454261e-06, "loss": 0.017458343505859376, "step": 145025 }, { "epoch": 1.2540315258839094, "grad_norm": 1.6011046075156612, "learning_rate": 1.8344208678537518e-06, "loss": 0.0062885284423828125, "step": 145030 }, { "epoch": 1.2540747594054527, "grad_norm": 1.5992989379201306, "learning_rate": 1.834233143137627e-06, "loss": 0.04792232513427734, "step": 145035 }, { "epoch": 1.2541179929269959, "grad_norm": 12.588706816997572, "learning_rate": 1.8340454237979157e-06, "loss": 0.08206100463867187, "step": 145040 }, { "epoch": 1.2541612264485391, "grad_norm": 0.6108537167694098, "learning_rate": 1.8338577098354836e-06, "loss": 0.03790969848632812, "step": 145045 }, { "epoch": 1.2542044599700823, "grad_norm": 2.933985728014206, "learning_rate": 1.833670001251197e-06, "loss": 0.23816986083984376, "step": 145050 }, { "epoch": 1.2542476934916258, "grad_norm": 0.8031103404616894, "learning_rate": 1.8334822980459213e-06, "loss": 0.055667877197265625, "step": 145055 }, { "epoch": 1.254290927013169, "grad_norm": 2.1157182011638773, "learning_rate": 1.833294600220521e-06, "loss": 0.04668922424316406, "step": 145060 }, { "epoch": 1.2543341605347123, "grad_norm": 13.576580881954953, "learning_rate": 1.8331069077758642e-06, "loss": 0.12845115661621093, "step": 145065 }, { "epoch": 1.2543773940562555, "grad_norm": 1.677140731340758, "learning_rate": 1.8329192207128154e-06, "loss": 0.0225311279296875, "step": 145070 }, { "epoch": 1.2544206275777987, "grad_norm": 1.1028568245789503, "learning_rate": 1.8327315390322391e-06, "loss": 0.052910995483398435, "step": 145075 }, { "epoch": 1.254463861099342, "grad_norm": 0.19161443574525586, "learning_rate": 1.8325438627350026e-06, "loss": 0.09205322265625, "step": 145080 }, { "epoch": 1.2545070946208852, "grad_norm": 11.449177879567808, "learning_rate": 1.8323561918219705e-06, "loss": 0.0938018798828125, "step": 145085 }, { "epoch": 1.2545503281424284, "grad_norm": 1.6595969259641772, "learning_rate": 1.8321685262940072e-06, "loss": 0.06505966186523438, "step": 145090 }, { "epoch": 1.2545935616639718, "grad_norm": 0.7546320508404597, "learning_rate": 1.8319808661519808e-06, "loss": 0.014367294311523438, "step": 145095 }, { "epoch": 1.254636795185515, "grad_norm": 2.024004715802073, "learning_rate": 1.8317932113967555e-06, "loss": 0.006931686401367187, "step": 145100 }, { "epoch": 1.2546800287070583, "grad_norm": 4.057659452908047, "learning_rate": 1.831605562029196e-06, "loss": 0.012203598022460937, "step": 145105 }, { "epoch": 1.2547232622286015, "grad_norm": 2.0057656124602325, "learning_rate": 1.831417918050169e-06, "loss": 0.16327133178710937, "step": 145110 }, { "epoch": 1.2547664957501448, "grad_norm": 3.916391510466245, "learning_rate": 1.8312302794605398e-06, "loss": 0.04213981628417969, "step": 145115 }, { "epoch": 1.2548097292716882, "grad_norm": 1.250372961200809, "learning_rate": 1.8310426462611727e-06, "loss": 0.03748779296875, "step": 145120 }, { "epoch": 1.2548529627932314, "grad_norm": 1.2440147742454029, "learning_rate": 1.8308550184529324e-06, "loss": 0.01849365234375, "step": 145125 }, { "epoch": 1.2548961963147747, "grad_norm": 14.993593018812062, "learning_rate": 1.8306673960366867e-06, "loss": 0.079144287109375, "step": 145130 }, { "epoch": 1.254939429836318, "grad_norm": 9.819448454301138, "learning_rate": 1.8304797790132992e-06, "loss": 0.026779937744140624, "step": 145135 }, { "epoch": 1.2549826633578611, "grad_norm": 8.51872281081205, "learning_rate": 1.8302921673836362e-06, "loss": 0.08217391967773438, "step": 145140 }, { "epoch": 1.2550258968794044, "grad_norm": 13.442300261673912, "learning_rate": 1.8301045611485626e-06, "loss": 0.0942138671875, "step": 145145 }, { "epoch": 1.2550691304009476, "grad_norm": 0.9916230875248715, "learning_rate": 1.829916960308943e-06, "loss": 0.05126190185546875, "step": 145150 }, { "epoch": 1.2551123639224908, "grad_norm": 11.893432800574423, "learning_rate": 1.8297293648656419e-06, "loss": 0.040036773681640624, "step": 145155 }, { "epoch": 1.2551555974440343, "grad_norm": 26.492573435785054, "learning_rate": 1.829541774819526e-06, "loss": 0.0390472412109375, "step": 145160 }, { "epoch": 1.2551988309655775, "grad_norm": 15.730126850560998, "learning_rate": 1.829354190171461e-06, "loss": 0.059095001220703124, "step": 145165 }, { "epoch": 1.2552420644871207, "grad_norm": 5.3782589187412855, "learning_rate": 1.8291666109223105e-06, "loss": 0.043272781372070315, "step": 145170 }, { "epoch": 1.255285298008664, "grad_norm": 8.871436024605563, "learning_rate": 1.8289790370729406e-06, "loss": 0.08463363647460938, "step": 145175 }, { "epoch": 1.2553285315302072, "grad_norm": 0.059271822536162026, "learning_rate": 1.8287914686242154e-06, "loss": 0.04796943664550781, "step": 145180 }, { "epoch": 1.2553717650517506, "grad_norm": 20.515269017228846, "learning_rate": 1.828603905577e-06, "loss": 0.04215660095214844, "step": 145185 }, { "epoch": 1.2554149985732939, "grad_norm": 6.949390211138538, "learning_rate": 1.8284163479321598e-06, "loss": 0.041558837890625, "step": 145190 }, { "epoch": 1.255458232094837, "grad_norm": 34.01013476123989, "learning_rate": 1.8282287956905613e-06, "loss": 0.08961944580078125, "step": 145195 }, { "epoch": 1.2555014656163803, "grad_norm": 0.4672061625450012, "learning_rate": 1.828041248853067e-06, "loss": 0.02070770263671875, "step": 145200 }, { "epoch": 1.2555446991379235, "grad_norm": 0.7314756766101308, "learning_rate": 1.8278537074205438e-06, "loss": 0.068975830078125, "step": 145205 }, { "epoch": 1.2555879326594668, "grad_norm": 10.458324485165692, "learning_rate": 1.8276661713938545e-06, "loss": 0.0635009765625, "step": 145210 }, { "epoch": 1.25563116618101, "grad_norm": 0.18330409076923662, "learning_rate": 1.8274786407738661e-06, "loss": 0.15539512634277344, "step": 145215 }, { "epoch": 1.2556743997025532, "grad_norm": 0.14130847724918288, "learning_rate": 1.8272911155614417e-06, "loss": 0.028303909301757812, "step": 145220 }, { "epoch": 1.2557176332240967, "grad_norm": 0.1304678671459934, "learning_rate": 1.8271035957574478e-06, "loss": 0.1183807373046875, "step": 145225 }, { "epoch": 1.25576086674564, "grad_norm": 0.2592587328117445, "learning_rate": 1.8269160813627489e-06, "loss": 0.016732406616210938, "step": 145230 }, { "epoch": 1.2558041002671831, "grad_norm": 17.363891466282116, "learning_rate": 1.8267285723782088e-06, "loss": 0.061048698425292966, "step": 145235 }, { "epoch": 1.2558473337887264, "grad_norm": 18.68769633156723, "learning_rate": 1.8265410688046924e-06, "loss": 0.15571441650390624, "step": 145240 }, { "epoch": 1.2558905673102698, "grad_norm": 0.23539908424465078, "learning_rate": 1.826353570643066e-06, "loss": 0.11519870758056641, "step": 145245 }, { "epoch": 1.255933800831813, "grad_norm": 6.671060202396183, "learning_rate": 1.8261660778941918e-06, "loss": 0.024275588989257812, "step": 145250 }, { "epoch": 1.2559770343533563, "grad_norm": 0.8374891542123015, "learning_rate": 1.8259785905589367e-06, "loss": 0.090032958984375, "step": 145255 }, { "epoch": 1.2560202678748995, "grad_norm": 26.361067435573815, "learning_rate": 1.825791108638165e-06, "loss": 0.11554527282714844, "step": 145260 }, { "epoch": 1.2560635013964427, "grad_norm": 1.1127575507081762, "learning_rate": 1.8256036321327406e-06, "loss": 0.2046539306640625, "step": 145265 }, { "epoch": 1.256106734917986, "grad_norm": 6.936208057909277, "learning_rate": 1.8254161610435282e-06, "loss": 0.1311431884765625, "step": 145270 }, { "epoch": 1.2561499684395292, "grad_norm": 8.23025314956253, "learning_rate": 1.8252286953713932e-06, "loss": 0.02882804870605469, "step": 145275 }, { "epoch": 1.2561932019610724, "grad_norm": 5.7692663483813975, "learning_rate": 1.8250412351171987e-06, "loss": 0.03204803466796875, "step": 145280 }, { "epoch": 1.2562364354826159, "grad_norm": 4.7709363042771065, "learning_rate": 1.8248537802818112e-06, "loss": 0.10730628967285157, "step": 145285 }, { "epoch": 1.256279669004159, "grad_norm": 8.913853828317391, "learning_rate": 1.824666330866094e-06, "loss": 0.13713493347167968, "step": 145290 }, { "epoch": 1.2563229025257023, "grad_norm": 4.248399274178745, "learning_rate": 1.8244788868709122e-06, "loss": 0.04652557373046875, "step": 145295 }, { "epoch": 1.2563661360472456, "grad_norm": 2.0952941184523652, "learning_rate": 1.8242914482971291e-06, "loss": 0.106927490234375, "step": 145300 }, { "epoch": 1.2564093695687888, "grad_norm": 18.846345170443414, "learning_rate": 1.8241040151456106e-06, "loss": 0.06471710205078125, "step": 145305 }, { "epoch": 1.2564526030903322, "grad_norm": 3.6548144428135476, "learning_rate": 1.8239165874172208e-06, "loss": 0.01476593017578125, "step": 145310 }, { "epoch": 1.2564958366118755, "grad_norm": 10.471893008809863, "learning_rate": 1.8237291651128219e-06, "loss": 0.2602424621582031, "step": 145315 }, { "epoch": 1.2565390701334187, "grad_norm": 19.790396347346572, "learning_rate": 1.8235417482332818e-06, "loss": 0.08047027587890625, "step": 145320 }, { "epoch": 1.256582303654962, "grad_norm": 7.877189086685989, "learning_rate": 1.823354336779463e-06, "loss": 0.011563491821289063, "step": 145325 }, { "epoch": 1.2566255371765052, "grad_norm": 3.654709795314228, "learning_rate": 1.8231669307522297e-06, "loss": 0.04932327270507812, "step": 145330 }, { "epoch": 1.2566687706980484, "grad_norm": 45.13165511917178, "learning_rate": 1.8229795301524467e-06, "loss": 0.10594024658203124, "step": 145335 }, { "epoch": 1.2567120042195916, "grad_norm": 2.071870102710356, "learning_rate": 1.8227921349809785e-06, "loss": 0.05469512939453125, "step": 145340 }, { "epoch": 1.2567552377411348, "grad_norm": 0.54082661733129, "learning_rate": 1.8226047452386878e-06, "loss": 0.011983108520507813, "step": 145345 }, { "epoch": 1.2567984712626783, "grad_norm": 2.2055361599786143, "learning_rate": 1.822417360926441e-06, "loss": 0.13167953491210938, "step": 145350 }, { "epoch": 1.2568417047842215, "grad_norm": 3.023515160759982, "learning_rate": 1.8222299820451005e-06, "loss": 0.048050308227539064, "step": 145355 }, { "epoch": 1.2568849383057648, "grad_norm": 0.43896883351268523, "learning_rate": 1.822042608595532e-06, "loss": 0.052001953125, "step": 145360 }, { "epoch": 1.256928171827308, "grad_norm": 2.064045499519427, "learning_rate": 1.821855240578599e-06, "loss": 0.01320037841796875, "step": 145365 }, { "epoch": 1.2569714053488512, "grad_norm": 6.059387638763989, "learning_rate": 1.8216678779951656e-06, "loss": 0.014241981506347656, "step": 145370 }, { "epoch": 1.2570146388703947, "grad_norm": 0.46125183503281825, "learning_rate": 1.8214805208460954e-06, "loss": 0.014134597778320313, "step": 145375 }, { "epoch": 1.257057872391938, "grad_norm": 0.7795869306458103, "learning_rate": 1.8212931691322523e-06, "loss": 0.06966609954833984, "step": 145380 }, { "epoch": 1.2571011059134811, "grad_norm": 1.2530234005085532, "learning_rate": 1.8211058228545012e-06, "loss": 0.022672653198242188, "step": 145385 }, { "epoch": 1.2571443394350243, "grad_norm": 0.5121375145884186, "learning_rate": 1.8209184820137064e-06, "loss": 0.010306739807128906, "step": 145390 }, { "epoch": 1.2571875729565676, "grad_norm": 0.261356651678529, "learning_rate": 1.8207311466107316e-06, "loss": 0.03697891235351562, "step": 145395 }, { "epoch": 1.2572308064781108, "grad_norm": 10.731591774384405, "learning_rate": 1.8205438166464406e-06, "loss": 0.06182518005371094, "step": 145400 }, { "epoch": 1.257274039999654, "grad_norm": 1.269583504265063, "learning_rate": 1.8203564921216963e-06, "loss": 0.015375518798828125, "step": 145405 }, { "epoch": 1.2573172735211973, "grad_norm": 0.22378173200921406, "learning_rate": 1.820169173037364e-06, "loss": 0.0377044677734375, "step": 145410 }, { "epoch": 1.2573605070427407, "grad_norm": 6.799366027018792, "learning_rate": 1.8199818593943073e-06, "loss": 0.08825263977050782, "step": 145415 }, { "epoch": 1.257403740564284, "grad_norm": 7.2108740428680465, "learning_rate": 1.8197945511933903e-06, "loss": 0.016202926635742188, "step": 145420 }, { "epoch": 1.2574469740858272, "grad_norm": 9.000152331575665, "learning_rate": 1.8196072484354763e-06, "loss": 0.058800697326660156, "step": 145425 }, { "epoch": 1.2574902076073704, "grad_norm": 15.769642810026419, "learning_rate": 1.8194199511214297e-06, "loss": 0.05933456420898438, "step": 145430 }, { "epoch": 1.2575334411289136, "grad_norm": 1.482412289710381, "learning_rate": 1.8192326592521135e-06, "loss": 0.017682647705078124, "step": 145435 }, { "epoch": 1.257576674650457, "grad_norm": 1.909671830442739, "learning_rate": 1.819045372828392e-06, "loss": 0.0795928955078125, "step": 145440 }, { "epoch": 1.2576199081720003, "grad_norm": 1.6053984414711826, "learning_rate": 1.8188580918511283e-06, "loss": 0.06912918090820312, "step": 145445 }, { "epoch": 1.2576631416935435, "grad_norm": 2.3630928980402843, "learning_rate": 1.818670816321188e-06, "loss": 0.010525894165039063, "step": 145450 }, { "epoch": 1.2577063752150868, "grad_norm": 25.62108823099697, "learning_rate": 1.818483546239433e-06, "loss": 0.1382293701171875, "step": 145455 }, { "epoch": 1.25774960873663, "grad_norm": 0.36261656779152696, "learning_rate": 1.8182962816067278e-06, "loss": 0.05620994567871094, "step": 145460 }, { "epoch": 1.2577928422581732, "grad_norm": 0.46611251854204866, "learning_rate": 1.818109022423935e-06, "loss": 0.09724960327148438, "step": 145465 }, { "epoch": 1.2578360757797165, "grad_norm": 1.860493460993573, "learning_rate": 1.8179217686919198e-06, "loss": 0.015023612976074218, "step": 145470 }, { "epoch": 1.25787930930126, "grad_norm": 3.291483523104394, "learning_rate": 1.8177345204115437e-06, "loss": 0.11918182373046875, "step": 145475 }, { "epoch": 1.2579225428228031, "grad_norm": 11.655072069797813, "learning_rate": 1.8175472775836722e-06, "loss": 0.1745269775390625, "step": 145480 }, { "epoch": 1.2579657763443464, "grad_norm": 9.161367042618629, "learning_rate": 1.8173600402091687e-06, "loss": 0.03782634735107422, "step": 145485 }, { "epoch": 1.2580090098658896, "grad_norm": 0.6886400050165371, "learning_rate": 1.817172808288896e-06, "loss": 0.0459136962890625, "step": 145490 }, { "epoch": 1.2580522433874328, "grad_norm": 2.4104038257891, "learning_rate": 1.8169855818237171e-06, "loss": 0.04433135986328125, "step": 145495 }, { "epoch": 1.2580954769089763, "grad_norm": 0.9336275679331709, "learning_rate": 1.816798360814497e-06, "loss": 0.05842437744140625, "step": 145500 }, { "epoch": 1.2581387104305195, "grad_norm": 39.55879849028086, "learning_rate": 1.816611145262097e-06, "loss": 0.3375873565673828, "step": 145505 }, { "epoch": 1.2581819439520627, "grad_norm": 0.8708651485114821, "learning_rate": 1.8164239351673832e-06, "loss": 0.043438720703125, "step": 145510 }, { "epoch": 1.258225177473606, "grad_norm": 2.4496071806811166, "learning_rate": 1.816236730531217e-06, "loss": 0.0901031494140625, "step": 145515 }, { "epoch": 1.2582684109951492, "grad_norm": 9.920142331445929, "learning_rate": 1.8160495313544632e-06, "loss": 0.2791740417480469, "step": 145520 }, { "epoch": 1.2583116445166924, "grad_norm": 2.4843697144537833, "learning_rate": 1.8158623376379834e-06, "loss": 0.1591944694519043, "step": 145525 }, { "epoch": 1.2583548780382356, "grad_norm": 0.9549308137822491, "learning_rate": 1.8156751493826427e-06, "loss": 0.1404338836669922, "step": 145530 }, { "epoch": 1.2583981115597789, "grad_norm": 4.230781145777422, "learning_rate": 1.815487966589302e-06, "loss": 0.1192474365234375, "step": 145535 }, { "epoch": 1.2584413450813223, "grad_norm": 19.27569655466076, "learning_rate": 1.8153007892588277e-06, "loss": 0.2811130523681641, "step": 145540 }, { "epoch": 1.2584845786028656, "grad_norm": 31.40943750514157, "learning_rate": 1.815113617392081e-06, "loss": 0.2455230712890625, "step": 145545 }, { "epoch": 1.2585278121244088, "grad_norm": 106.58926530648831, "learning_rate": 1.8149264509899257e-06, "loss": 0.3501853942871094, "step": 145550 }, { "epoch": 1.258571045645952, "grad_norm": 5.410580000574859, "learning_rate": 1.8147392900532246e-06, "loss": 0.04122772216796875, "step": 145555 }, { "epoch": 1.2586142791674952, "grad_norm": 10.275556959009615, "learning_rate": 1.8145521345828414e-06, "loss": 0.0452423095703125, "step": 145560 }, { "epoch": 1.2586575126890387, "grad_norm": 27.90184907326545, "learning_rate": 1.8143649845796394e-06, "loss": 0.05352783203125, "step": 145565 }, { "epoch": 1.258700746210582, "grad_norm": 62.004682895619794, "learning_rate": 1.8141778400444798e-06, "loss": 0.34806365966796876, "step": 145570 }, { "epoch": 1.2587439797321252, "grad_norm": 5.024188077738675, "learning_rate": 1.8139907009782289e-06, "loss": 0.0502288818359375, "step": 145575 }, { "epoch": 1.2587872132536684, "grad_norm": 0.6538110401514049, "learning_rate": 1.8138035673817468e-06, "loss": 0.0214263916015625, "step": 145580 }, { "epoch": 1.2588304467752116, "grad_norm": 3.4435481706321647, "learning_rate": 1.8136164392558987e-06, "loss": 0.04081926345825195, "step": 145585 }, { "epoch": 1.2588736802967548, "grad_norm": 0.7088315826238453, "learning_rate": 1.8134293166015472e-06, "loss": 0.02412872314453125, "step": 145590 }, { "epoch": 1.258916913818298, "grad_norm": 7.137136127849041, "learning_rate": 1.8132421994195542e-06, "loss": 0.0885498046875, "step": 145595 }, { "epoch": 1.2589601473398413, "grad_norm": 41.26309987219476, "learning_rate": 1.8130550877107825e-06, "loss": 0.2864643096923828, "step": 145600 }, { "epoch": 1.2590033808613847, "grad_norm": 19.60486969208834, "learning_rate": 1.812867981476097e-06, "loss": 0.03691611289978027, "step": 145605 }, { "epoch": 1.259046614382928, "grad_norm": 0.17628718310306565, "learning_rate": 1.8126808807163585e-06, "loss": 0.03232765197753906, "step": 145610 }, { "epoch": 1.2590898479044712, "grad_norm": 0.29917240177346965, "learning_rate": 1.8124937854324321e-06, "loss": 0.0531219482421875, "step": 145615 }, { "epoch": 1.2591330814260144, "grad_norm": 27.79110465060658, "learning_rate": 1.8123066956251792e-06, "loss": 0.11842384338378906, "step": 145620 }, { "epoch": 1.2591763149475577, "grad_norm": 1.597541508555416, "learning_rate": 1.8121196112954625e-06, "loss": 0.51962890625, "step": 145625 }, { "epoch": 1.2592195484691011, "grad_norm": 0.6769563758037243, "learning_rate": 1.8119325324441445e-06, "loss": 0.021660041809082032, "step": 145630 }, { "epoch": 1.2592627819906443, "grad_norm": 1.5837788129485895, "learning_rate": 1.8117454590720897e-06, "loss": 0.10930747985839843, "step": 145635 }, { "epoch": 1.2593060155121876, "grad_norm": 1.3394315152257121, "learning_rate": 1.8115583911801593e-06, "loss": 0.28037185668945314, "step": 145640 }, { "epoch": 1.2593492490337308, "grad_norm": 8.387331468184918, "learning_rate": 1.8113713287692168e-06, "loss": 0.4710670471191406, "step": 145645 }, { "epoch": 1.259392482555274, "grad_norm": 12.489878574338634, "learning_rate": 1.8111842718401253e-06, "loss": 0.05632867813110352, "step": 145650 }, { "epoch": 1.2594357160768173, "grad_norm": 1.8055007458122054, "learning_rate": 1.8109972203937466e-06, "loss": 0.4630472183227539, "step": 145655 }, { "epoch": 1.2594789495983605, "grad_norm": 1.2335800214895407, "learning_rate": 1.8108101744309428e-06, "loss": 0.083880615234375, "step": 145660 }, { "epoch": 1.2595221831199037, "grad_norm": 1.457675523044422, "learning_rate": 1.8106231339525777e-06, "loss": 0.12197456359863282, "step": 145665 }, { "epoch": 1.2595654166414472, "grad_norm": 17.206379589428256, "learning_rate": 1.8104360989595133e-06, "loss": 0.23303794860839844, "step": 145670 }, { "epoch": 1.2596086501629904, "grad_norm": 4.663262787895056, "learning_rate": 1.8102490694526136e-06, "loss": 0.04260940551757812, "step": 145675 }, { "epoch": 1.2596518836845336, "grad_norm": 12.913380235094817, "learning_rate": 1.8100620454327401e-06, "loss": 0.04527435302734375, "step": 145680 }, { "epoch": 1.2596951172060769, "grad_norm": 4.9579739449044595, "learning_rate": 1.8098750269007546e-06, "loss": 0.14100265502929688, "step": 145685 }, { "epoch": 1.2597383507276203, "grad_norm": 2.5149743906520086, "learning_rate": 1.8096880138575203e-06, "loss": 0.019173431396484374, "step": 145690 }, { "epoch": 1.2597815842491635, "grad_norm": 0.17250568841789937, "learning_rate": 1.8095010063038994e-06, "loss": 0.0074368476867675785, "step": 145695 }, { "epoch": 1.2598248177707068, "grad_norm": 0.6099592715822156, "learning_rate": 1.809314004240755e-06, "loss": 0.05862998962402344, "step": 145700 }, { "epoch": 1.25986805129225, "grad_norm": 0.8391444436742042, "learning_rate": 1.80912700766895e-06, "loss": 0.031280517578125, "step": 145705 }, { "epoch": 1.2599112848137932, "grad_norm": 1.8687611002045237, "learning_rate": 1.8089400165893455e-06, "loss": 0.06127738952636719, "step": 145710 }, { "epoch": 1.2599545183353364, "grad_norm": 38.80373675774415, "learning_rate": 1.8087530310028045e-06, "loss": 0.14828929901123047, "step": 145715 }, { "epoch": 1.2599977518568797, "grad_norm": 36.459483250649, "learning_rate": 1.8085660509101886e-06, "loss": 0.11732139587402343, "step": 145720 }, { "epoch": 1.260040985378423, "grad_norm": 1.549348966484545, "learning_rate": 1.8083790763123616e-06, "loss": 0.025234222412109375, "step": 145725 }, { "epoch": 1.2600842188999664, "grad_norm": 0.9609119628963823, "learning_rate": 1.8081921072101838e-06, "loss": 0.15070648193359376, "step": 145730 }, { "epoch": 1.2601274524215096, "grad_norm": 0.26418293482261385, "learning_rate": 1.8080051436045194e-06, "loss": 0.2167116165161133, "step": 145735 }, { "epoch": 1.2601706859430528, "grad_norm": 10.21308794132731, "learning_rate": 1.8078181854962303e-06, "loss": 0.10132064819335937, "step": 145740 }, { "epoch": 1.260213919464596, "grad_norm": 52.47433898692255, "learning_rate": 1.8076312328861784e-06, "loss": 0.26403656005859377, "step": 145745 }, { "epoch": 1.2602571529861393, "grad_norm": 5.344134214336695, "learning_rate": 1.8074442857752254e-06, "loss": 0.031476593017578124, "step": 145750 }, { "epoch": 1.2603003865076827, "grad_norm": 22.59753894848743, "learning_rate": 1.8072573441642345e-06, "loss": 0.17372894287109375, "step": 145755 }, { "epoch": 1.260343620029226, "grad_norm": 12.614180619397732, "learning_rate": 1.8070704080540662e-06, "loss": 0.048297119140625, "step": 145760 }, { "epoch": 1.2603868535507692, "grad_norm": 0.9124395257742153, "learning_rate": 1.806883477445585e-06, "loss": 0.05919361114501953, "step": 145765 }, { "epoch": 1.2604300870723124, "grad_norm": 0.5138389289307402, "learning_rate": 1.8066965523396517e-06, "loss": 0.136260986328125, "step": 145770 }, { "epoch": 1.2604733205938556, "grad_norm": 1.0011008837786746, "learning_rate": 1.8065096327371283e-06, "loss": 0.05784111022949219, "step": 145775 }, { "epoch": 1.2605165541153989, "grad_norm": 1.974525834444403, "learning_rate": 1.8063227186388765e-06, "loss": 0.01622772216796875, "step": 145780 }, { "epoch": 1.260559787636942, "grad_norm": 6.943368075770545, "learning_rate": 1.8061358100457597e-06, "loss": 0.04173431396484375, "step": 145785 }, { "epoch": 1.2606030211584853, "grad_norm": 14.21237422475183, "learning_rate": 1.8059489069586373e-06, "loss": 0.21549835205078124, "step": 145790 }, { "epoch": 1.2606462546800288, "grad_norm": 1.505602602410856, "learning_rate": 1.8057620093783748e-06, "loss": 0.07339935302734375, "step": 145795 }, { "epoch": 1.260689488201572, "grad_norm": 0.6501812298237761, "learning_rate": 1.805575117305832e-06, "loss": 0.010578155517578125, "step": 145800 }, { "epoch": 1.2607327217231152, "grad_norm": 0.5224898128903085, "learning_rate": 1.805388230741871e-06, "loss": 0.016701316833496092, "step": 145805 }, { "epoch": 1.2607759552446585, "grad_norm": 12.110859239049857, "learning_rate": 1.8052013496873543e-06, "loss": 0.12041549682617188, "step": 145810 }, { "epoch": 1.2608191887662017, "grad_norm": 1.3337172532836465, "learning_rate": 1.8050144741431431e-06, "loss": 0.014783859252929688, "step": 145815 }, { "epoch": 1.2608624222877451, "grad_norm": 3.6184330709736106, "learning_rate": 1.8048276041101001e-06, "loss": 0.04261970520019531, "step": 145820 }, { "epoch": 1.2609056558092884, "grad_norm": 0.9446658442813812, "learning_rate": 1.804640739589085e-06, "loss": 0.026257705688476563, "step": 145825 }, { "epoch": 1.2609488893308316, "grad_norm": 1.7520192089106101, "learning_rate": 1.8044538805809628e-06, "loss": 0.06743431091308594, "step": 145830 }, { "epoch": 1.2609921228523748, "grad_norm": 4.330266024478093, "learning_rate": 1.8042670270865928e-06, "loss": 0.1865234375, "step": 145835 }, { "epoch": 1.261035356373918, "grad_norm": 8.133301664363092, "learning_rate": 1.8040801791068382e-06, "loss": 0.07032318115234375, "step": 145840 }, { "epoch": 1.2610785898954613, "grad_norm": 8.36740565425088, "learning_rate": 1.80389333664256e-06, "loss": 0.03477935791015625, "step": 145845 }, { "epoch": 1.2611218234170045, "grad_norm": 7.347317822678795, "learning_rate": 1.8037064996946203e-06, "loss": 0.074884033203125, "step": 145850 }, { "epoch": 1.2611650569385477, "grad_norm": 0.45689824147471625, "learning_rate": 1.803519668263879e-06, "loss": 0.0228240966796875, "step": 145855 }, { "epoch": 1.2612082904600912, "grad_norm": 3.840453229135214, "learning_rate": 1.8033328423512005e-06, "loss": 0.01055755615234375, "step": 145860 }, { "epoch": 1.2612515239816344, "grad_norm": 5.145442724942477, "learning_rate": 1.8031460219574449e-06, "loss": 0.050855255126953124, "step": 145865 }, { "epoch": 1.2612947575031777, "grad_norm": 17.110523950075763, "learning_rate": 1.8029592070834745e-06, "loss": 0.08447227478027344, "step": 145870 }, { "epoch": 1.2613379910247209, "grad_norm": 0.6817714157315248, "learning_rate": 1.802772397730151e-06, "loss": 0.03636322021484375, "step": 145875 }, { "epoch": 1.2613812245462641, "grad_norm": 4.121375988067588, "learning_rate": 1.8025855938983344e-06, "loss": 0.20573272705078124, "step": 145880 }, { "epoch": 1.2614244580678076, "grad_norm": 0.156201528602932, "learning_rate": 1.8023987955888868e-06, "loss": 0.5120582580566406, "step": 145885 }, { "epoch": 1.2614676915893508, "grad_norm": 6.101782672001191, "learning_rate": 1.802212002802671e-06, "loss": 0.025017929077148438, "step": 145890 }, { "epoch": 1.261510925110894, "grad_norm": 0.07977174768636058, "learning_rate": 1.802025215540547e-06, "loss": 0.1112715721130371, "step": 145895 }, { "epoch": 1.2615541586324373, "grad_norm": 14.407007334984991, "learning_rate": 1.801838433803378e-06, "loss": 0.098565673828125, "step": 145900 }, { "epoch": 1.2615973921539805, "grad_norm": 4.491471411250257, "learning_rate": 1.8016516575920234e-06, "loss": 0.034683609008789064, "step": 145905 }, { "epoch": 1.2616406256755237, "grad_norm": 0.4341069338456496, "learning_rate": 1.801464886907346e-06, "loss": 0.02199554443359375, "step": 145910 }, { "epoch": 1.261683859197067, "grad_norm": 0.11822910414258091, "learning_rate": 1.801278121750206e-06, "loss": 0.04019355773925781, "step": 145915 }, { "epoch": 1.2617270927186102, "grad_norm": 6.225955158970666, "learning_rate": 1.8010913621214654e-06, "loss": 0.038542556762695315, "step": 145920 }, { "epoch": 1.2617703262401536, "grad_norm": 2.5724223461475266, "learning_rate": 1.8009046080219856e-06, "loss": 0.04111404418945312, "step": 145925 }, { "epoch": 1.2618135597616968, "grad_norm": 21.943807909337934, "learning_rate": 1.800717859452628e-06, "loss": 0.21029624938964844, "step": 145930 }, { "epoch": 1.26185679328324, "grad_norm": 1.0311873554328914, "learning_rate": 1.800531116414254e-06, "loss": 0.03755950927734375, "step": 145935 }, { "epoch": 1.2619000268047833, "grad_norm": 1.0383122612001747, "learning_rate": 1.8003443789077243e-06, "loss": 0.01300811767578125, "step": 145940 }, { "epoch": 1.2619432603263268, "grad_norm": 4.185036880331274, "learning_rate": 1.8001576469338998e-06, "loss": 0.06035614013671875, "step": 145945 }, { "epoch": 1.26198649384787, "grad_norm": 4.811170991359989, "learning_rate": 1.799970920493642e-06, "loss": 0.20634613037109376, "step": 145950 }, { "epoch": 1.2620297273694132, "grad_norm": 2.3272922033448706, "learning_rate": 1.7997841995878128e-06, "loss": 0.07249908447265625, "step": 145955 }, { "epoch": 1.2620729608909564, "grad_norm": 0.47282301614819067, "learning_rate": 1.799597484217273e-06, "loss": 0.14956340789794922, "step": 145960 }, { "epoch": 1.2621161944124997, "grad_norm": 84.55217245965383, "learning_rate": 1.7994107743828837e-06, "loss": 0.3349308013916016, "step": 145965 }, { "epoch": 1.262159427934043, "grad_norm": 0.15195374545037688, "learning_rate": 1.7992240700855058e-06, "loss": 0.0327056884765625, "step": 145970 }, { "epoch": 1.2622026614555861, "grad_norm": 32.245677927758756, "learning_rate": 1.7990373713259995e-06, "loss": 0.15746097564697265, "step": 145975 }, { "epoch": 1.2622458949771294, "grad_norm": 0.612796183734122, "learning_rate": 1.7988506781052278e-06, "loss": 0.048593902587890626, "step": 145980 }, { "epoch": 1.2622891284986728, "grad_norm": 0.6220223052061838, "learning_rate": 1.7986639904240493e-06, "loss": 0.1097625732421875, "step": 145985 }, { "epoch": 1.262332362020216, "grad_norm": 2.4534217389894066, "learning_rate": 1.7984773082833275e-06, "loss": 0.01792449951171875, "step": 145990 }, { "epoch": 1.2623755955417593, "grad_norm": 19.069349833181743, "learning_rate": 1.798290631683922e-06, "loss": 0.017138671875, "step": 145995 }, { "epoch": 1.2624188290633025, "grad_norm": 1.6075167552227172, "learning_rate": 1.7981039606266938e-06, "loss": 0.09612579345703125, "step": 146000 }, { "epoch": 1.2624620625848457, "grad_norm": 16.45964001237505, "learning_rate": 1.7979172951125041e-06, "loss": 0.27805633544921876, "step": 146005 }, { "epoch": 1.2625052961063892, "grad_norm": 1.5995773654981589, "learning_rate": 1.7977306351422137e-06, "loss": 0.007249069213867187, "step": 146010 }, { "epoch": 1.2625485296279324, "grad_norm": 4.034814704172675, "learning_rate": 1.7975439807166818e-06, "loss": 0.06593017578125, "step": 146015 }, { "epoch": 1.2625917631494756, "grad_norm": 1.8674941812677084, "learning_rate": 1.7973573318367727e-06, "loss": 0.1456634521484375, "step": 146020 }, { "epoch": 1.2626349966710189, "grad_norm": 9.095344914991452, "learning_rate": 1.7971706885033448e-06, "loss": 0.24819297790527345, "step": 146025 }, { "epoch": 1.262678230192562, "grad_norm": 8.0614750264467, "learning_rate": 1.7969840507172589e-06, "loss": 0.030319595336914064, "step": 146030 }, { "epoch": 1.2627214637141053, "grad_norm": 0.7219044746032208, "learning_rate": 1.796797418479377e-06, "loss": 0.0157073974609375, "step": 146035 }, { "epoch": 1.2627646972356485, "grad_norm": 3.8089504761143904, "learning_rate": 1.7966107917905593e-06, "loss": 0.07282638549804688, "step": 146040 }, { "epoch": 1.2628079307571918, "grad_norm": 0.15665713697044478, "learning_rate": 1.7964241706516645e-06, "loss": 0.14965591430664063, "step": 146045 }, { "epoch": 1.2628511642787352, "grad_norm": 0.33682286686748747, "learning_rate": 1.7962375550635567e-06, "loss": 0.4864528656005859, "step": 146050 }, { "epoch": 1.2628943978002785, "grad_norm": 18.50355150496625, "learning_rate": 1.796050945027095e-06, "loss": 0.07710151672363282, "step": 146055 }, { "epoch": 1.2629376313218217, "grad_norm": 12.423464232492273, "learning_rate": 1.7958643405431392e-06, "loss": 0.07177352905273438, "step": 146060 }, { "epoch": 1.262980864843365, "grad_norm": 33.588540246946934, "learning_rate": 1.795677741612551e-06, "loss": 0.12972602844238282, "step": 146065 }, { "epoch": 1.2630240983649081, "grad_norm": 1.913628404539753, "learning_rate": 1.7954911482361908e-06, "loss": 0.03463668823242187, "step": 146070 }, { "epoch": 1.2630673318864516, "grad_norm": 0.24377541596654279, "learning_rate": 1.795304560414919e-06, "loss": 0.02579498291015625, "step": 146075 }, { "epoch": 1.2631105654079948, "grad_norm": 0.743414783598153, "learning_rate": 1.7951179781495953e-06, "loss": 0.06579303741455078, "step": 146080 }, { "epoch": 1.263153798929538, "grad_norm": 0.06094708312253796, "learning_rate": 1.7949314014410823e-06, "loss": 0.059851455688476565, "step": 146085 }, { "epoch": 1.2631970324510813, "grad_norm": 7.327992352522486, "learning_rate": 1.7947448302902379e-06, "loss": 0.17470989227294922, "step": 146090 }, { "epoch": 1.2632402659726245, "grad_norm": 0.2651798197754622, "learning_rate": 1.794558264697925e-06, "loss": 0.081805419921875, "step": 146095 }, { "epoch": 1.2632834994941677, "grad_norm": 4.653902756557741, "learning_rate": 1.7943717046650026e-06, "loss": 0.027884769439697265, "step": 146100 }, { "epoch": 1.263326733015711, "grad_norm": 0.46378500821480845, "learning_rate": 1.7941851501923314e-06, "loss": 0.2813243865966797, "step": 146105 }, { "epoch": 1.2633699665372542, "grad_norm": 0.6264347192081072, "learning_rate": 1.7939986012807705e-06, "loss": 0.031828689575195315, "step": 146110 }, { "epoch": 1.2634132000587976, "grad_norm": 11.960466118028952, "learning_rate": 1.7938120579311828e-06, "loss": 0.06041412353515625, "step": 146115 }, { "epoch": 1.2634564335803409, "grad_norm": 5.286019683906544, "learning_rate": 1.7936255201444266e-06, "loss": 0.04381637573242188, "step": 146120 }, { "epoch": 1.263499667101884, "grad_norm": 0.9991279613910354, "learning_rate": 1.7934389879213638e-06, "loss": 0.12088623046875, "step": 146125 }, { "epoch": 1.2635429006234273, "grad_norm": 3.58146488337873, "learning_rate": 1.7932524612628537e-06, "loss": 0.12144813537597657, "step": 146130 }, { "epoch": 1.2635861341449706, "grad_norm": 5.002704927654412, "learning_rate": 1.7930659401697561e-06, "loss": 0.036885833740234374, "step": 146135 }, { "epoch": 1.263629367666514, "grad_norm": 13.817299192560874, "learning_rate": 1.792879424642931e-06, "loss": 0.2072845458984375, "step": 146140 }, { "epoch": 1.2636726011880572, "grad_norm": 5.784409404336276, "learning_rate": 1.7926929146832404e-06, "loss": 0.018183135986328126, "step": 146145 }, { "epoch": 1.2637158347096005, "grad_norm": 0.6986458967455195, "learning_rate": 1.7925064102915428e-06, "loss": 0.06376914978027344, "step": 146150 }, { "epoch": 1.2637590682311437, "grad_norm": 1.6103681165059711, "learning_rate": 1.7923199114686996e-06, "loss": 0.04058990478515625, "step": 146155 }, { "epoch": 1.263802301752687, "grad_norm": 0.6131835185980131, "learning_rate": 1.79213341821557e-06, "loss": 0.06849365234375, "step": 146160 }, { "epoch": 1.2638455352742302, "grad_norm": 7.146625510521279, "learning_rate": 1.7919469305330147e-06, "loss": 0.020811080932617188, "step": 146165 }, { "epoch": 1.2638887687957734, "grad_norm": 0.8999472265870717, "learning_rate": 1.7917604484218925e-06, "loss": 0.027771377563476564, "step": 146170 }, { "epoch": 1.2639320023173166, "grad_norm": 16.61902721184197, "learning_rate": 1.7915739718830647e-06, "loss": 0.06540908813476562, "step": 146175 }, { "epoch": 1.26397523583886, "grad_norm": 0.6752440538479771, "learning_rate": 1.7913875009173908e-06, "loss": 0.034735107421875, "step": 146180 }, { "epoch": 1.2640184693604033, "grad_norm": 0.03312607003777122, "learning_rate": 1.7912010355257315e-06, "loss": 0.06766910552978515, "step": 146185 }, { "epoch": 1.2640617028819465, "grad_norm": 5.063401439332522, "learning_rate": 1.7910145757089464e-06, "loss": 0.06332931518554688, "step": 146190 }, { "epoch": 1.2641049364034898, "grad_norm": 46.65851390422818, "learning_rate": 1.790828121467895e-06, "loss": 0.23374176025390625, "step": 146195 }, { "epoch": 1.2641481699250332, "grad_norm": 19.44333493354612, "learning_rate": 1.7906416728034366e-06, "loss": 0.05829010009765625, "step": 146200 }, { "epoch": 1.2641914034465764, "grad_norm": 7.268407680147459, "learning_rate": 1.7904552297164319e-06, "loss": 0.02209892272949219, "step": 146205 }, { "epoch": 1.2642346369681197, "grad_norm": 27.487096792568508, "learning_rate": 1.7902687922077417e-06, "loss": 0.25045928955078123, "step": 146210 }, { "epoch": 1.264277870489663, "grad_norm": 7.971230459627957, "learning_rate": 1.7900823602782255e-06, "loss": 0.12595672607421876, "step": 146215 }, { "epoch": 1.2643211040112061, "grad_norm": 9.914486261211476, "learning_rate": 1.7898959339287417e-06, "loss": 0.1854839324951172, "step": 146220 }, { "epoch": 1.2643643375327493, "grad_norm": 9.823046798314712, "learning_rate": 1.7897095131601511e-06, "loss": 0.055035400390625, "step": 146225 }, { "epoch": 1.2644075710542926, "grad_norm": 2.8570016764124317, "learning_rate": 1.7895230979733135e-06, "loss": 0.03384208679199219, "step": 146230 }, { "epoch": 1.2644508045758358, "grad_norm": 26.118813206541468, "learning_rate": 1.7893366883690868e-06, "loss": 0.09701309204101563, "step": 146235 }, { "epoch": 1.2644940380973793, "grad_norm": 0.43246684295620663, "learning_rate": 1.789150284348334e-06, "loss": 0.012517547607421875, "step": 146240 }, { "epoch": 1.2645372716189225, "grad_norm": 0.023465413269999444, "learning_rate": 1.7889638859119132e-06, "loss": 0.1192169189453125, "step": 146245 }, { "epoch": 1.2645805051404657, "grad_norm": 2.3309625934491764, "learning_rate": 1.7887774930606837e-06, "loss": 0.05354232788085937, "step": 146250 }, { "epoch": 1.264623738662009, "grad_norm": 5.1436418442903005, "learning_rate": 1.7885911057955047e-06, "loss": 0.20629806518554689, "step": 146255 }, { "epoch": 1.2646669721835522, "grad_norm": 1.1498879301266982, "learning_rate": 1.7884047241172373e-06, "loss": 0.30084075927734377, "step": 146260 }, { "epoch": 1.2647102057050956, "grad_norm": 6.238238558577527, "learning_rate": 1.7882183480267401e-06, "loss": 0.090032958984375, "step": 146265 }, { "epoch": 1.2647534392266389, "grad_norm": 25.843827857687966, "learning_rate": 1.7880319775248717e-06, "loss": 0.21913604736328124, "step": 146270 }, { "epoch": 1.264796672748182, "grad_norm": 5.988856668968141, "learning_rate": 1.7878456126124941e-06, "loss": 0.03941192626953125, "step": 146275 }, { "epoch": 1.2648399062697253, "grad_norm": 2.131087283202724, "learning_rate": 1.7876592532904651e-06, "loss": 0.022563934326171875, "step": 146280 }, { "epoch": 1.2648831397912685, "grad_norm": 0.46392617753259163, "learning_rate": 1.7874728995596439e-06, "loss": 0.01911468505859375, "step": 146285 }, { "epoch": 1.2649263733128118, "grad_norm": 3.768356029315571, "learning_rate": 1.7872865514208914e-06, "loss": 0.053415107727050784, "step": 146290 }, { "epoch": 1.264969606834355, "grad_norm": 0.20491081709482825, "learning_rate": 1.7871002088750663e-06, "loss": 0.01124401092529297, "step": 146295 }, { "epoch": 1.2650128403558982, "grad_norm": 2.1487921679157744, "learning_rate": 1.7869138719230264e-06, "loss": 0.13532867431640624, "step": 146300 }, { "epoch": 1.2650560738774417, "grad_norm": 1.4914104219074231, "learning_rate": 1.7867275405656337e-06, "loss": 0.18635902404785157, "step": 146305 }, { "epoch": 1.265099307398985, "grad_norm": 3.1719270452872235, "learning_rate": 1.7865412148037464e-06, "loss": 0.05981025695800781, "step": 146310 }, { "epoch": 1.2651425409205281, "grad_norm": 1.6473438188008243, "learning_rate": 1.7863548946382233e-06, "loss": 0.0835113525390625, "step": 146315 }, { "epoch": 1.2651857744420714, "grad_norm": 4.046075931077897, "learning_rate": 1.7861685800699248e-06, "loss": 0.02857666015625, "step": 146320 }, { "epoch": 1.2652290079636146, "grad_norm": 0.5942547506575845, "learning_rate": 1.7859822710997094e-06, "loss": 0.066644287109375, "step": 146325 }, { "epoch": 1.265272241485158, "grad_norm": 0.31016647712533013, "learning_rate": 1.7857959677284365e-06, "loss": 0.162176513671875, "step": 146330 }, { "epoch": 1.2653154750067013, "grad_norm": 12.492793430358738, "learning_rate": 1.785609669956964e-06, "loss": 0.04552268981933594, "step": 146335 }, { "epoch": 1.2653587085282445, "grad_norm": 12.09986764222969, "learning_rate": 1.7854233777861538e-06, "loss": 0.08725776672363281, "step": 146340 }, { "epoch": 1.2654019420497877, "grad_norm": 4.143844771653891, "learning_rate": 1.7852370912168631e-06, "loss": 0.006984329223632813, "step": 146345 }, { "epoch": 1.265445175571331, "grad_norm": 0.12675486234607988, "learning_rate": 1.7850508102499522e-06, "loss": 0.10915699005126953, "step": 146350 }, { "epoch": 1.2654884090928742, "grad_norm": 0.22451778010611828, "learning_rate": 1.7848645348862795e-06, "loss": 0.02346954345703125, "step": 146355 }, { "epoch": 1.2655316426144174, "grad_norm": 1.5185848673713314, "learning_rate": 1.7846782651267041e-06, "loss": 0.051756668090820315, "step": 146360 }, { "epoch": 1.2655748761359606, "grad_norm": 1.4633511278768527, "learning_rate": 1.784492000972084e-06, "loss": 0.048729324340820314, "step": 146365 }, { "epoch": 1.265618109657504, "grad_norm": 6.928860192680148, "learning_rate": 1.7843057424232809e-06, "loss": 0.055717086791992186, "step": 146370 }, { "epoch": 1.2656613431790473, "grad_norm": 2.8047896962122723, "learning_rate": 1.7841194894811514e-06, "loss": 0.016873550415039063, "step": 146375 }, { "epoch": 1.2657045767005906, "grad_norm": 30.533618081238334, "learning_rate": 1.7839332421465563e-06, "loss": 0.011133193969726562, "step": 146380 }, { "epoch": 1.2657478102221338, "grad_norm": 1.3395595503272757, "learning_rate": 1.783747000420353e-06, "loss": 0.08311271667480469, "step": 146385 }, { "epoch": 1.2657910437436772, "grad_norm": 2.4683277748325345, "learning_rate": 1.7835607643034014e-06, "loss": 0.021990203857421876, "step": 146390 }, { "epoch": 1.2658342772652205, "grad_norm": 0.9126724840226407, "learning_rate": 1.7833745337965589e-06, "loss": 0.055959320068359374, "step": 146395 }, { "epoch": 1.2658775107867637, "grad_norm": 13.060370095546931, "learning_rate": 1.7831883089006868e-06, "loss": 0.22242240905761718, "step": 146400 }, { "epoch": 1.265920744308307, "grad_norm": 5.991836480795402, "learning_rate": 1.7830020896166419e-06, "loss": 0.028042411804199217, "step": 146405 }, { "epoch": 1.2659639778298502, "grad_norm": 33.16774293614769, "learning_rate": 1.7828158759452845e-06, "loss": 0.083306884765625, "step": 146410 }, { "epoch": 1.2660072113513934, "grad_norm": 6.026742745822826, "learning_rate": 1.7826296678874729e-06, "loss": 0.02886962890625, "step": 146415 }, { "epoch": 1.2660504448729366, "grad_norm": 4.84965471352583, "learning_rate": 1.7824434654440656e-06, "loss": 0.025492095947265626, "step": 146420 }, { "epoch": 1.2660936783944798, "grad_norm": 7.658280609465227, "learning_rate": 1.7822572686159211e-06, "loss": 0.0947113037109375, "step": 146425 }, { "epoch": 1.2661369119160233, "grad_norm": 4.720148282889979, "learning_rate": 1.7820710774038976e-06, "loss": 0.1873809814453125, "step": 146430 }, { "epoch": 1.2661801454375665, "grad_norm": 1.0090556111162452, "learning_rate": 1.7818848918088559e-06, "loss": 0.015285491943359375, "step": 146435 }, { "epoch": 1.2662233789591097, "grad_norm": 15.209931195398433, "learning_rate": 1.7816987118316533e-06, "loss": 0.06375999450683593, "step": 146440 }, { "epoch": 1.266266612480653, "grad_norm": 1.7701963139952985, "learning_rate": 1.7815125374731486e-06, "loss": 0.064947509765625, "step": 146445 }, { "epoch": 1.2663098460021962, "grad_norm": 0.880861762298148, "learning_rate": 1.7813263687342003e-06, "loss": 0.006481170654296875, "step": 146450 }, { "epoch": 1.2663530795237397, "grad_norm": 22.05009007524834, "learning_rate": 1.7811402056156672e-06, "loss": 0.08459510803222656, "step": 146455 }, { "epoch": 1.2663963130452829, "grad_norm": 102.99241933776457, "learning_rate": 1.7809540481184065e-06, "loss": 0.15743331909179686, "step": 146460 }, { "epoch": 1.2664395465668261, "grad_norm": 4.859279102484517, "learning_rate": 1.7807678962432797e-06, "loss": 0.04324607849121094, "step": 146465 }, { "epoch": 1.2664827800883693, "grad_norm": 0.9125773056515746, "learning_rate": 1.7805817499911437e-06, "loss": 0.03831634521484375, "step": 146470 }, { "epoch": 1.2665260136099126, "grad_norm": 4.7603679612832295, "learning_rate": 1.7803956093628562e-06, "loss": 0.03459930419921875, "step": 146475 }, { "epoch": 1.2665692471314558, "grad_norm": 5.9014348116456645, "learning_rate": 1.7802094743592762e-06, "loss": 0.08191547393798829, "step": 146480 }, { "epoch": 1.266612480652999, "grad_norm": 39.27523594685748, "learning_rate": 1.780023344981263e-06, "loss": 0.12544708251953124, "step": 146485 }, { "epoch": 1.2666557141745423, "grad_norm": 5.78828327697317, "learning_rate": 1.7798372212296729e-06, "loss": 0.03988723754882813, "step": 146490 }, { "epoch": 1.2666989476960857, "grad_norm": 13.416532093247016, "learning_rate": 1.7796511031053674e-06, "loss": 0.0431365966796875, "step": 146495 }, { "epoch": 1.266742181217629, "grad_norm": 0.20200875577843766, "learning_rate": 1.7794649906092025e-06, "loss": 0.18921051025390626, "step": 146500 }, { "epoch": 1.2667854147391722, "grad_norm": 80.5478264091069, "learning_rate": 1.7792788837420377e-06, "loss": 0.2536712646484375, "step": 146505 }, { "epoch": 1.2668286482607154, "grad_norm": 2.266180334059112, "learning_rate": 1.77909278250473e-06, "loss": 0.013648414611816406, "step": 146510 }, { "epoch": 1.2668718817822586, "grad_norm": 0.06754506687911198, "learning_rate": 1.7789066868981392e-06, "loss": 0.15136260986328126, "step": 146515 }, { "epoch": 1.266915115303802, "grad_norm": 1.6395692929875965, "learning_rate": 1.778720596923123e-06, "loss": 0.0275634765625, "step": 146520 }, { "epoch": 1.2669583488253453, "grad_norm": 15.691348492578902, "learning_rate": 1.7785345125805382e-06, "loss": 0.02584381103515625, "step": 146525 }, { "epoch": 1.2670015823468885, "grad_norm": 25.664944534753843, "learning_rate": 1.778348433871245e-06, "loss": 0.07025318145751953, "step": 146530 }, { "epoch": 1.2670448158684318, "grad_norm": 5.510367534065819, "learning_rate": 1.7781623607961017e-06, "loss": 0.018989944458007814, "step": 146535 }, { "epoch": 1.267088049389975, "grad_norm": 0.21963923130386648, "learning_rate": 1.7779762933559642e-06, "loss": 0.017458534240722655, "step": 146540 }, { "epoch": 1.2671312829115182, "grad_norm": 0.5358446919478406, "learning_rate": 1.777790231551693e-06, "loss": 0.08803939819335938, "step": 146545 }, { "epoch": 1.2671745164330614, "grad_norm": 1.0900057065304878, "learning_rate": 1.7776041753841459e-06, "loss": 0.059732246398925784, "step": 146550 }, { "epoch": 1.2672177499546047, "grad_norm": 0.13080530271457028, "learning_rate": 1.7774181248541783e-06, "loss": 0.02763519287109375, "step": 146555 }, { "epoch": 1.2672609834761481, "grad_norm": 0.8237669409796048, "learning_rate": 1.7772320799626518e-06, "loss": 0.019928741455078124, "step": 146560 }, { "epoch": 1.2673042169976914, "grad_norm": 3.7548196027048837, "learning_rate": 1.777046040710423e-06, "loss": 0.061553955078125, "step": 146565 }, { "epoch": 1.2673474505192346, "grad_norm": 0.9251178988627641, "learning_rate": 1.7768600070983487e-06, "loss": 0.02408294677734375, "step": 146570 }, { "epoch": 1.2673906840407778, "grad_norm": 3.33393854228147, "learning_rate": 1.776673979127289e-06, "loss": 0.0471588134765625, "step": 146575 }, { "epoch": 1.267433917562321, "grad_norm": 44.237450440910635, "learning_rate": 1.776487956798101e-06, "loss": 0.07031440734863281, "step": 146580 }, { "epoch": 1.2674771510838645, "grad_norm": 7.790366162352499, "learning_rate": 1.776301940111641e-06, "loss": 0.0234588623046875, "step": 146585 }, { "epoch": 1.2675203846054077, "grad_norm": 0.23382922284418037, "learning_rate": 1.7761159290687692e-06, "loss": 0.0455657958984375, "step": 146590 }, { "epoch": 1.267563618126951, "grad_norm": 6.479130043867644, "learning_rate": 1.7759299236703432e-06, "loss": 0.05876007080078125, "step": 146595 }, { "epoch": 1.2676068516484942, "grad_norm": 11.933083762741122, "learning_rate": 1.7757439239172191e-06, "loss": 0.04574928283691406, "step": 146600 }, { "epoch": 1.2676500851700374, "grad_norm": 5.514763931871551, "learning_rate": 1.7755579298102568e-06, "loss": 0.15270004272460938, "step": 146605 }, { "epoch": 1.2676933186915806, "grad_norm": 0.9654428797954759, "learning_rate": 1.775371941350313e-06, "loss": 0.016897964477539062, "step": 146610 }, { "epoch": 1.2677365522131239, "grad_norm": 0.6000862324410955, "learning_rate": 1.7751859585382457e-06, "loss": 0.03598785400390625, "step": 146615 }, { "epoch": 1.267779785734667, "grad_norm": 0.599287849344039, "learning_rate": 1.774999981374911e-06, "loss": 0.14223594665527345, "step": 146620 }, { "epoch": 1.2678230192562105, "grad_norm": 9.734263639673605, "learning_rate": 1.774814009861169e-06, "loss": 0.040488815307617186, "step": 146625 }, { "epoch": 1.2678662527777538, "grad_norm": 13.611165570051055, "learning_rate": 1.7746280439978771e-06, "loss": 0.029549407958984374, "step": 146630 }, { "epoch": 1.267909486299297, "grad_norm": 5.471875946725065, "learning_rate": 1.7744420837858926e-06, "loss": 0.059879112243652347, "step": 146635 }, { "epoch": 1.2679527198208402, "grad_norm": 0.8270063241534169, "learning_rate": 1.7742561292260726e-06, "loss": 0.10394420623779296, "step": 146640 }, { "epoch": 1.2679959533423837, "grad_norm": 1.4586840475466658, "learning_rate": 1.7740701803192752e-06, "loss": 0.016463470458984376, "step": 146645 }, { "epoch": 1.268039186863927, "grad_norm": 18.916556561743754, "learning_rate": 1.773884237066356e-06, "loss": 0.06734390258789062, "step": 146650 }, { "epoch": 1.2680824203854701, "grad_norm": 0.34554751902054004, "learning_rate": 1.7736982994681758e-06, "loss": 0.06328506469726562, "step": 146655 }, { "epoch": 1.2681256539070134, "grad_norm": 10.154260639979475, "learning_rate": 1.7735123675255908e-06, "loss": 0.05790557861328125, "step": 146660 }, { "epoch": 1.2681688874285566, "grad_norm": 0.6601457247651252, "learning_rate": 1.7733264412394585e-06, "loss": 0.012054443359375, "step": 146665 }, { "epoch": 1.2682121209500998, "grad_norm": 0.7975512042271128, "learning_rate": 1.7731405206106369e-06, "loss": 0.018905830383300782, "step": 146670 }, { "epoch": 1.268255354471643, "grad_norm": 18.243221822092664, "learning_rate": 1.7729546056399814e-06, "loss": 0.13332672119140626, "step": 146675 }, { "epoch": 1.2682985879931863, "grad_norm": 0.41327375954997103, "learning_rate": 1.7727686963283518e-06, "loss": 0.1465301513671875, "step": 146680 }, { "epoch": 1.2683418215147297, "grad_norm": 8.35038739071777, "learning_rate": 1.7725827926766032e-06, "loss": 0.1237335205078125, "step": 146685 }, { "epoch": 1.268385055036273, "grad_norm": 1.7569586588097688, "learning_rate": 1.7723968946855952e-06, "loss": 0.016793060302734374, "step": 146690 }, { "epoch": 1.2684282885578162, "grad_norm": 62.56603437125767, "learning_rate": 1.7722110023561848e-06, "loss": 0.38873748779296874, "step": 146695 }, { "epoch": 1.2684715220793594, "grad_norm": 10.334883642910794, "learning_rate": 1.7720251156892285e-06, "loss": 0.018058013916015626, "step": 146700 }, { "epoch": 1.2685147556009027, "grad_norm": 13.040677411441651, "learning_rate": 1.7718392346855831e-06, "loss": 0.033317184448242186, "step": 146705 }, { "epoch": 1.268557989122446, "grad_norm": 1.9462422940263377, "learning_rate": 1.7716533593461074e-06, "loss": 0.17498626708984374, "step": 146710 }, { "epoch": 1.2686012226439893, "grad_norm": 0.03811332276385615, "learning_rate": 1.7714674896716567e-06, "loss": 0.006806755065917968, "step": 146715 }, { "epoch": 1.2686444561655326, "grad_norm": 6.102170247377011, "learning_rate": 1.7712816256630909e-06, "loss": 0.0609771728515625, "step": 146720 }, { "epoch": 1.2686876896870758, "grad_norm": 82.18775404381473, "learning_rate": 1.771095767321265e-06, "loss": 0.34397258758544924, "step": 146725 }, { "epoch": 1.268730923208619, "grad_norm": 18.469406187184557, "learning_rate": 1.7709099146470373e-06, "loss": 0.262188720703125, "step": 146730 }, { "epoch": 1.2687741567301623, "grad_norm": 1.1158906863051483, "learning_rate": 1.7707240676412634e-06, "loss": 0.0274383544921875, "step": 146735 }, { "epoch": 1.2688173902517055, "grad_norm": 1.1363228143789041, "learning_rate": 1.7705382263048025e-06, "loss": 0.03648567199707031, "step": 146740 }, { "epoch": 1.2688606237732487, "grad_norm": 11.765143688653641, "learning_rate": 1.7703523906385096e-06, "loss": 0.03943071365356445, "step": 146745 }, { "epoch": 1.2689038572947922, "grad_norm": 5.439243436903285, "learning_rate": 1.7701665606432436e-06, "loss": 0.2434467315673828, "step": 146750 }, { "epoch": 1.2689470908163354, "grad_norm": 0.09952644209167798, "learning_rate": 1.7699807363198612e-06, "loss": 0.026914215087890624, "step": 146755 }, { "epoch": 1.2689903243378786, "grad_norm": 6.438921090948533, "learning_rate": 1.7697949176692188e-06, "loss": 0.055490875244140626, "step": 146760 }, { "epoch": 1.2690335578594218, "grad_norm": 35.77454703642094, "learning_rate": 1.7696091046921726e-06, "loss": 0.049295806884765626, "step": 146765 }, { "epoch": 1.269076791380965, "grad_norm": 10.4778737950124, "learning_rate": 1.7694232973895812e-06, "loss": 0.04398136138916016, "step": 146770 }, { "epoch": 1.2691200249025085, "grad_norm": 2.6581484947267207, "learning_rate": 1.7692374957623012e-06, "loss": 0.03326873779296875, "step": 146775 }, { "epoch": 1.2691632584240518, "grad_norm": 0.4516998336678568, "learning_rate": 1.7690516998111875e-06, "loss": 0.084716796875, "step": 146780 }, { "epoch": 1.269206491945595, "grad_norm": 0.7230457788448463, "learning_rate": 1.7688659095370998e-06, "loss": 0.02989368438720703, "step": 146785 }, { "epoch": 1.2692497254671382, "grad_norm": 1.6305837791080868, "learning_rate": 1.768680124940894e-06, "loss": 0.0318267822265625, "step": 146790 }, { "epoch": 1.2692929589886814, "grad_norm": 0.19719828351978683, "learning_rate": 1.768494346023426e-06, "loss": 0.009812545776367188, "step": 146795 }, { "epoch": 1.2693361925102247, "grad_norm": 4.502771406851766, "learning_rate": 1.7683085727855539e-06, "loss": 0.05888786315917969, "step": 146800 }, { "epoch": 1.269379426031768, "grad_norm": 3.927671975834571, "learning_rate": 1.7681228052281338e-06, "loss": 0.15326156616210937, "step": 146805 }, { "epoch": 1.2694226595533111, "grad_norm": 1.2351633111088045, "learning_rate": 1.767937043352021e-06, "loss": 0.012562370300292969, "step": 146810 }, { "epoch": 1.2694658930748546, "grad_norm": 0.4062182318862294, "learning_rate": 1.7677512871580752e-06, "loss": 0.20044708251953125, "step": 146815 }, { "epoch": 1.2695091265963978, "grad_norm": 0.4778644002222492, "learning_rate": 1.7675655366471518e-06, "loss": 0.008794403076171875, "step": 146820 }, { "epoch": 1.269552360117941, "grad_norm": 4.909746511418852, "learning_rate": 1.767379791820106e-06, "loss": 0.07470779418945313, "step": 146825 }, { "epoch": 1.2695955936394843, "grad_norm": 1.8806252890428459, "learning_rate": 1.7671940526777968e-06, "loss": 0.033367919921875, "step": 146830 }, { "epoch": 1.2696388271610275, "grad_norm": 49.710562386831924, "learning_rate": 1.7670083192210794e-06, "loss": 0.4088153839111328, "step": 146835 }, { "epoch": 1.269682060682571, "grad_norm": 0.3482753920487356, "learning_rate": 1.7668225914508096e-06, "loss": 0.011597442626953124, "step": 146840 }, { "epoch": 1.2697252942041142, "grad_norm": 0.21762160909348363, "learning_rate": 1.7666368693678465e-06, "loss": 0.10012779235839844, "step": 146845 }, { "epoch": 1.2697685277256574, "grad_norm": 0.3346632742841278, "learning_rate": 1.7664511529730441e-06, "loss": 0.127099609375, "step": 146850 }, { "epoch": 1.2698117612472006, "grad_norm": 12.89082149656481, "learning_rate": 1.7662654422672608e-06, "loss": 0.0258087158203125, "step": 146855 }, { "epoch": 1.2698549947687439, "grad_norm": 9.71596806880014, "learning_rate": 1.7660797372513524e-06, "loss": 0.03893623352050781, "step": 146860 }, { "epoch": 1.269898228290287, "grad_norm": 0.17274350930636376, "learning_rate": 1.765894037926175e-06, "loss": 0.00937957763671875, "step": 146865 }, { "epoch": 1.2699414618118303, "grad_norm": 1.6824935587703187, "learning_rate": 1.7657083442925856e-06, "loss": 0.1677734375, "step": 146870 }, { "epoch": 1.2699846953333735, "grad_norm": 0.6890370837863657, "learning_rate": 1.7655226563514383e-06, "loss": 0.06474838256835938, "step": 146875 }, { "epoch": 1.270027928854917, "grad_norm": 49.19099023448728, "learning_rate": 1.765336974103593e-06, "loss": 0.07311782836914063, "step": 146880 }, { "epoch": 1.2700711623764602, "grad_norm": 0.5142956399927106, "learning_rate": 1.7651512975499047e-06, "loss": 0.04686794281005859, "step": 146885 }, { "epoch": 1.2701143958980035, "grad_norm": 0.41085887542438, "learning_rate": 1.7649656266912291e-06, "loss": 0.13770017623901368, "step": 146890 }, { "epoch": 1.2701576294195467, "grad_norm": 10.452906407905436, "learning_rate": 1.7647799615284235e-06, "loss": 0.056447601318359374, "step": 146895 }, { "epoch": 1.2702008629410901, "grad_norm": 11.925488640199777, "learning_rate": 1.7645943020623426e-06, "loss": 0.04156494140625, "step": 146900 }, { "epoch": 1.2702440964626334, "grad_norm": 0.08001019042256584, "learning_rate": 1.7644086482938434e-06, "loss": 0.029207992553710937, "step": 146905 }, { "epoch": 1.2702873299841766, "grad_norm": 7.14798799670468, "learning_rate": 1.7642230002237827e-06, "loss": 0.05311126708984375, "step": 146910 }, { "epoch": 1.2703305635057198, "grad_norm": 1.0902969497902513, "learning_rate": 1.7640373578530167e-06, "loss": 0.01048431396484375, "step": 146915 }, { "epoch": 1.270373797027263, "grad_norm": 10.908437458534513, "learning_rate": 1.7638517211824016e-06, "loss": 0.07399120330810546, "step": 146920 }, { "epoch": 1.2704170305488063, "grad_norm": 0.29313456994366743, "learning_rate": 1.7636660902127929e-06, "loss": 0.012622833251953125, "step": 146925 }, { "epoch": 1.2704602640703495, "grad_norm": 3.8340634492126857, "learning_rate": 1.7634804649450463e-06, "loss": 0.044436073303222655, "step": 146930 }, { "epoch": 1.2705034975918927, "grad_norm": 11.499960024236366, "learning_rate": 1.7632948453800185e-06, "loss": 0.07654266357421875, "step": 146935 }, { "epoch": 1.2705467311134362, "grad_norm": 0.3498293793190215, "learning_rate": 1.7631092315185659e-06, "loss": 0.18647270202636718, "step": 146940 }, { "epoch": 1.2705899646349794, "grad_norm": 5.1253469638464795, "learning_rate": 1.7629236233615447e-06, "loss": 0.03794174194335938, "step": 146945 }, { "epoch": 1.2706331981565226, "grad_norm": 0.2191643098093055, "learning_rate": 1.7627380209098103e-06, "loss": 0.01937103271484375, "step": 146950 }, { "epoch": 1.2706764316780659, "grad_norm": 9.235608121228427, "learning_rate": 1.7625524241642188e-06, "loss": 0.0366729736328125, "step": 146955 }, { "epoch": 1.270719665199609, "grad_norm": 3.1108529636133957, "learning_rate": 1.7623668331256255e-06, "loss": 0.02093963623046875, "step": 146960 }, { "epoch": 1.2707628987211526, "grad_norm": 0.14382874691009503, "learning_rate": 1.7621812477948878e-06, "loss": 0.12401390075683594, "step": 146965 }, { "epoch": 1.2708061322426958, "grad_norm": 3.234409732340736, "learning_rate": 1.7619956681728595e-06, "loss": 0.025922775268554688, "step": 146970 }, { "epoch": 1.270849365764239, "grad_norm": 86.72449795972904, "learning_rate": 1.761810094260399e-06, "loss": 0.19646759033203126, "step": 146975 }, { "epoch": 1.2708925992857822, "grad_norm": 1.777674315986357, "learning_rate": 1.7616245260583608e-06, "loss": 0.01683349609375, "step": 146980 }, { "epoch": 1.2709358328073255, "grad_norm": 1.8237939517368877, "learning_rate": 1.7614389635676012e-06, "loss": 0.054994964599609376, "step": 146985 }, { "epoch": 1.2709790663288687, "grad_norm": 30.985055186354824, "learning_rate": 1.761253406788975e-06, "loss": 0.029143905639648436, "step": 146990 }, { "epoch": 1.271022299850412, "grad_norm": 2.6929019816211075, "learning_rate": 1.7610678557233387e-06, "loss": 0.0886566162109375, "step": 146995 }, { "epoch": 1.2710655333719552, "grad_norm": 2.5675786421941242, "learning_rate": 1.7608823103715476e-06, "loss": 0.10864028930664063, "step": 147000 }, { "epoch": 1.2711087668934986, "grad_norm": 8.760052825653291, "learning_rate": 1.7606967707344583e-06, "loss": 0.13162155151367189, "step": 147005 }, { "epoch": 1.2711520004150418, "grad_norm": 3.0644823517406268, "learning_rate": 1.7605112368129265e-06, "loss": 0.25827484130859374, "step": 147010 }, { "epoch": 1.271195233936585, "grad_norm": 2.2623509983624865, "learning_rate": 1.7603257086078073e-06, "loss": 0.0435028076171875, "step": 147015 }, { "epoch": 1.2712384674581283, "grad_norm": 8.056333390951364, "learning_rate": 1.7601401861199555e-06, "loss": 0.06737480163574219, "step": 147020 }, { "epoch": 1.2712817009796715, "grad_norm": 2.3800616333201, "learning_rate": 1.7599546693502283e-06, "loss": 0.01674957275390625, "step": 147025 }, { "epoch": 1.271324934501215, "grad_norm": 0.5699643911425156, "learning_rate": 1.7597691582994806e-06, "loss": 0.02180633544921875, "step": 147030 }, { "epoch": 1.2713681680227582, "grad_norm": 2.7133858793036896, "learning_rate": 1.759583652968567e-06, "loss": 0.35080795288085936, "step": 147035 }, { "epoch": 1.2714114015443014, "grad_norm": 52.74584666347691, "learning_rate": 1.759398153358345e-06, "loss": 0.1336273193359375, "step": 147040 }, { "epoch": 1.2714546350658447, "grad_norm": 6.319928960089721, "learning_rate": 1.7592126594696687e-06, "loss": 0.041941070556640626, "step": 147045 }, { "epoch": 1.271497868587388, "grad_norm": 1.4781956309205544, "learning_rate": 1.759027171303394e-06, "loss": 0.012333297729492187, "step": 147050 }, { "epoch": 1.2715411021089311, "grad_norm": 66.86183480353046, "learning_rate": 1.7588416888603767e-06, "loss": 0.156878662109375, "step": 147055 }, { "epoch": 1.2715843356304743, "grad_norm": 6.8969611691211465, "learning_rate": 1.7586562121414718e-06, "loss": 0.036153411865234374, "step": 147060 }, { "epoch": 1.2716275691520176, "grad_norm": 0.5632486227039072, "learning_rate": 1.7584707411475333e-06, "loss": 0.04991836547851562, "step": 147065 }, { "epoch": 1.271670802673561, "grad_norm": 13.280299344606933, "learning_rate": 1.7582852758794197e-06, "loss": 0.11170501708984375, "step": 147070 }, { "epoch": 1.2717140361951043, "grad_norm": 1.3209186980521685, "learning_rate": 1.758099816337984e-06, "loss": 0.014337158203125, "step": 147075 }, { "epoch": 1.2717572697166475, "grad_norm": 9.303750231750657, "learning_rate": 1.7579143625240827e-06, "loss": 0.16617202758789062, "step": 147080 }, { "epoch": 1.2718005032381907, "grad_norm": 0.12623819804647166, "learning_rate": 1.7577289144385703e-06, "loss": 0.052508544921875, "step": 147085 }, { "epoch": 1.271843736759734, "grad_norm": 0.08750541092752094, "learning_rate": 1.7575434720823029e-06, "loss": 0.11390876770019531, "step": 147090 }, { "epoch": 1.2718869702812774, "grad_norm": 37.22031385454893, "learning_rate": 1.7573580354561338e-06, "loss": 0.12656230926513673, "step": 147095 }, { "epoch": 1.2719302038028206, "grad_norm": 2.178212654918961, "learning_rate": 1.7571726045609211e-06, "loss": 0.0547698974609375, "step": 147100 }, { "epoch": 1.2719734373243639, "grad_norm": 0.4423753295020894, "learning_rate": 1.7569871793975177e-06, "loss": 0.07271194458007812, "step": 147105 }, { "epoch": 1.272016670845907, "grad_norm": 1.0298523998426177, "learning_rate": 1.75680175996678e-06, "loss": 0.0355621337890625, "step": 147110 }, { "epoch": 1.2720599043674503, "grad_norm": 40.43873215125101, "learning_rate": 1.756616346269563e-06, "loss": 0.1712982177734375, "step": 147115 }, { "epoch": 1.2721031378889935, "grad_norm": 0.12790340941999978, "learning_rate": 1.7564309383067217e-06, "loss": 0.10086631774902344, "step": 147120 }, { "epoch": 1.2721463714105368, "grad_norm": 0.3431649675593611, "learning_rate": 1.75624553607911e-06, "loss": 0.049896240234375, "step": 147125 }, { "epoch": 1.2721896049320802, "grad_norm": 1.2246786269444385, "learning_rate": 1.7560601395875844e-06, "loss": 0.33088531494140627, "step": 147130 }, { "epoch": 1.2722328384536234, "grad_norm": 0.6587736780952661, "learning_rate": 1.7558747488329992e-06, "loss": 0.1573699951171875, "step": 147135 }, { "epoch": 1.2722760719751667, "grad_norm": 41.4042727129067, "learning_rate": 1.7556893638162105e-06, "loss": 0.25655555725097656, "step": 147140 }, { "epoch": 1.27231930549671, "grad_norm": 1.1268647001876435, "learning_rate": 1.7555039845380724e-06, "loss": 0.046112442016601564, "step": 147145 }, { "epoch": 1.2723625390182531, "grad_norm": 1.4299612543850873, "learning_rate": 1.7553186109994401e-06, "loss": 0.042529296875, "step": 147150 }, { "epoch": 1.2724057725397966, "grad_norm": 0.1458262031499102, "learning_rate": 1.7551332432011677e-06, "loss": 0.034281158447265626, "step": 147155 }, { "epoch": 1.2724490060613398, "grad_norm": 5.018398369607176, "learning_rate": 1.7549478811441109e-06, "loss": 0.10683746337890625, "step": 147160 }, { "epoch": 1.272492239582883, "grad_norm": 1.0224363892289012, "learning_rate": 1.754762524829124e-06, "loss": 0.01152801513671875, "step": 147165 }, { "epoch": 1.2725354731044263, "grad_norm": 1.4260640982557315, "learning_rate": 1.7545771742570636e-06, "loss": 0.1925872802734375, "step": 147170 }, { "epoch": 1.2725787066259695, "grad_norm": 1.17047421619773, "learning_rate": 1.7543918294287825e-06, "loss": 0.02530364990234375, "step": 147175 }, { "epoch": 1.2726219401475127, "grad_norm": 8.883611710733634, "learning_rate": 1.7542064903451366e-06, "loss": 0.0314483642578125, "step": 147180 }, { "epoch": 1.272665173669056, "grad_norm": 4.62474646135938, "learning_rate": 1.7540211570069798e-06, "loss": 0.0755197525024414, "step": 147185 }, { "epoch": 1.2727084071905992, "grad_norm": 46.20326532480721, "learning_rate": 1.7538358294151672e-06, "loss": 0.13568077087402344, "step": 147190 }, { "epoch": 1.2727516407121426, "grad_norm": 2.1634831453792054, "learning_rate": 1.7536505075705533e-06, "loss": 0.0291473388671875, "step": 147195 }, { "epoch": 1.2727948742336859, "grad_norm": 1.5955325471343265, "learning_rate": 1.7534651914739946e-06, "loss": 0.03307952880859375, "step": 147200 }, { "epoch": 1.272838107755229, "grad_norm": 2.9138525877589263, "learning_rate": 1.7532798811263438e-06, "loss": 0.0202056884765625, "step": 147205 }, { "epoch": 1.2728813412767723, "grad_norm": 0.3017910020102906, "learning_rate": 1.7530945765284556e-06, "loss": 0.179656982421875, "step": 147210 }, { "epoch": 1.2729245747983156, "grad_norm": 8.788875146822333, "learning_rate": 1.7529092776811848e-06, "loss": 0.04554443359375, "step": 147215 }, { "epoch": 1.272967808319859, "grad_norm": 2.9371212837903498, "learning_rate": 1.752723984585387e-06, "loss": 0.032441329956054685, "step": 147220 }, { "epoch": 1.2730110418414022, "grad_norm": 30.19765372702906, "learning_rate": 1.7525386972419143e-06, "loss": 0.12169647216796875, "step": 147225 }, { "epoch": 1.2730542753629455, "grad_norm": 1.2426127675378387, "learning_rate": 1.7523534156516248e-06, "loss": 0.0439178466796875, "step": 147230 }, { "epoch": 1.2730975088844887, "grad_norm": 0.28278234398874, "learning_rate": 1.7521681398153703e-06, "loss": 0.032566070556640625, "step": 147235 }, { "epoch": 1.273140742406032, "grad_norm": 0.5922868025521237, "learning_rate": 1.7519828697340065e-06, "loss": 0.0212127685546875, "step": 147240 }, { "epoch": 1.2731839759275752, "grad_norm": 5.901182838524975, "learning_rate": 1.7517976054083872e-06, "loss": 0.05986175537109375, "step": 147245 }, { "epoch": 1.2732272094491184, "grad_norm": 1.8607597502497442, "learning_rate": 1.7516123468393673e-06, "loss": 0.03759346008300781, "step": 147250 }, { "epoch": 1.2732704429706616, "grad_norm": 1.8984295143283278, "learning_rate": 1.7514270940278004e-06, "loss": 0.16548919677734375, "step": 147255 }, { "epoch": 1.273313676492205, "grad_norm": 5.942026656348355, "learning_rate": 1.7512418469745407e-06, "loss": 0.156097412109375, "step": 147260 }, { "epoch": 1.2733569100137483, "grad_norm": 0.06621033496659573, "learning_rate": 1.7510566056804443e-06, "loss": 0.0429168701171875, "step": 147265 }, { "epoch": 1.2734001435352915, "grad_norm": 3.338994244264297, "learning_rate": 1.7508713701463645e-06, "loss": 0.06754417419433593, "step": 147270 }, { "epoch": 1.2734433770568347, "grad_norm": 4.490357954211536, "learning_rate": 1.7506861403731559e-06, "loss": 0.015114212036132812, "step": 147275 }, { "epoch": 1.273486610578378, "grad_norm": 2.9537207078059278, "learning_rate": 1.7505009163616722e-06, "loss": 0.04807338714599609, "step": 147280 }, { "epoch": 1.2735298440999214, "grad_norm": 0.7714356320239064, "learning_rate": 1.7503156981127682e-06, "loss": 0.1000091552734375, "step": 147285 }, { "epoch": 1.2735730776214647, "grad_norm": 0.7420476794450952, "learning_rate": 1.750130485627296e-06, "loss": 0.037862396240234374, "step": 147290 }, { "epoch": 1.2736163111430079, "grad_norm": 21.705250567016446, "learning_rate": 1.749945278906113e-06, "loss": 0.05748147964477539, "step": 147295 }, { "epoch": 1.2736595446645511, "grad_norm": 0.48619257995655024, "learning_rate": 1.749760077950072e-06, "loss": 0.08872432708740234, "step": 147300 }, { "epoch": 1.2737027781860943, "grad_norm": 1.0814477529508182, "learning_rate": 1.7495748827600273e-06, "loss": 0.14722614288330077, "step": 147305 }, { "epoch": 1.2737460117076376, "grad_norm": 0.43684389793667133, "learning_rate": 1.749389693336831e-06, "loss": 0.26506500244140624, "step": 147310 }, { "epoch": 1.2737892452291808, "grad_norm": 15.733012596470788, "learning_rate": 1.7492045096813407e-06, "loss": 0.05047149658203125, "step": 147315 }, { "epoch": 1.273832478750724, "grad_norm": 3.621983451171886, "learning_rate": 1.7490193317944072e-06, "loss": 0.026491928100585937, "step": 147320 }, { "epoch": 1.2738757122722675, "grad_norm": 1.809673379994302, "learning_rate": 1.7488341596768871e-06, "loss": 0.025208282470703124, "step": 147325 }, { "epoch": 1.2739189457938107, "grad_norm": 1.5943661950154104, "learning_rate": 1.7486489933296333e-06, "loss": 0.06102294921875, "step": 147330 }, { "epoch": 1.273962179315354, "grad_norm": 0.5498910890098443, "learning_rate": 1.7484638327535e-06, "loss": 0.03853874206542969, "step": 147335 }, { "epoch": 1.2740054128368972, "grad_norm": 1.5417943226540376, "learning_rate": 1.7482786779493409e-06, "loss": 0.030955123901367187, "step": 147340 }, { "epoch": 1.2740486463584406, "grad_norm": 1.041221875367025, "learning_rate": 1.7480935289180097e-06, "loss": 0.030389404296875, "step": 147345 }, { "epoch": 1.2740918798799838, "grad_norm": 3.1335042242852484, "learning_rate": 1.7479083856603594e-06, "loss": 0.22273788452148438, "step": 147350 }, { "epoch": 1.274135113401527, "grad_norm": 0.9289853483398514, "learning_rate": 1.747723248177246e-06, "loss": 0.10132923126220703, "step": 147355 }, { "epoch": 1.2741783469230703, "grad_norm": 0.25725433444190154, "learning_rate": 1.7475381164695228e-06, "loss": 0.01949596405029297, "step": 147360 }, { "epoch": 1.2742215804446135, "grad_norm": 2.784709796932725, "learning_rate": 1.7473529905380429e-06, "loss": 0.07273406982421875, "step": 147365 }, { "epoch": 1.2742648139661568, "grad_norm": 1.0358427642480323, "learning_rate": 1.7471678703836592e-06, "loss": 0.016481399536132812, "step": 147370 }, { "epoch": 1.2743080474877, "grad_norm": 4.735880596984376, "learning_rate": 1.7469827560072278e-06, "loss": 0.03046722412109375, "step": 147375 }, { "epoch": 1.2743512810092432, "grad_norm": 5.409846659756351, "learning_rate": 1.746797647409601e-06, "loss": 0.0485076904296875, "step": 147380 }, { "epoch": 1.2743945145307867, "grad_norm": 1.4291860461484065, "learning_rate": 1.7466125445916317e-06, "loss": 0.01501312255859375, "step": 147385 }, { "epoch": 1.27443774805233, "grad_norm": 4.017400315940309, "learning_rate": 1.7464274475541758e-06, "loss": 0.0245513916015625, "step": 147390 }, { "epoch": 1.2744809815738731, "grad_norm": 1.4922853622200372, "learning_rate": 1.746242356298085e-06, "loss": 0.03807144165039063, "step": 147395 }, { "epoch": 1.2745242150954164, "grad_norm": 44.78815775082465, "learning_rate": 1.7460572708242146e-06, "loss": 0.24233970642089844, "step": 147400 }, { "epoch": 1.2745674486169596, "grad_norm": 34.79079636068095, "learning_rate": 1.7458721911334171e-06, "loss": 0.244244384765625, "step": 147405 }, { "epoch": 1.274610682138503, "grad_norm": 25.40565187606553, "learning_rate": 1.7456871172265462e-06, "loss": 0.16433334350585938, "step": 147410 }, { "epoch": 1.2746539156600463, "grad_norm": 21.02895864668925, "learning_rate": 1.7455020491044542e-06, "loss": 0.2944000244140625, "step": 147415 }, { "epoch": 1.2746971491815895, "grad_norm": 5.24572655600625, "learning_rate": 1.7453169867679972e-06, "loss": 0.033930206298828126, "step": 147420 }, { "epoch": 1.2747403827031327, "grad_norm": 10.868299810840298, "learning_rate": 1.7451319302180274e-06, "loss": 0.15138587951660157, "step": 147425 }, { "epoch": 1.274783616224676, "grad_norm": 1.4138961685174343, "learning_rate": 1.7449468794553966e-06, "loss": 0.06172637939453125, "step": 147430 }, { "epoch": 1.2748268497462192, "grad_norm": 10.385168916239516, "learning_rate": 1.7447618344809617e-06, "loss": 0.089300537109375, "step": 147435 }, { "epoch": 1.2748700832677624, "grad_norm": 2.2371411915120683, "learning_rate": 1.7445767952955738e-06, "loss": 0.020928955078125, "step": 147440 }, { "epoch": 1.2749133167893056, "grad_norm": 1.7787451227019915, "learning_rate": 1.7443917619000863e-06, "loss": 0.03491363525390625, "step": 147445 }, { "epoch": 1.274956550310849, "grad_norm": 3.2933645418044595, "learning_rate": 1.7442067342953534e-06, "loss": 0.01978015899658203, "step": 147450 }, { "epoch": 1.2749997838323923, "grad_norm": 7.281655497765242, "learning_rate": 1.7440217124822287e-06, "loss": 0.07226448059082032, "step": 147455 }, { "epoch": 1.2750430173539355, "grad_norm": 0.8401171177642601, "learning_rate": 1.7438366964615646e-06, "loss": 0.0490447998046875, "step": 147460 }, { "epoch": 1.2750862508754788, "grad_norm": 4.166209801107811, "learning_rate": 1.7436516862342144e-06, "loss": 0.018527984619140625, "step": 147465 }, { "epoch": 1.275129484397022, "grad_norm": 0.24300105888906756, "learning_rate": 1.7434666818010321e-06, "loss": 0.017159271240234374, "step": 147470 }, { "epoch": 1.2751727179185655, "grad_norm": 0.8517597322359146, "learning_rate": 1.74328168316287e-06, "loss": 0.058310317993164065, "step": 147475 }, { "epoch": 1.2752159514401087, "grad_norm": 0.7204321875563432, "learning_rate": 1.7430966903205814e-06, "loss": 0.00841522216796875, "step": 147480 }, { "epoch": 1.275259184961652, "grad_norm": 1.2076514959346565, "learning_rate": 1.742911703275019e-06, "loss": 0.043450927734375, "step": 147485 }, { "epoch": 1.2753024184831951, "grad_norm": 8.595461681398843, "learning_rate": 1.7427267220270381e-06, "loss": 0.08821182250976563, "step": 147490 }, { "epoch": 1.2753456520047384, "grad_norm": 2.3234622217293284, "learning_rate": 1.7425417465774906e-06, "loss": 0.09036846160888672, "step": 147495 }, { "epoch": 1.2753888855262816, "grad_norm": 5.400075526692265, "learning_rate": 1.7423567769272291e-06, "loss": 0.03788681030273437, "step": 147500 }, { "epoch": 1.2754321190478248, "grad_norm": 3.467142212335881, "learning_rate": 1.7421718130771077e-06, "loss": 0.04729537963867188, "step": 147505 }, { "epoch": 1.275475352569368, "grad_norm": 24.513448855764214, "learning_rate": 1.7419868550279767e-06, "loss": 0.09659347534179688, "step": 147510 }, { "epoch": 1.2755185860909115, "grad_norm": 10.082799164766108, "learning_rate": 1.741801902780693e-06, "loss": 0.0900634765625, "step": 147515 }, { "epoch": 1.2755618196124547, "grad_norm": 0.17194193494065615, "learning_rate": 1.7416169563361076e-06, "loss": 0.10961074829101562, "step": 147520 }, { "epoch": 1.275605053133998, "grad_norm": 6.030331521173895, "learning_rate": 1.7414320156950735e-06, "loss": 0.0472991943359375, "step": 147525 }, { "epoch": 1.2756482866555412, "grad_norm": 11.56816518184554, "learning_rate": 1.741247080858444e-06, "loss": 0.32183074951171875, "step": 147530 }, { "epoch": 1.2756915201770844, "grad_norm": 23.943395434362248, "learning_rate": 1.74106215182707e-06, "loss": 0.038619041442871094, "step": 147535 }, { "epoch": 1.2757347536986279, "grad_norm": 4.250974259853588, "learning_rate": 1.7408772286018077e-06, "loss": 0.06263179779052734, "step": 147540 }, { "epoch": 1.275777987220171, "grad_norm": 0.7536825739906332, "learning_rate": 1.7406923111835074e-06, "loss": 0.02358856201171875, "step": 147545 }, { "epoch": 1.2758212207417143, "grad_norm": 1.2258501158128041, "learning_rate": 1.7405073995730235e-06, "loss": 0.054486846923828124, "step": 147550 }, { "epoch": 1.2758644542632576, "grad_norm": 6.837515892572705, "learning_rate": 1.7403224937712085e-06, "loss": 0.03953857421875, "step": 147555 }, { "epoch": 1.2759076877848008, "grad_norm": 0.9123728405049559, "learning_rate": 1.7401375937789147e-06, "loss": 0.0178070068359375, "step": 147560 }, { "epoch": 1.275950921306344, "grad_norm": 4.132459892042112, "learning_rate": 1.7399526995969953e-06, "loss": 0.05679779052734375, "step": 147565 }, { "epoch": 1.2759941548278873, "grad_norm": 0.17204907234567493, "learning_rate": 1.7397678112263025e-06, "loss": 0.06262969970703125, "step": 147570 }, { "epoch": 1.2760373883494305, "grad_norm": 0.42028781838834534, "learning_rate": 1.7395829286676879e-06, "loss": 0.01669921875, "step": 147575 }, { "epoch": 1.276080621870974, "grad_norm": 2.411477489499507, "learning_rate": 1.7393980519220066e-06, "loss": 0.24878959655761718, "step": 147580 }, { "epoch": 1.2761238553925172, "grad_norm": 2.8172980065225994, "learning_rate": 1.7392131809901103e-06, "loss": 0.0209686279296875, "step": 147585 }, { "epoch": 1.2761670889140604, "grad_norm": 1.4219651519164997, "learning_rate": 1.7390283158728515e-06, "loss": 0.006448078155517578, "step": 147590 }, { "epoch": 1.2762103224356036, "grad_norm": 43.642218089801624, "learning_rate": 1.7388434565710813e-06, "loss": 0.20093135833740233, "step": 147595 }, { "epoch": 1.276253555957147, "grad_norm": 27.97553702172424, "learning_rate": 1.7386586030856552e-06, "loss": 0.08603286743164062, "step": 147600 }, { "epoch": 1.2762967894786903, "grad_norm": 9.9119516644393, "learning_rate": 1.7384737554174226e-06, "loss": 0.22733116149902344, "step": 147605 }, { "epoch": 1.2763400230002335, "grad_norm": 3.1209429987023336, "learning_rate": 1.738288913567239e-06, "loss": 0.06884498596191406, "step": 147610 }, { "epoch": 1.2763832565217768, "grad_norm": 30.486916730767025, "learning_rate": 1.7381040775359554e-06, "loss": 0.1955799102783203, "step": 147615 }, { "epoch": 1.27642649004332, "grad_norm": 0.28876956415294697, "learning_rate": 1.7379192473244242e-06, "loss": 0.01645355224609375, "step": 147620 }, { "epoch": 1.2764697235648632, "grad_norm": 0.49537314101985386, "learning_rate": 1.7377344229334975e-06, "loss": 0.035167694091796875, "step": 147625 }, { "epoch": 1.2765129570864064, "grad_norm": 4.448912057242801, "learning_rate": 1.737549604364029e-06, "loss": 0.2906646728515625, "step": 147630 }, { "epoch": 1.2765561906079497, "grad_norm": 2.3464022618545246, "learning_rate": 1.7373647916168695e-06, "loss": 0.033161163330078125, "step": 147635 }, { "epoch": 1.2765994241294931, "grad_norm": 0.20809489200262377, "learning_rate": 1.737179984692871e-06, "loss": 0.015103912353515625, "step": 147640 }, { "epoch": 1.2766426576510363, "grad_norm": 0.16683079957660665, "learning_rate": 1.7369951835928882e-06, "loss": 0.013175582885742188, "step": 147645 }, { "epoch": 1.2766858911725796, "grad_norm": 34.068231527410106, "learning_rate": 1.7368103883177716e-06, "loss": 0.07843017578125, "step": 147650 }, { "epoch": 1.2767291246941228, "grad_norm": 0.32836938037565877, "learning_rate": 1.736625598868373e-06, "loss": 0.04041900634765625, "step": 147655 }, { "epoch": 1.276772358215666, "grad_norm": 0.9227868492776349, "learning_rate": 1.7364408152455467e-06, "loss": 0.05728302001953125, "step": 147660 }, { "epoch": 1.2768155917372095, "grad_norm": 15.19626079167544, "learning_rate": 1.7362560374501435e-06, "loss": 0.07416610717773438, "step": 147665 }, { "epoch": 1.2768588252587527, "grad_norm": 0.08122335112976549, "learning_rate": 1.7360712654830144e-06, "loss": 0.1821868896484375, "step": 147670 }, { "epoch": 1.276902058780296, "grad_norm": 8.818814441002473, "learning_rate": 1.7358864993450148e-06, "loss": 0.09079055786132813, "step": 147675 }, { "epoch": 1.2769452923018392, "grad_norm": 0.557533977731093, "learning_rate": 1.7357017390369944e-06, "loss": 0.042176437377929685, "step": 147680 }, { "epoch": 1.2769885258233824, "grad_norm": 54.54764858252828, "learning_rate": 1.735516984559806e-06, "loss": 0.07818088531494141, "step": 147685 }, { "epoch": 1.2770317593449256, "grad_norm": 11.602172848959642, "learning_rate": 1.7353322359143016e-06, "loss": 0.03804779052734375, "step": 147690 }, { "epoch": 1.2770749928664689, "grad_norm": 1.6675622841776967, "learning_rate": 1.7351474931013332e-06, "loss": 0.026381683349609376, "step": 147695 }, { "epoch": 1.277118226388012, "grad_norm": 2.155988289783583, "learning_rate": 1.7349627561217518e-06, "loss": 0.026912117004394533, "step": 147700 }, { "epoch": 1.2771614599095555, "grad_norm": 14.940386786068617, "learning_rate": 1.7347780249764113e-06, "loss": 0.08371353149414062, "step": 147705 }, { "epoch": 1.2772046934310988, "grad_norm": 1.156776890649015, "learning_rate": 1.7345932996661618e-06, "loss": 0.13481063842773439, "step": 147710 }, { "epoch": 1.277247926952642, "grad_norm": 0.3752452778220673, "learning_rate": 1.7344085801918573e-06, "loss": 0.08085365295410156, "step": 147715 }, { "epoch": 1.2772911604741852, "grad_norm": 0.95142163989171, "learning_rate": 1.7342238665543484e-06, "loss": 0.46436080932617185, "step": 147720 }, { "epoch": 1.2773343939957285, "grad_norm": 11.00807265341361, "learning_rate": 1.7340391587544872e-06, "loss": 0.1048095703125, "step": 147725 }, { "epoch": 1.277377627517272, "grad_norm": 2.422524608772564, "learning_rate": 1.7338544567931259e-06, "loss": 0.061709022521972655, "step": 147730 }, { "epoch": 1.2774208610388151, "grad_norm": 1.3224036448431555, "learning_rate": 1.733669760671114e-06, "loss": 0.02398681640625, "step": 147735 }, { "epoch": 1.2774640945603584, "grad_norm": 16.824673610370755, "learning_rate": 1.7334850703893072e-06, "loss": 0.15431594848632812, "step": 147740 }, { "epoch": 1.2775073280819016, "grad_norm": 0.9061478252171842, "learning_rate": 1.7333003859485551e-06, "loss": 0.01493988037109375, "step": 147745 }, { "epoch": 1.2775505616034448, "grad_norm": 0.27040261883358924, "learning_rate": 1.73311570734971e-06, "loss": 0.02916259765625, "step": 147750 }, { "epoch": 1.277593795124988, "grad_norm": 0.09151052989080422, "learning_rate": 1.7329310345936226e-06, "loss": 0.036882781982421876, "step": 147755 }, { "epoch": 1.2776370286465313, "grad_norm": 1.7390839627757801, "learning_rate": 1.7327463676811446e-06, "loss": 0.05523109436035156, "step": 147760 }, { "epoch": 1.2776802621680745, "grad_norm": 0.36459950495664367, "learning_rate": 1.7325617066131296e-06, "loss": 0.143780517578125, "step": 147765 }, { "epoch": 1.277723495689618, "grad_norm": 23.193459372339127, "learning_rate": 1.7323770513904267e-06, "loss": 0.06172943115234375, "step": 147770 }, { "epoch": 1.2777667292111612, "grad_norm": 33.140021690414216, "learning_rate": 1.7321924020138902e-06, "loss": 0.2201385498046875, "step": 147775 }, { "epoch": 1.2778099627327044, "grad_norm": 0.6772326082398591, "learning_rate": 1.7320077584843702e-06, "loss": 0.13920097351074218, "step": 147780 }, { "epoch": 1.2778531962542476, "grad_norm": 6.732116413736642, "learning_rate": 1.7318231208027182e-06, "loss": 0.034820556640625, "step": 147785 }, { "epoch": 1.2778964297757909, "grad_norm": 0.04441229439225368, "learning_rate": 1.7316384889697862e-06, "loss": 0.041305923461914064, "step": 147790 }, { "epoch": 1.2779396632973343, "grad_norm": 4.630744842242504, "learning_rate": 1.7314538629864239e-06, "loss": 0.043950653076171874, "step": 147795 }, { "epoch": 1.2779828968188776, "grad_norm": 3.9606628739858087, "learning_rate": 1.7312692428534855e-06, "loss": 0.03337554931640625, "step": 147800 }, { "epoch": 1.2780261303404208, "grad_norm": 7.122218657493884, "learning_rate": 1.7310846285718215e-06, "loss": 0.11379165649414062, "step": 147805 }, { "epoch": 1.278069363861964, "grad_norm": 3.077340830614984, "learning_rate": 1.7309000201422832e-06, "loss": 0.09119949340820313, "step": 147810 }, { "epoch": 1.2781125973835072, "grad_norm": 2.2243473993701466, "learning_rate": 1.7307154175657214e-06, "loss": 0.01605224609375, "step": 147815 }, { "epoch": 1.2781558309050505, "grad_norm": 0.6953877815266389, "learning_rate": 1.730530820842987e-06, "loss": 0.03826065063476562, "step": 147820 }, { "epoch": 1.2781990644265937, "grad_norm": 0.07359269616405657, "learning_rate": 1.7303462299749335e-06, "loss": 0.025485992431640625, "step": 147825 }, { "epoch": 1.278242297948137, "grad_norm": 6.7170987953013075, "learning_rate": 1.73016164496241e-06, "loss": 0.21812896728515624, "step": 147830 }, { "epoch": 1.2782855314696804, "grad_norm": 0.6931291727858018, "learning_rate": 1.72997706580627e-06, "loss": 0.056623077392578124, "step": 147835 }, { "epoch": 1.2783287649912236, "grad_norm": 0.7355339102743716, "learning_rate": 1.7297924925073632e-06, "loss": 0.06914825439453125, "step": 147840 }, { "epoch": 1.2783719985127668, "grad_norm": 0.7683732535962553, "learning_rate": 1.729607925066541e-06, "loss": 0.018515777587890626, "step": 147845 }, { "epoch": 1.27841523203431, "grad_norm": 12.611728658732055, "learning_rate": 1.729423363484655e-06, "loss": 0.052899932861328124, "step": 147850 }, { "epoch": 1.2784584655558535, "grad_norm": 1.7227975182585744, "learning_rate": 1.7292388077625562e-06, "loss": 0.0627197265625, "step": 147855 }, { "epoch": 1.2785016990773967, "grad_norm": 44.130979772241034, "learning_rate": 1.729054257901095e-06, "loss": 0.0852935791015625, "step": 147860 }, { "epoch": 1.27854493259894, "grad_norm": 0.3808712249678803, "learning_rate": 1.728869713901124e-06, "loss": 0.06874237060546876, "step": 147865 }, { "epoch": 1.2785881661204832, "grad_norm": 5.667509489088291, "learning_rate": 1.7286851757634934e-06, "loss": 0.11079540252685546, "step": 147870 }, { "epoch": 1.2786313996420264, "grad_norm": 30.032770185983257, "learning_rate": 1.728500643489055e-06, "loss": 0.07596206665039062, "step": 147875 }, { "epoch": 1.2786746331635697, "grad_norm": 2.6732504025354284, "learning_rate": 1.7283161170786575e-06, "loss": 0.0708740234375, "step": 147880 }, { "epoch": 1.278717866685113, "grad_norm": 1.9912599210298692, "learning_rate": 1.728131596533155e-06, "loss": 0.031221771240234376, "step": 147885 }, { "epoch": 1.2787611002066561, "grad_norm": 2.4987594144689127, "learning_rate": 1.7279470818533963e-06, "loss": 0.14156227111816405, "step": 147890 }, { "epoch": 1.2788043337281996, "grad_norm": 2.3098663040379965, "learning_rate": 1.7277625730402341e-06, "loss": 0.014562225341796875, "step": 147895 }, { "epoch": 1.2788475672497428, "grad_norm": 0.1615703069338217, "learning_rate": 1.7275780700945188e-06, "loss": 0.043889617919921874, "step": 147900 }, { "epoch": 1.278890800771286, "grad_norm": 13.866679636466566, "learning_rate": 1.727393573017101e-06, "loss": 0.028326797485351562, "step": 147905 }, { "epoch": 1.2789340342928293, "grad_norm": 0.4182700097184476, "learning_rate": 1.7272090818088313e-06, "loss": 0.02396240234375, "step": 147910 }, { "epoch": 1.2789772678143725, "grad_norm": 0.49100546117723803, "learning_rate": 1.7270245964705613e-06, "loss": 0.036557579040527345, "step": 147915 }, { "epoch": 1.279020501335916, "grad_norm": 1.3219649959430597, "learning_rate": 1.726840117003141e-06, "loss": 0.08523788452148437, "step": 147920 }, { "epoch": 1.2790637348574592, "grad_norm": 7.549104401772849, "learning_rate": 1.7266556434074208e-06, "loss": 0.1423370361328125, "step": 147925 }, { "epoch": 1.2791069683790024, "grad_norm": 4.692268276945877, "learning_rate": 1.7264711756842528e-06, "loss": 0.057586669921875, "step": 147930 }, { "epoch": 1.2791502019005456, "grad_norm": 1.6457713037869295, "learning_rate": 1.7262867138344866e-06, "loss": 0.024729347229003905, "step": 147935 }, { "epoch": 1.2791934354220889, "grad_norm": 1.0070541088342624, "learning_rate": 1.7261022578589743e-06, "loss": 0.16134109497070312, "step": 147940 }, { "epoch": 1.279236668943632, "grad_norm": 1.2213003252643688, "learning_rate": 1.7259178077585663e-06, "loss": 0.0540283203125, "step": 147945 }, { "epoch": 1.2792799024651753, "grad_norm": 0.29045324472771855, "learning_rate": 1.7257333635341124e-06, "loss": 0.209552001953125, "step": 147950 }, { "epoch": 1.2793231359867185, "grad_norm": 0.9825442894262656, "learning_rate": 1.7255489251864624e-06, "loss": 0.07875213623046876, "step": 147955 }, { "epoch": 1.279366369508262, "grad_norm": 0.19021620672228273, "learning_rate": 1.7253644927164694e-06, "loss": 0.1052642822265625, "step": 147960 }, { "epoch": 1.2794096030298052, "grad_norm": 0.054491721847183386, "learning_rate": 1.725180066124983e-06, "loss": 0.21148643493652344, "step": 147965 }, { "epoch": 1.2794528365513484, "grad_norm": 1.8308410196356881, "learning_rate": 1.724995645412853e-06, "loss": 0.011740875244140626, "step": 147970 }, { "epoch": 1.2794960700728917, "grad_norm": 1.8168257666285328, "learning_rate": 1.724811230580931e-06, "loss": 0.10568351745605468, "step": 147975 }, { "epoch": 1.279539303594435, "grad_norm": 6.018785669981183, "learning_rate": 1.7246268216300665e-06, "loss": 0.08626556396484375, "step": 147980 }, { "epoch": 1.2795825371159784, "grad_norm": 2.331710638527123, "learning_rate": 1.7244424185611097e-06, "loss": 0.01761016845703125, "step": 147985 }, { "epoch": 1.2796257706375216, "grad_norm": 3.629187123007659, "learning_rate": 1.7242580213749123e-06, "loss": 0.06299209594726562, "step": 147990 }, { "epoch": 1.2796690041590648, "grad_norm": 10.684388792009303, "learning_rate": 1.7240736300723233e-06, "loss": 0.4866218566894531, "step": 147995 }, { "epoch": 1.279712237680608, "grad_norm": 1.513307983193962, "learning_rate": 1.7238892446541954e-06, "loss": 0.04851531982421875, "step": 148000 }, { "epoch": 1.2797554712021513, "grad_norm": 0.23841293680508965, "learning_rate": 1.7237048651213771e-06, "loss": 0.04012107849121094, "step": 148005 }, { "epoch": 1.2797987047236945, "grad_norm": 1.426640054916802, "learning_rate": 1.7235204914747194e-06, "loss": 0.11191596984863281, "step": 148010 }, { "epoch": 1.2798419382452377, "grad_norm": 2.188243708953122, "learning_rate": 1.7233361237150726e-06, "loss": 0.010261344909667968, "step": 148015 }, { "epoch": 1.279885171766781, "grad_norm": 4.749579040584393, "learning_rate": 1.7231517618432851e-06, "loss": 0.13408966064453126, "step": 148020 }, { "epoch": 1.2799284052883244, "grad_norm": 7.272595498398686, "learning_rate": 1.7229674058602103e-06, "loss": 0.03697967529296875, "step": 148025 }, { "epoch": 1.2799716388098676, "grad_norm": 2.0990699516120976, "learning_rate": 1.722783055766697e-06, "loss": 0.010650253295898438, "step": 148030 }, { "epoch": 1.2800148723314109, "grad_norm": 10.208645746067917, "learning_rate": 1.7225987115635955e-06, "loss": 0.16515331268310546, "step": 148035 }, { "epoch": 1.280058105852954, "grad_norm": 2.0243288612723154, "learning_rate": 1.7224143732517557e-06, "loss": 0.025939178466796876, "step": 148040 }, { "epoch": 1.2801013393744975, "grad_norm": 1.2199839423693593, "learning_rate": 1.7222300408320268e-06, "loss": 0.0417755126953125, "step": 148045 }, { "epoch": 1.2801445728960408, "grad_norm": 23.15440485419056, "learning_rate": 1.7220457143052616e-06, "loss": 0.05052947998046875, "step": 148050 }, { "epoch": 1.280187806417584, "grad_norm": 50.610057575442056, "learning_rate": 1.7218613936723068e-06, "loss": 0.15069046020507812, "step": 148055 }, { "epoch": 1.2802310399391272, "grad_norm": 9.173369553108046, "learning_rate": 1.7216770789340156e-06, "loss": 0.03535003662109375, "step": 148060 }, { "epoch": 1.2802742734606705, "grad_norm": 1.64265410265683, "learning_rate": 1.7214927700912373e-06, "loss": 0.035255813598632814, "step": 148065 }, { "epoch": 1.2803175069822137, "grad_norm": 0.37436595956966706, "learning_rate": 1.7213084671448209e-06, "loss": 0.087835693359375, "step": 148070 }, { "epoch": 1.280360740503757, "grad_norm": 1.3762952653142215, "learning_rate": 1.7211241700956172e-06, "loss": 0.07216014862060546, "step": 148075 }, { "epoch": 1.2804039740253002, "grad_norm": 0.8101018841005895, "learning_rate": 1.7209398789444758e-06, "loss": 0.033164596557617186, "step": 148080 }, { "epoch": 1.2804472075468436, "grad_norm": 30.944836822963993, "learning_rate": 1.7207555936922452e-06, "loss": 0.4976593017578125, "step": 148085 }, { "epoch": 1.2804904410683868, "grad_norm": 7.341476931771787, "learning_rate": 1.720571314339778e-06, "loss": 0.028907012939453126, "step": 148090 }, { "epoch": 1.28053367458993, "grad_norm": 0.3630527132402047, "learning_rate": 1.720387040887923e-06, "loss": 0.3985755920410156, "step": 148095 }, { "epoch": 1.2805769081114733, "grad_norm": 0.8047561431597057, "learning_rate": 1.7202027733375294e-06, "loss": 0.17510757446289063, "step": 148100 }, { "epoch": 1.2806201416330165, "grad_norm": 48.15200187339019, "learning_rate": 1.720018511689447e-06, "loss": 0.37434921264648435, "step": 148105 }, { "epoch": 1.28066337515456, "grad_norm": 0.7966604700434959, "learning_rate": 1.7198342559445267e-06, "loss": 0.096044921875, "step": 148110 }, { "epoch": 1.2807066086761032, "grad_norm": 1.3101340842293219, "learning_rate": 1.719650006103617e-06, "loss": 0.01232147216796875, "step": 148115 }, { "epoch": 1.2807498421976464, "grad_norm": 0.7629071905240137, "learning_rate": 1.7194657621675693e-06, "loss": 0.01981201171875, "step": 148120 }, { "epoch": 1.2807930757191897, "grad_norm": 4.090707321157793, "learning_rate": 1.7192815241372328e-06, "loss": 0.014056396484375, "step": 148125 }, { "epoch": 1.2808363092407329, "grad_norm": 2.0362065856786775, "learning_rate": 1.7190972920134563e-06, "loss": 0.1854156494140625, "step": 148130 }, { "epoch": 1.2808795427622761, "grad_norm": 72.71386375919948, "learning_rate": 1.7189130657970898e-06, "loss": 0.177227783203125, "step": 148135 }, { "epoch": 1.2809227762838193, "grad_norm": 1.8847284938351134, "learning_rate": 1.7187288454889833e-06, "loss": 0.0471954345703125, "step": 148140 }, { "epoch": 1.2809660098053626, "grad_norm": 1.4124338795447549, "learning_rate": 1.7185446310899851e-06, "loss": 0.04890594482421875, "step": 148145 }, { "epoch": 1.281009243326906, "grad_norm": 12.290338874867501, "learning_rate": 1.7183604226009465e-06, "loss": 0.09947509765625, "step": 148150 }, { "epoch": 1.2810524768484493, "grad_norm": 0.7149035084774946, "learning_rate": 1.7181762200227166e-06, "loss": 0.20793609619140624, "step": 148155 }, { "epoch": 1.2810957103699925, "grad_norm": 2.877366270722861, "learning_rate": 1.717992023356144e-06, "loss": 0.19728164672851561, "step": 148160 }, { "epoch": 1.2811389438915357, "grad_norm": 2.620761139175806, "learning_rate": 1.7178078326020796e-06, "loss": 0.1717905044555664, "step": 148165 }, { "epoch": 1.281182177413079, "grad_norm": 6.466441225026648, "learning_rate": 1.717623647761372e-06, "loss": 0.02959728240966797, "step": 148170 }, { "epoch": 1.2812254109346224, "grad_norm": 27.560948328344203, "learning_rate": 1.7174394688348711e-06, "loss": 0.04285507202148438, "step": 148175 }, { "epoch": 1.2812686444561656, "grad_norm": 2.665483530018098, "learning_rate": 1.7172552958234249e-06, "loss": 0.028482818603515626, "step": 148180 }, { "epoch": 1.2813118779777088, "grad_norm": 57.33307250516694, "learning_rate": 1.717071128727885e-06, "loss": 0.20663604736328126, "step": 148185 }, { "epoch": 1.281355111499252, "grad_norm": 0.4655772963448537, "learning_rate": 1.7168869675491e-06, "loss": 0.008281898498535157, "step": 148190 }, { "epoch": 1.2813983450207953, "grad_norm": 0.9762754902138657, "learning_rate": 1.7167028122879186e-06, "loss": 0.029550933837890626, "step": 148195 }, { "epoch": 1.2814415785423385, "grad_norm": 25.120404417787235, "learning_rate": 1.7165186629451909e-06, "loss": 0.0929962158203125, "step": 148200 }, { "epoch": 1.2814848120638818, "grad_norm": 11.318131877231686, "learning_rate": 1.7163345195217653e-06, "loss": 0.12601318359375, "step": 148205 }, { "epoch": 1.281528045585425, "grad_norm": 0.5022779862909547, "learning_rate": 1.71615038201849e-06, "loss": 0.09104156494140625, "step": 148210 }, { "epoch": 1.2815712791069684, "grad_norm": 0.6612046570309233, "learning_rate": 1.7159662504362172e-06, "loss": 0.14735336303710939, "step": 148215 }, { "epoch": 1.2816145126285117, "grad_norm": 1.3940651572581624, "learning_rate": 1.7157821247757934e-06, "loss": 0.04836883544921875, "step": 148220 }, { "epoch": 1.281657746150055, "grad_norm": 48.82819670474331, "learning_rate": 1.7155980050380705e-06, "loss": 0.15528478622436523, "step": 148225 }, { "epoch": 1.2817009796715981, "grad_norm": 19.852218651114917, "learning_rate": 1.7154138912238954e-06, "loss": 0.12448806762695312, "step": 148230 }, { "epoch": 1.2817442131931414, "grad_norm": 0.6663580379201094, "learning_rate": 1.7152297833341185e-06, "loss": 0.23520336151123047, "step": 148235 }, { "epoch": 1.2817874467146848, "grad_norm": 0.18427642046836393, "learning_rate": 1.715045681369587e-06, "loss": 0.09947357177734376, "step": 148240 }, { "epoch": 1.281830680236228, "grad_norm": 7.985831895780795, "learning_rate": 1.7148615853311524e-06, "loss": 0.06663360595703124, "step": 148245 }, { "epoch": 1.2818739137577713, "grad_norm": 12.820814740163042, "learning_rate": 1.7146774952196624e-06, "loss": 0.06394805908203124, "step": 148250 }, { "epoch": 1.2819171472793145, "grad_norm": 1.2884407345834887, "learning_rate": 1.7144934110359667e-06, "loss": 0.031698989868164065, "step": 148255 }, { "epoch": 1.2819603808008577, "grad_norm": 0.18510594918116546, "learning_rate": 1.7143093327809136e-06, "loss": 0.020476913452148436, "step": 148260 }, { "epoch": 1.282003614322401, "grad_norm": 4.944963185085121, "learning_rate": 1.714125260455352e-06, "loss": 0.03122711181640625, "step": 148265 }, { "epoch": 1.2820468478439442, "grad_norm": 9.011080126830313, "learning_rate": 1.7139411940601302e-06, "loss": 0.051751708984375, "step": 148270 }, { "epoch": 1.2820900813654874, "grad_norm": 3.572883600094393, "learning_rate": 1.713757133596099e-06, "loss": 0.01618499755859375, "step": 148275 }, { "epoch": 1.2821333148870309, "grad_norm": 1.3220097777281128, "learning_rate": 1.7135730790641054e-06, "loss": 0.02560844421386719, "step": 148280 }, { "epoch": 1.282176548408574, "grad_norm": 1.4752547482575649, "learning_rate": 1.7133890304649995e-06, "loss": 0.02685127258300781, "step": 148285 }, { "epoch": 1.2822197819301173, "grad_norm": 0.8861772565473555, "learning_rate": 1.7132049877996306e-06, "loss": 0.0349365234375, "step": 148290 }, { "epoch": 1.2822630154516605, "grad_norm": 26.00146323765929, "learning_rate": 1.713020951068846e-06, "loss": 0.22557830810546875, "step": 148295 }, { "epoch": 1.282306248973204, "grad_norm": 7.072125082481713, "learning_rate": 1.7128369202734951e-06, "loss": 0.01141204833984375, "step": 148300 }, { "epoch": 1.2823494824947472, "grad_norm": 27.144408967942425, "learning_rate": 1.7126528954144257e-06, "loss": 0.17729110717773439, "step": 148305 }, { "epoch": 1.2823927160162905, "grad_norm": 40.09564582840965, "learning_rate": 1.7124688764924883e-06, "loss": 0.05080413818359375, "step": 148310 }, { "epoch": 1.2824359495378337, "grad_norm": 7.841926944370391, "learning_rate": 1.7122848635085308e-06, "loss": 0.022068214416503907, "step": 148315 }, { "epoch": 1.282479183059377, "grad_norm": 4.485635910123957, "learning_rate": 1.7121008564634016e-06, "loss": 0.0397064208984375, "step": 148320 }, { "epoch": 1.2825224165809201, "grad_norm": 20.081329195478794, "learning_rate": 1.7119168553579497e-06, "loss": 0.06788291931152343, "step": 148325 }, { "epoch": 1.2825656501024634, "grad_norm": 0.9805073420701629, "learning_rate": 1.711732860193022e-06, "loss": 0.07615203857421875, "step": 148330 }, { "epoch": 1.2826088836240066, "grad_norm": 6.109058328252734, "learning_rate": 1.71154887096947e-06, "loss": 0.09412193298339844, "step": 148335 }, { "epoch": 1.28265211714555, "grad_norm": 1.746480896382398, "learning_rate": 1.7113648876881393e-06, "loss": 0.14564743041992187, "step": 148340 }, { "epoch": 1.2826953506670933, "grad_norm": 0.9190627986371863, "learning_rate": 1.7111809103498812e-06, "loss": 0.0672271728515625, "step": 148345 }, { "epoch": 1.2827385841886365, "grad_norm": 0.517045453506322, "learning_rate": 1.7109969389555429e-06, "loss": 0.0621826171875, "step": 148350 }, { "epoch": 1.2827818177101797, "grad_norm": 0.22496484566216413, "learning_rate": 1.7108129735059727e-06, "loss": 0.123260498046875, "step": 148355 }, { "epoch": 1.282825051231723, "grad_norm": 0.8297116157726684, "learning_rate": 1.710629014002019e-06, "loss": 0.17778091430664061, "step": 148360 }, { "epoch": 1.2828682847532664, "grad_norm": 3.7677295798041395, "learning_rate": 1.7104450604445305e-06, "loss": 0.24073028564453125, "step": 148365 }, { "epoch": 1.2829115182748096, "grad_norm": 3.8444736900044965, "learning_rate": 1.7102611128343547e-06, "loss": 0.11122894287109375, "step": 148370 }, { "epoch": 1.2829547517963529, "grad_norm": 6.746445785434472, "learning_rate": 1.7100771711723414e-06, "loss": 0.04168243408203125, "step": 148375 }, { "epoch": 1.282997985317896, "grad_norm": 2.6573139705442763, "learning_rate": 1.709893235459338e-06, "loss": 0.06463623046875, "step": 148380 }, { "epoch": 1.2830412188394393, "grad_norm": 3.6879160997163125, "learning_rate": 1.709709305696192e-06, "loss": 0.023116302490234376, "step": 148385 }, { "epoch": 1.2830844523609826, "grad_norm": 15.023075121296658, "learning_rate": 1.7095253818837541e-06, "loss": 0.11783885955810547, "step": 148390 }, { "epoch": 1.2831276858825258, "grad_norm": 0.2161658176383311, "learning_rate": 1.7093414640228709e-06, "loss": 0.07639141082763672, "step": 148395 }, { "epoch": 1.283170919404069, "grad_norm": 21.31744115148167, "learning_rate": 1.70915755211439e-06, "loss": 0.06664600372314453, "step": 148400 }, { "epoch": 1.2832141529256125, "grad_norm": 2.919532616858341, "learning_rate": 1.7089736461591606e-06, "loss": 0.03166885375976562, "step": 148405 }, { "epoch": 1.2832573864471557, "grad_norm": 4.650927065713521, "learning_rate": 1.7087897461580315e-06, "loss": 0.36612930297851565, "step": 148410 }, { "epoch": 1.283300619968699, "grad_norm": 5.18748693284951, "learning_rate": 1.7086058521118502e-06, "loss": 0.06602630615234376, "step": 148415 }, { "epoch": 1.2833438534902422, "grad_norm": 0.177981683825863, "learning_rate": 1.708421964021464e-06, "loss": 0.035555267333984376, "step": 148420 }, { "epoch": 1.2833870870117854, "grad_norm": 11.398769963905046, "learning_rate": 1.7082380818877216e-06, "loss": 0.047317123413085936, "step": 148425 }, { "epoch": 1.2834303205333288, "grad_norm": 49.579772066040704, "learning_rate": 1.7080542057114715e-06, "loss": 0.15746421813964845, "step": 148430 }, { "epoch": 1.283473554054872, "grad_norm": 11.922181700264668, "learning_rate": 1.7078703354935598e-06, "loss": 0.05934410095214844, "step": 148435 }, { "epoch": 1.2835167875764153, "grad_norm": 0.40671867155156105, "learning_rate": 1.707686471234837e-06, "loss": 0.017750930786132813, "step": 148440 }, { "epoch": 1.2835600210979585, "grad_norm": 0.6923407215103377, "learning_rate": 1.7075026129361492e-06, "loss": 0.15013198852539061, "step": 148445 }, { "epoch": 1.2836032546195018, "grad_norm": 17.88892123734779, "learning_rate": 1.707318760598346e-06, "loss": 0.0649169921875, "step": 148450 }, { "epoch": 1.283646488141045, "grad_norm": 0.13835683549026107, "learning_rate": 1.7071349142222745e-06, "loss": 0.0870941162109375, "step": 148455 }, { "epoch": 1.2836897216625882, "grad_norm": 0.760776454623868, "learning_rate": 1.7069510738087828e-06, "loss": 0.06387100219726563, "step": 148460 }, { "epoch": 1.2837329551841314, "grad_norm": 2.752024392510488, "learning_rate": 1.7067672393587167e-06, "loss": 0.04010009765625, "step": 148465 }, { "epoch": 1.283776188705675, "grad_norm": 0.42222650180774884, "learning_rate": 1.7065834108729277e-06, "loss": 0.0210723876953125, "step": 148470 }, { "epoch": 1.2838194222272181, "grad_norm": 7.38231251678474, "learning_rate": 1.7063995883522614e-06, "loss": 0.06088333129882813, "step": 148475 }, { "epoch": 1.2838626557487613, "grad_norm": 4.98699581464746, "learning_rate": 1.706215771797566e-06, "loss": 0.09545440673828125, "step": 148480 }, { "epoch": 1.2839058892703046, "grad_norm": 1.3613920576495817, "learning_rate": 1.7060319612096888e-06, "loss": 0.06393585205078126, "step": 148485 }, { "epoch": 1.2839491227918478, "grad_norm": 6.575519742131866, "learning_rate": 1.7058481565894782e-06, "loss": 0.053738975524902345, "step": 148490 }, { "epoch": 1.2839923563133913, "grad_norm": 3.1594784325703085, "learning_rate": 1.70566435793778e-06, "loss": 0.0694915771484375, "step": 148495 }, { "epoch": 1.2840355898349345, "grad_norm": 42.92229945606733, "learning_rate": 1.705480565255445e-06, "loss": 0.2203521728515625, "step": 148500 }, { "epoch": 1.2840788233564777, "grad_norm": 3.618806776092431, "learning_rate": 1.705296778543318e-06, "loss": 0.045730972290039064, "step": 148505 }, { "epoch": 1.284122056878021, "grad_norm": 1.1493721355782511, "learning_rate": 1.7051129978022486e-06, "loss": 0.1684112548828125, "step": 148510 }, { "epoch": 1.2841652903995642, "grad_norm": 1.2377110199071049, "learning_rate": 1.7049292230330843e-06, "loss": 0.045407867431640624, "step": 148515 }, { "epoch": 1.2842085239211074, "grad_norm": 2.6820608061425015, "learning_rate": 1.7047454542366715e-06, "loss": 0.011588287353515626, "step": 148520 }, { "epoch": 1.2842517574426506, "grad_norm": 1.1276969709389533, "learning_rate": 1.7045616914138586e-06, "loss": 0.14581146240234374, "step": 148525 }, { "epoch": 1.2842949909641939, "grad_norm": 0.15743017246854576, "learning_rate": 1.7043779345654912e-06, "loss": 0.14048938751220702, "step": 148530 }, { "epoch": 1.2843382244857373, "grad_norm": 8.496347349806937, "learning_rate": 1.7041941836924197e-06, "loss": 0.06856231689453125, "step": 148535 }, { "epoch": 1.2843814580072805, "grad_norm": 0.6321864049799717, "learning_rate": 1.7040104387954898e-06, "loss": 0.06378517150878907, "step": 148540 }, { "epoch": 1.2844246915288238, "grad_norm": 21.597921096731373, "learning_rate": 1.7038266998755494e-06, "loss": 0.07779541015625, "step": 148545 }, { "epoch": 1.284467925050367, "grad_norm": 2.171806043879562, "learning_rate": 1.703642966933446e-06, "loss": 0.0185821533203125, "step": 148550 }, { "epoch": 1.2845111585719104, "grad_norm": 0.15150596541396213, "learning_rate": 1.703459239970025e-06, "loss": 0.007854461669921875, "step": 148555 }, { "epoch": 1.2845543920934537, "grad_norm": 1.7763576788192657, "learning_rate": 1.7032755189861359e-06, "loss": 0.031976318359375, "step": 148560 }, { "epoch": 1.284597625614997, "grad_norm": 1.3070613739572134, "learning_rate": 1.7030918039826265e-06, "loss": 0.03934459686279297, "step": 148565 }, { "epoch": 1.2846408591365401, "grad_norm": 12.822086032526945, "learning_rate": 1.702908094960343e-06, "loss": 0.10889816284179688, "step": 148570 }, { "epoch": 1.2846840926580834, "grad_norm": 15.024288780742621, "learning_rate": 1.702724391920133e-06, "loss": 0.11886749267578126, "step": 148575 }, { "epoch": 1.2847273261796266, "grad_norm": 43.16936712553501, "learning_rate": 1.7025406948628433e-06, "loss": 0.32008895874023435, "step": 148580 }, { "epoch": 1.2847705597011698, "grad_norm": 2.187561817372397, "learning_rate": 1.7023570037893212e-06, "loss": 0.016852188110351562, "step": 148585 }, { "epoch": 1.284813793222713, "grad_norm": 3.473102556434877, "learning_rate": 1.7021733187004145e-06, "loss": 0.197857666015625, "step": 148590 }, { "epoch": 1.2848570267442565, "grad_norm": 1.4355767809062623, "learning_rate": 1.701989639596968e-06, "loss": 0.3487640380859375, "step": 148595 }, { "epoch": 1.2849002602657997, "grad_norm": 0.9369219413224885, "learning_rate": 1.701805966479832e-06, "loss": 0.04546051025390625, "step": 148600 }, { "epoch": 1.284943493787343, "grad_norm": 1.5414528620500614, "learning_rate": 1.7016222993498518e-06, "loss": 0.063262939453125, "step": 148605 }, { "epoch": 1.2849867273088862, "grad_norm": 0.32700477255365573, "learning_rate": 1.701438638207874e-06, "loss": 0.13437576293945314, "step": 148610 }, { "epoch": 1.2850299608304294, "grad_norm": 0.09633597037299788, "learning_rate": 1.7012549830547478e-06, "loss": 0.012936687469482422, "step": 148615 }, { "epoch": 1.2850731943519729, "grad_norm": 0.14383305661736404, "learning_rate": 1.7010713338913184e-06, "loss": 0.029950714111328124, "step": 148620 }, { "epoch": 1.285116427873516, "grad_norm": 1.3294497457019323, "learning_rate": 1.7008876907184325e-06, "loss": 0.0752685546875, "step": 148625 }, { "epoch": 1.2851596613950593, "grad_norm": 8.806770572654163, "learning_rate": 1.7007040535369387e-06, "loss": 0.03574676513671875, "step": 148630 }, { "epoch": 1.2852028949166026, "grad_norm": 0.4063562801648468, "learning_rate": 1.700520422347683e-06, "loss": 0.03842926025390625, "step": 148635 }, { "epoch": 1.2852461284381458, "grad_norm": 0.7910125998231377, "learning_rate": 1.7003367971515124e-06, "loss": 0.03632621765136719, "step": 148640 }, { "epoch": 1.285289361959689, "grad_norm": 7.244544198511873, "learning_rate": 1.7001531779492733e-06, "loss": 0.052434539794921874, "step": 148645 }, { "epoch": 1.2853325954812322, "grad_norm": 0.038928250344395524, "learning_rate": 1.6999695647418136e-06, "loss": 0.0023279190063476562, "step": 148650 }, { "epoch": 1.2853758290027755, "grad_norm": 8.77574977690387, "learning_rate": 1.6997859575299775e-06, "loss": 0.080126953125, "step": 148655 }, { "epoch": 1.285419062524319, "grad_norm": 0.1768914887032816, "learning_rate": 1.6996023563146154e-06, "loss": 0.08459625244140626, "step": 148660 }, { "epoch": 1.2854622960458622, "grad_norm": 0.23006330811920456, "learning_rate": 1.699418761096572e-06, "loss": 0.12232208251953125, "step": 148665 }, { "epoch": 1.2855055295674054, "grad_norm": 2.4316986404728005, "learning_rate": 1.6992351718766933e-06, "loss": 0.01812896728515625, "step": 148670 }, { "epoch": 1.2855487630889486, "grad_norm": 0.34342279494579003, "learning_rate": 1.699051588655828e-06, "loss": 0.13677978515625, "step": 148675 }, { "epoch": 1.2855919966104918, "grad_norm": 6.93242051836567, "learning_rate": 1.698868011434822e-06, "loss": 0.03961315155029297, "step": 148680 }, { "epoch": 1.2856352301320353, "grad_norm": 0.8257843307742095, "learning_rate": 1.6986844402145217e-06, "loss": 0.07495288848876953, "step": 148685 }, { "epoch": 1.2856784636535785, "grad_norm": 22.924248431862196, "learning_rate": 1.6985008749957727e-06, "loss": 0.1233642578125, "step": 148690 }, { "epoch": 1.2857216971751217, "grad_norm": 9.319587630775876, "learning_rate": 1.6983173157794238e-06, "loss": 0.16894264221191407, "step": 148695 }, { "epoch": 1.285764930696665, "grad_norm": 0.9000897280373078, "learning_rate": 1.6981337625663203e-06, "loss": 0.034859085083007814, "step": 148700 }, { "epoch": 1.2858081642182082, "grad_norm": 0.83093045483424, "learning_rate": 1.6979502153573092e-06, "loss": 0.3031044006347656, "step": 148705 }, { "epoch": 1.2858513977397514, "grad_norm": 3.3132442858572033, "learning_rate": 1.6977666741532364e-06, "loss": 0.06202850341796875, "step": 148710 }, { "epoch": 1.2858946312612947, "grad_norm": 6.025149119430935, "learning_rate": 1.6975831389549488e-06, "loss": 0.060535621643066403, "step": 148715 }, { "epoch": 1.285937864782838, "grad_norm": 2.437416413268045, "learning_rate": 1.6973996097632913e-06, "loss": 0.46062240600585935, "step": 148720 }, { "epoch": 1.2859810983043813, "grad_norm": 1.331413354699273, "learning_rate": 1.6972160865791128e-06, "loss": 0.010544109344482421, "step": 148725 }, { "epoch": 1.2860243318259246, "grad_norm": 0.6242089279039476, "learning_rate": 1.6970325694032575e-06, "loss": 0.038003158569335935, "step": 148730 }, { "epoch": 1.2860675653474678, "grad_norm": 6.471487616009842, "learning_rate": 1.696849058236574e-06, "loss": 0.03370399475097656, "step": 148735 }, { "epoch": 1.286110798869011, "grad_norm": 5.558826548809309, "learning_rate": 1.6966655530799077e-06, "loss": 0.04246864318847656, "step": 148740 }, { "epoch": 1.2861540323905543, "grad_norm": 1.1015125484046249, "learning_rate": 1.6964820539341043e-06, "loss": 0.09539451599121093, "step": 148745 }, { "epoch": 1.2861972659120977, "grad_norm": 0.13257473694955896, "learning_rate": 1.69629856080001e-06, "loss": 0.015747833251953124, "step": 148750 }, { "epoch": 1.286240499433641, "grad_norm": 1.377154320669182, "learning_rate": 1.6961150736784722e-06, "loss": 0.0945831298828125, "step": 148755 }, { "epoch": 1.2862837329551842, "grad_norm": 0.17874581001161802, "learning_rate": 1.6959315925703367e-06, "loss": 0.08807258605957032, "step": 148760 }, { "epoch": 1.2863269664767274, "grad_norm": 5.5926904605446115, "learning_rate": 1.6957481174764491e-06, "loss": 0.13857498168945312, "step": 148765 }, { "epoch": 1.2863701999982706, "grad_norm": 0.11704007376437865, "learning_rate": 1.6955646483976563e-06, "loss": 0.14827632904052734, "step": 148770 }, { "epoch": 1.2864134335198139, "grad_norm": 19.657960144445052, "learning_rate": 1.6953811853348026e-06, "loss": 0.1505290985107422, "step": 148775 }, { "epoch": 1.286456667041357, "grad_norm": 0.04714069244963588, "learning_rate": 1.6951977282887373e-06, "loss": 0.0417266845703125, "step": 148780 }, { "epoch": 1.2864999005629005, "grad_norm": 0.37029523679162946, "learning_rate": 1.6950142772603033e-06, "loss": 0.09144134521484375, "step": 148785 }, { "epoch": 1.2865431340844438, "grad_norm": 2.178994855810359, "learning_rate": 1.6948308322503492e-06, "loss": 0.08624706268310547, "step": 148790 }, { "epoch": 1.286586367605987, "grad_norm": 1.3569773718950267, "learning_rate": 1.69464739325972e-06, "loss": 0.044802093505859376, "step": 148795 }, { "epoch": 1.2866296011275302, "grad_norm": 21.031562815176866, "learning_rate": 1.6944639602892615e-06, "loss": 0.07596511840820312, "step": 148800 }, { "epoch": 1.2866728346490734, "grad_norm": 1.0783821786006733, "learning_rate": 1.6942805333398201e-06, "loss": 0.021516990661621094, "step": 148805 }, { "epoch": 1.286716068170617, "grad_norm": 4.282236474502169, "learning_rate": 1.6940971124122417e-06, "loss": 0.12801380157470704, "step": 148810 }, { "epoch": 1.2867593016921601, "grad_norm": 0.03717739676333863, "learning_rate": 1.6939136975073706e-06, "loss": 0.03056678771972656, "step": 148815 }, { "epoch": 1.2868025352137034, "grad_norm": 0.35261414778382943, "learning_rate": 1.6937302886260553e-06, "loss": 0.0101898193359375, "step": 148820 }, { "epoch": 1.2868457687352466, "grad_norm": 0.14558681386301836, "learning_rate": 1.6935468857691405e-06, "loss": 0.3416751861572266, "step": 148825 }, { "epoch": 1.2868890022567898, "grad_norm": 4.361701313429652, "learning_rate": 1.693363488937472e-06, "loss": 0.01457061767578125, "step": 148830 }, { "epoch": 1.286932235778333, "grad_norm": 1.7532573471644006, "learning_rate": 1.6931800981318946e-06, "loss": 0.02012939453125, "step": 148835 }, { "epoch": 1.2869754692998763, "grad_norm": 0.6112448575320327, "learning_rate": 1.6929967133532563e-06, "loss": 0.13876876831054688, "step": 148840 }, { "epoch": 1.2870187028214195, "grad_norm": 6.313733693777752, "learning_rate": 1.6928133346024003e-06, "loss": 0.042083740234375, "step": 148845 }, { "epoch": 1.287061936342963, "grad_norm": 0.05116534218976703, "learning_rate": 1.692629961880175e-06, "loss": 0.1266172409057617, "step": 148850 }, { "epoch": 1.2871051698645062, "grad_norm": 0.7222042663853057, "learning_rate": 1.6924465951874247e-06, "loss": 0.11449127197265625, "step": 148855 }, { "epoch": 1.2871484033860494, "grad_norm": 0.2473788252457964, "learning_rate": 1.6922632345249954e-06, "loss": 0.02599945068359375, "step": 148860 }, { "epoch": 1.2871916369075926, "grad_norm": 35.61968901107368, "learning_rate": 1.6920798798937324e-06, "loss": 0.1902599334716797, "step": 148865 }, { "epoch": 1.2872348704291359, "grad_norm": 5.4241573340691795, "learning_rate": 1.6918965312944815e-06, "loss": 0.020737457275390624, "step": 148870 }, { "epoch": 1.2872781039506793, "grad_norm": 0.5748417184930266, "learning_rate": 1.691713188728088e-06, "loss": 0.029807281494140626, "step": 148875 }, { "epoch": 1.2873213374722225, "grad_norm": 0.35186304399321683, "learning_rate": 1.6915298521953966e-06, "loss": 0.09735794067382812, "step": 148880 }, { "epoch": 1.2873645709937658, "grad_norm": 7.9225133083214665, "learning_rate": 1.6913465216972548e-06, "loss": 0.1281341552734375, "step": 148885 }, { "epoch": 1.287407804515309, "grad_norm": 0.5260694932932635, "learning_rate": 1.6911631972345074e-06, "loss": 0.03857269287109375, "step": 148890 }, { "epoch": 1.2874510380368522, "grad_norm": 4.055568261486119, "learning_rate": 1.6909798788079988e-06, "loss": 0.17982635498046876, "step": 148895 }, { "epoch": 1.2874942715583955, "grad_norm": 26.378790811490166, "learning_rate": 1.6907965664185762e-06, "loss": 0.39556083679199217, "step": 148900 }, { "epoch": 1.2875375050799387, "grad_norm": 0.41425974548512184, "learning_rate": 1.6906132600670842e-06, "loss": 0.10522079467773438, "step": 148905 }, { "epoch": 1.287580738601482, "grad_norm": 12.365521179673058, "learning_rate": 1.690429959754367e-06, "loss": 0.023525238037109375, "step": 148910 }, { "epoch": 1.2876239721230254, "grad_norm": 1.9396278698863603, "learning_rate": 1.6902466654812722e-06, "loss": 0.061106109619140626, "step": 148915 }, { "epoch": 1.2876672056445686, "grad_norm": 6.858399770903085, "learning_rate": 1.6900633772486441e-06, "loss": 0.1201944351196289, "step": 148920 }, { "epoch": 1.2877104391661118, "grad_norm": 5.451833880794699, "learning_rate": 1.689880095057328e-06, "loss": 0.055539703369140624, "step": 148925 }, { "epoch": 1.287753672687655, "grad_norm": 2.066493005292545, "learning_rate": 1.6896968189081692e-06, "loss": 0.060491943359375, "step": 148930 }, { "epoch": 1.2877969062091983, "grad_norm": 3.160016155262825, "learning_rate": 1.6895135488020128e-06, "loss": 0.03230209350585937, "step": 148935 }, { "epoch": 1.2878401397307417, "grad_norm": 1.150014896760599, "learning_rate": 1.6893302847397042e-06, "loss": 0.23818473815917968, "step": 148940 }, { "epoch": 1.287883373252285, "grad_norm": 8.127562370935532, "learning_rate": 1.689147026722087e-06, "loss": 0.08072738647460938, "step": 148945 }, { "epoch": 1.2879266067738282, "grad_norm": 1.4371063024327664, "learning_rate": 1.6889637747500095e-06, "loss": 0.06957330703735351, "step": 148950 }, { "epoch": 1.2879698402953714, "grad_norm": 4.5182400493766846, "learning_rate": 1.6887805288243142e-06, "loss": 0.06964263916015626, "step": 148955 }, { "epoch": 1.2880130738169147, "grad_norm": 0.17933023101995282, "learning_rate": 1.6885972889458477e-06, "loss": 0.1098907470703125, "step": 148960 }, { "epoch": 1.2880563073384579, "grad_norm": 12.161014523876142, "learning_rate": 1.688414055115455e-06, "loss": 0.23359909057617187, "step": 148965 }, { "epoch": 1.2880995408600011, "grad_norm": 3.0075695650757583, "learning_rate": 1.6882308273339808e-06, "loss": 0.018201828002929688, "step": 148970 }, { "epoch": 1.2881427743815443, "grad_norm": 7.287233492593945, "learning_rate": 1.688047605602269e-06, "loss": 0.07267608642578124, "step": 148975 }, { "epoch": 1.2881860079030878, "grad_norm": 0.48747551598507993, "learning_rate": 1.687864389921167e-06, "loss": 0.09580802917480469, "step": 148980 }, { "epoch": 1.288229241424631, "grad_norm": 18.81486488510495, "learning_rate": 1.6876811802915183e-06, "loss": 0.06798133850097657, "step": 148985 }, { "epoch": 1.2882724749461743, "grad_norm": 26.712831702966334, "learning_rate": 1.6874979767141678e-06, "loss": 0.14722061157226562, "step": 148990 }, { "epoch": 1.2883157084677175, "grad_norm": 15.40205725281563, "learning_rate": 1.6873147791899612e-06, "loss": 0.1898487091064453, "step": 148995 }, { "epoch": 1.288358941989261, "grad_norm": 5.137351134098133, "learning_rate": 1.6871315877197413e-06, "loss": 0.0577117919921875, "step": 149000 }, { "epoch": 1.2884021755108042, "grad_norm": 38.513443013264244, "learning_rate": 1.686948402304356e-06, "loss": 0.2912921905517578, "step": 149005 }, { "epoch": 1.2884454090323474, "grad_norm": 3.0989464065863355, "learning_rate": 1.6867652229446469e-06, "loss": 0.08649749755859375, "step": 149010 }, { "epoch": 1.2884886425538906, "grad_norm": 0.32151203145870716, "learning_rate": 1.6865820496414622e-06, "loss": 0.036043548583984376, "step": 149015 }, { "epoch": 1.2885318760754338, "grad_norm": 1.746302866802686, "learning_rate": 1.686398882395645e-06, "loss": 0.095599365234375, "step": 149020 }, { "epoch": 1.288575109596977, "grad_norm": 0.2310670357295794, "learning_rate": 1.68621572120804e-06, "loss": 0.08261528015136718, "step": 149025 }, { "epoch": 1.2886183431185203, "grad_norm": 20.02679609927304, "learning_rate": 1.686032566079492e-06, "loss": 0.125579833984375, "step": 149030 }, { "epoch": 1.2886615766400635, "grad_norm": 1.3229144903792687, "learning_rate": 1.6858494170108456e-06, "loss": 0.04597339630126953, "step": 149035 }, { "epoch": 1.288704810161607, "grad_norm": 0.07511282635793481, "learning_rate": 1.6856662740029446e-06, "loss": 0.06351070404052735, "step": 149040 }, { "epoch": 1.2887480436831502, "grad_norm": 11.816118645823698, "learning_rate": 1.6854831370566355e-06, "loss": 0.1225748062133789, "step": 149045 }, { "epoch": 1.2887912772046934, "grad_norm": 4.1680815462620115, "learning_rate": 1.6853000061727623e-06, "loss": 0.028141021728515625, "step": 149050 }, { "epoch": 1.2888345107262367, "grad_norm": 26.94159792942794, "learning_rate": 1.685116881352169e-06, "loss": 0.11258697509765625, "step": 149055 }, { "epoch": 1.28887774424778, "grad_norm": 0.7123369579195277, "learning_rate": 1.6849337625956995e-06, "loss": 0.015264892578125, "step": 149060 }, { "epoch": 1.2889209777693234, "grad_norm": 0.6319704748867109, "learning_rate": 1.6847506499042004e-06, "loss": 0.0448699951171875, "step": 149065 }, { "epoch": 1.2889642112908666, "grad_norm": 1.692443947848028, "learning_rate": 1.684567543278514e-06, "loss": 0.07686843872070312, "step": 149070 }, { "epoch": 1.2890074448124098, "grad_norm": 0.5296788211407855, "learning_rate": 1.6843844427194872e-06, "loss": 0.20347061157226562, "step": 149075 }, { "epoch": 1.289050678333953, "grad_norm": 7.558415104278939, "learning_rate": 1.684201348227963e-06, "loss": 0.015038681030273438, "step": 149080 }, { "epoch": 1.2890939118554963, "grad_norm": 1.1174318156416347, "learning_rate": 1.6840182598047856e-06, "loss": 0.008841514587402344, "step": 149085 }, { "epoch": 1.2891371453770395, "grad_norm": 0.855047726270715, "learning_rate": 1.6838351774508001e-06, "loss": 0.0066890716552734375, "step": 149090 }, { "epoch": 1.2891803788985827, "grad_norm": 0.038594687372341875, "learning_rate": 1.6836521011668499e-06, "loss": 0.05783653259277344, "step": 149095 }, { "epoch": 1.289223612420126, "grad_norm": 19.954822868334894, "learning_rate": 1.683469030953779e-06, "loss": 0.07707977294921875, "step": 149100 }, { "epoch": 1.2892668459416694, "grad_norm": 0.41217042903135975, "learning_rate": 1.683285966812434e-06, "loss": 0.011861038208007813, "step": 149105 }, { "epoch": 1.2893100794632126, "grad_norm": 1.5632596748616636, "learning_rate": 1.6831029087436579e-06, "loss": 0.04456024169921875, "step": 149110 }, { "epoch": 1.2893533129847559, "grad_norm": 0.053087878163901, "learning_rate": 1.6829198567482941e-06, "loss": 0.14964370727539061, "step": 149115 }, { "epoch": 1.289396546506299, "grad_norm": 4.788879007445199, "learning_rate": 1.682736810827187e-06, "loss": 0.03454437255859375, "step": 149120 }, { "epoch": 1.2894397800278423, "grad_norm": 0.42493365224735, "learning_rate": 1.6825537709811822e-06, "loss": 0.008814048767089844, "step": 149125 }, { "epoch": 1.2894830135493858, "grad_norm": 3.234411603099506, "learning_rate": 1.6823707372111232e-06, "loss": 0.071624755859375, "step": 149130 }, { "epoch": 1.289526247070929, "grad_norm": 2.9506692889026587, "learning_rate": 1.6821877095178526e-06, "loss": 0.0615234375, "step": 149135 }, { "epoch": 1.2895694805924722, "grad_norm": 0.11147909327815146, "learning_rate": 1.6820046879022175e-06, "loss": 0.06747570037841796, "step": 149140 }, { "epoch": 1.2896127141140155, "grad_norm": 3.2941351466163917, "learning_rate": 1.68182167236506e-06, "loss": 0.0436065673828125, "step": 149145 }, { "epoch": 1.2896559476355587, "grad_norm": 15.99249686562134, "learning_rate": 1.6816386629072241e-06, "loss": 0.0902984619140625, "step": 149150 }, { "epoch": 1.289699181157102, "grad_norm": 1.4013424162797976, "learning_rate": 1.6814556595295547e-06, "loss": 0.0062847137451171875, "step": 149155 }, { "epoch": 1.2897424146786451, "grad_norm": 0.6581126924386282, "learning_rate": 1.681272662232895e-06, "loss": 0.08025588989257812, "step": 149160 }, { "epoch": 1.2897856482001884, "grad_norm": 1.969980366602878, "learning_rate": 1.6810896710180884e-06, "loss": 0.0852142333984375, "step": 149165 }, { "epoch": 1.2898288817217318, "grad_norm": 35.38330809635001, "learning_rate": 1.6809066858859802e-06, "loss": 0.06583099365234375, "step": 149170 }, { "epoch": 1.289872115243275, "grad_norm": 0.8608043445080301, "learning_rate": 1.6807237068374133e-06, "loss": 0.0698638916015625, "step": 149175 }, { "epoch": 1.2899153487648183, "grad_norm": 39.39879667465865, "learning_rate": 1.6805407338732328e-06, "loss": 0.10054931640625, "step": 149180 }, { "epoch": 1.2899585822863615, "grad_norm": 18.82159346544811, "learning_rate": 1.6803577669942821e-06, "loss": 0.09533615112304687, "step": 149185 }, { "epoch": 1.2900018158079047, "grad_norm": 48.321361553868435, "learning_rate": 1.6801748062014049e-06, "loss": 0.09961681365966797, "step": 149190 }, { "epoch": 1.2900450493294482, "grad_norm": 0.23399674712400154, "learning_rate": 1.6799918514954435e-06, "loss": 0.0237762451171875, "step": 149195 }, { "epoch": 1.2900882828509914, "grad_norm": 0.6024014658449621, "learning_rate": 1.6798089028772444e-06, "loss": 0.059283447265625, "step": 149200 }, { "epoch": 1.2901315163725346, "grad_norm": 1.5898188910277977, "learning_rate": 1.6796259603476498e-06, "loss": 0.12205467224121094, "step": 149205 }, { "epoch": 1.2901747498940779, "grad_norm": 0.11122125377521548, "learning_rate": 1.6794430239075035e-06, "loss": 0.01562652587890625, "step": 149210 }, { "epoch": 1.290217983415621, "grad_norm": 1.1771295564098287, "learning_rate": 1.6792600935576496e-06, "loss": 0.01090850830078125, "step": 149215 }, { "epoch": 1.2902612169371643, "grad_norm": 5.4698830928878746, "learning_rate": 1.6790771692989313e-06, "loss": 0.060589599609375, "step": 149220 }, { "epoch": 1.2903044504587076, "grad_norm": 1.9139662687673051, "learning_rate": 1.6788942511321913e-06, "loss": 0.02379302978515625, "step": 149225 }, { "epoch": 1.2903476839802508, "grad_norm": 0.08016846539671159, "learning_rate": 1.6787113390582753e-06, "loss": 0.012099266052246094, "step": 149230 }, { "epoch": 1.2903909175017942, "grad_norm": 10.410845054324877, "learning_rate": 1.6785284330780245e-06, "loss": 0.15208663940429687, "step": 149235 }, { "epoch": 1.2904341510233375, "grad_norm": 26.684159404997928, "learning_rate": 1.678345533192285e-06, "loss": 0.11149406433105469, "step": 149240 }, { "epoch": 1.2904773845448807, "grad_norm": 0.7635510220276105, "learning_rate": 1.6781626394018995e-06, "loss": 0.0230743408203125, "step": 149245 }, { "epoch": 1.290520618066424, "grad_norm": 0.8733850701477635, "learning_rate": 1.6779797517077108e-06, "loss": 0.067877197265625, "step": 149250 }, { "epoch": 1.2905638515879674, "grad_norm": 17.777371470436528, "learning_rate": 1.677796870110563e-06, "loss": 0.08293342590332031, "step": 149255 }, { "epoch": 1.2906070851095106, "grad_norm": 6.385022011802212, "learning_rate": 1.6776139946112975e-06, "loss": 0.17850723266601562, "step": 149260 }, { "epoch": 1.2906503186310538, "grad_norm": 1.2320071344427646, "learning_rate": 1.6774311252107612e-06, "loss": 0.060803794860839845, "step": 149265 }, { "epoch": 1.290693552152597, "grad_norm": 0.8301557947755543, "learning_rate": 1.6772482619097954e-06, "loss": 0.18771209716796874, "step": 149270 }, { "epoch": 1.2907367856741403, "grad_norm": 16.55311817700425, "learning_rate": 1.6770654047092438e-06, "loss": 0.11565933227539063, "step": 149275 }, { "epoch": 1.2907800191956835, "grad_norm": 0.38837997687928655, "learning_rate": 1.6768825536099492e-06, "loss": 0.037091064453125, "step": 149280 }, { "epoch": 1.2908232527172268, "grad_norm": 12.867579085039809, "learning_rate": 1.6766997086127548e-06, "loss": 0.03516387939453125, "step": 149285 }, { "epoch": 1.29086648623877, "grad_norm": 10.185394831766896, "learning_rate": 1.6765168697185053e-06, "loss": 0.036258697509765625, "step": 149290 }, { "epoch": 1.2909097197603134, "grad_norm": 24.2803220330225, "learning_rate": 1.676334036928042e-06, "loss": 0.04893207550048828, "step": 149295 }, { "epoch": 1.2909529532818567, "grad_norm": 2.2109394834361003, "learning_rate": 1.6761512102422101e-06, "loss": 0.0183868408203125, "step": 149300 }, { "epoch": 1.2909961868034, "grad_norm": 0.30591148750252045, "learning_rate": 1.6759683896618522e-06, "loss": 0.05784759521484375, "step": 149305 }, { "epoch": 1.2910394203249431, "grad_norm": 0.28182518473982443, "learning_rate": 1.6757855751878107e-06, "loss": 0.006931877136230469, "step": 149310 }, { "epoch": 1.2910826538464863, "grad_norm": 3.128270734480813, "learning_rate": 1.6756027668209295e-06, "loss": 0.016889190673828124, "step": 149315 }, { "epoch": 1.2911258873680298, "grad_norm": 0.4593874165737644, "learning_rate": 1.675419964562051e-06, "loss": 0.026993942260742188, "step": 149320 }, { "epoch": 1.291169120889573, "grad_norm": 28.0367218653634, "learning_rate": 1.6752371684120176e-06, "loss": 0.1890422821044922, "step": 149325 }, { "epoch": 1.2912123544111163, "grad_norm": 2.0486263112968843, "learning_rate": 1.6750543783716742e-06, "loss": 0.04325294494628906, "step": 149330 }, { "epoch": 1.2912555879326595, "grad_norm": 4.845027101899332, "learning_rate": 1.6748715944418633e-06, "loss": 0.060767173767089844, "step": 149335 }, { "epoch": 1.2912988214542027, "grad_norm": 6.538884576247855, "learning_rate": 1.6746888166234274e-06, "loss": 0.04884796142578125, "step": 149340 }, { "epoch": 1.291342054975746, "grad_norm": 3.9392256365718668, "learning_rate": 1.6745060449172083e-06, "loss": 0.061846923828125, "step": 149345 }, { "epoch": 1.2913852884972892, "grad_norm": 0.6844118736235244, "learning_rate": 1.6743232793240516e-06, "loss": 0.0100738525390625, "step": 149350 }, { "epoch": 1.2914285220188324, "grad_norm": 0.7195339202765625, "learning_rate": 1.6741405198447977e-06, "loss": 0.014114761352539062, "step": 149355 }, { "epoch": 1.2914717555403759, "grad_norm": 0.6585891606985621, "learning_rate": 1.6739577664802915e-06, "loss": 0.03842487335205078, "step": 149360 }, { "epoch": 1.291514989061919, "grad_norm": 22.463619335388373, "learning_rate": 1.6737750192313745e-06, "loss": 0.09408111572265625, "step": 149365 }, { "epoch": 1.2915582225834623, "grad_norm": 0.9290727248642365, "learning_rate": 1.6735922780988904e-06, "loss": 0.07076454162597656, "step": 149370 }, { "epoch": 1.2916014561050055, "grad_norm": 2.741815665030061, "learning_rate": 1.6734095430836815e-06, "loss": 0.02816009521484375, "step": 149375 }, { "epoch": 1.2916446896265488, "grad_norm": 4.608038277658052, "learning_rate": 1.6732268141865908e-06, "loss": 0.1220306396484375, "step": 149380 }, { "epoch": 1.2916879231480922, "grad_norm": 5.969143852509049, "learning_rate": 1.6730440914084603e-06, "loss": 0.03231620788574219, "step": 149385 }, { "epoch": 1.2917311566696354, "grad_norm": 15.064195129961126, "learning_rate": 1.6728613747501326e-06, "loss": 0.071929931640625, "step": 149390 }, { "epoch": 1.2917743901911787, "grad_norm": 0.24847042777567638, "learning_rate": 1.6726786642124517e-06, "loss": 0.057154273986816405, "step": 149395 }, { "epoch": 1.291817623712722, "grad_norm": 0.9143127459346339, "learning_rate": 1.6724959597962581e-06, "loss": 0.00301971435546875, "step": 149400 }, { "epoch": 1.2918608572342651, "grad_norm": 2.842613516941529, "learning_rate": 1.6723132615023973e-06, "loss": 0.10382843017578125, "step": 149405 }, { "epoch": 1.2919040907558084, "grad_norm": 1.5101187646948728, "learning_rate": 1.6721305693317105e-06, "loss": 0.03887519836425781, "step": 149410 }, { "epoch": 1.2919473242773516, "grad_norm": 17.102399974658088, "learning_rate": 1.6719478832850401e-06, "loss": 0.14943923950195312, "step": 149415 }, { "epoch": 1.2919905577988948, "grad_norm": 6.286324457589687, "learning_rate": 1.6717652033632277e-06, "loss": 0.05145530700683594, "step": 149420 }, { "epoch": 1.2920337913204383, "grad_norm": 4.122412558673932, "learning_rate": 1.671582529567118e-06, "loss": 0.03003387451171875, "step": 149425 }, { "epoch": 1.2920770248419815, "grad_norm": 1.4540530911011402, "learning_rate": 1.671399861897552e-06, "loss": 0.148388671875, "step": 149430 }, { "epoch": 1.2921202583635247, "grad_norm": 19.149148365806507, "learning_rate": 1.6712172003553726e-06, "loss": 0.065032958984375, "step": 149435 }, { "epoch": 1.292163491885068, "grad_norm": 6.2559072021696345, "learning_rate": 1.671034544941422e-06, "loss": 0.0178375244140625, "step": 149440 }, { "epoch": 1.2922067254066112, "grad_norm": 0.14933746155559868, "learning_rate": 1.670851895656543e-06, "loss": 0.10161895751953125, "step": 149445 }, { "epoch": 1.2922499589281546, "grad_norm": 0.9338864967055841, "learning_rate": 1.6706692525015759e-06, "loss": 0.020421218872070313, "step": 149450 }, { "epoch": 1.2922931924496979, "grad_norm": 1.4235345739044107, "learning_rate": 1.6704866154773664e-06, "loss": 0.0367645263671875, "step": 149455 }, { "epoch": 1.292336425971241, "grad_norm": 6.790570307306973, "learning_rate": 1.6703039845847534e-06, "loss": 0.0633392333984375, "step": 149460 }, { "epoch": 1.2923796594927843, "grad_norm": 29.3031803825965, "learning_rate": 1.6701213598245827e-06, "loss": 0.12097053527832032, "step": 149465 }, { "epoch": 1.2924228930143276, "grad_norm": 0.6678145970497474, "learning_rate": 1.6699387411976944e-06, "loss": 0.09593658447265625, "step": 149470 }, { "epoch": 1.2924661265358708, "grad_norm": 17.309337610395165, "learning_rate": 1.6697561287049313e-06, "loss": 0.10460052490234376, "step": 149475 }, { "epoch": 1.292509360057414, "grad_norm": 0.5039500611069561, "learning_rate": 1.6695735223471354e-06, "loss": 0.008837890625, "step": 149480 }, { "epoch": 1.2925525935789572, "grad_norm": 5.319327782668535, "learning_rate": 1.6693909221251476e-06, "loss": 0.026689910888671876, "step": 149485 }, { "epoch": 1.2925958271005007, "grad_norm": 29.754433106074075, "learning_rate": 1.6692083280398124e-06, "loss": 0.10396385192871094, "step": 149490 }, { "epoch": 1.292639060622044, "grad_norm": 1.4170647355496728, "learning_rate": 1.6690257400919709e-06, "loss": 0.07183837890625, "step": 149495 }, { "epoch": 1.2926822941435872, "grad_norm": 1.3903956007866694, "learning_rate": 1.6688431582824652e-06, "loss": 0.023160552978515624, "step": 149500 }, { "epoch": 1.2927255276651304, "grad_norm": 0.2141549275442664, "learning_rate": 1.6686605826121371e-06, "loss": 0.012697219848632812, "step": 149505 }, { "epoch": 1.2927687611866738, "grad_norm": 11.979899691635927, "learning_rate": 1.6684780130818274e-06, "loss": 0.03381423950195313, "step": 149510 }, { "epoch": 1.292811994708217, "grad_norm": 3.270428941201091, "learning_rate": 1.6682954496923807e-06, "loss": 0.05126800537109375, "step": 149515 }, { "epoch": 1.2928552282297603, "grad_norm": 1.5532739629755987, "learning_rate": 1.6681128924446367e-06, "loss": 0.3187896728515625, "step": 149520 }, { "epoch": 1.2928984617513035, "grad_norm": 0.2082230729258804, "learning_rate": 1.6679303413394398e-06, "loss": 0.0103729248046875, "step": 149525 }, { "epoch": 1.2929416952728467, "grad_norm": 5.996743093930385, "learning_rate": 1.66774779637763e-06, "loss": 0.13956680297851562, "step": 149530 }, { "epoch": 1.29298492879439, "grad_norm": 1.2242829024324875, "learning_rate": 1.6675652575600497e-06, "loss": 0.043338775634765625, "step": 149535 }, { "epoch": 1.2930281623159332, "grad_norm": 4.707559437480224, "learning_rate": 1.6673827248875407e-06, "loss": 0.0356597900390625, "step": 149540 }, { "epoch": 1.2930713958374764, "grad_norm": 1.8339654009939435, "learning_rate": 1.6672001983609436e-06, "loss": 0.018341827392578124, "step": 149545 }, { "epoch": 1.2931146293590199, "grad_norm": 7.1192142660293225, "learning_rate": 1.6670176779811025e-06, "loss": 0.07557830810546876, "step": 149550 }, { "epoch": 1.2931578628805631, "grad_norm": 1.6930576901350018, "learning_rate": 1.6668351637488583e-06, "loss": 0.029686737060546874, "step": 149555 }, { "epoch": 1.2932010964021063, "grad_norm": 0.39562304565792755, "learning_rate": 1.666652655665053e-06, "loss": 0.2356414794921875, "step": 149560 }, { "epoch": 1.2932443299236496, "grad_norm": 135.36309263162107, "learning_rate": 1.666470153730527e-06, "loss": 0.103076171875, "step": 149565 }, { "epoch": 1.2932875634451928, "grad_norm": 21.007640176802106, "learning_rate": 1.666287657946122e-06, "loss": 0.110009765625, "step": 149570 }, { "epoch": 1.2933307969667363, "grad_norm": 0.15748971136998988, "learning_rate": 1.6661051683126815e-06, "loss": 0.06694869995117188, "step": 149575 }, { "epoch": 1.2933740304882795, "grad_norm": 3.52044862929435, "learning_rate": 1.6659226848310453e-06, "loss": 0.0923370361328125, "step": 149580 }, { "epoch": 1.2934172640098227, "grad_norm": 2.466386097803964, "learning_rate": 1.665740207502057e-06, "loss": 0.011409759521484375, "step": 149585 }, { "epoch": 1.293460497531366, "grad_norm": 7.393113483673708, "learning_rate": 1.6655577363265566e-06, "loss": 0.051369857788085935, "step": 149590 }, { "epoch": 1.2935037310529092, "grad_norm": 0.37551179034678367, "learning_rate": 1.6653752713053858e-06, "loss": 0.08234491348266601, "step": 149595 }, { "epoch": 1.2935469645744524, "grad_norm": 0.5097879679399167, "learning_rate": 1.6651928124393868e-06, "loss": 0.083392333984375, "step": 149600 }, { "epoch": 1.2935901980959956, "grad_norm": 0.8186493804434605, "learning_rate": 1.6650103597294001e-06, "loss": 0.17596397399902344, "step": 149605 }, { "epoch": 1.2936334316175389, "grad_norm": 8.490237156419271, "learning_rate": 1.664827913176267e-06, "loss": 0.030388259887695314, "step": 149610 }, { "epoch": 1.2936766651390823, "grad_norm": 0.6171680173463325, "learning_rate": 1.6646454727808307e-06, "loss": 0.4086578369140625, "step": 149615 }, { "epoch": 1.2937198986606255, "grad_norm": 19.390406094084828, "learning_rate": 1.6644630385439314e-06, "loss": 0.2940803527832031, "step": 149620 }, { "epoch": 1.2937631321821688, "grad_norm": 5.002233669146096, "learning_rate": 1.6642806104664092e-06, "loss": 0.06542129516601562, "step": 149625 }, { "epoch": 1.293806365703712, "grad_norm": 9.50403607392394, "learning_rate": 1.664098188549108e-06, "loss": 0.0515869140625, "step": 149630 }, { "epoch": 1.2938495992252552, "grad_norm": 0.1949351204427896, "learning_rate": 1.6639157727928685e-06, "loss": 0.027924346923828124, "step": 149635 }, { "epoch": 1.2938928327467987, "grad_norm": 0.40210769646284017, "learning_rate": 1.6637333631985306e-06, "loss": 0.07112884521484375, "step": 149640 }, { "epoch": 1.293936066268342, "grad_norm": 2.6911865003493785, "learning_rate": 1.6635509597669355e-06, "loss": 0.10028152465820313, "step": 149645 }, { "epoch": 1.2939792997898851, "grad_norm": 0.3318156633396331, "learning_rate": 1.6633685624989263e-06, "loss": 0.042462158203125, "step": 149650 }, { "epoch": 1.2940225333114284, "grad_norm": 6.66258504743319, "learning_rate": 1.6631861713953432e-06, "loss": 0.09506988525390625, "step": 149655 }, { "epoch": 1.2940657668329716, "grad_norm": 8.094680127220789, "learning_rate": 1.663003786457028e-06, "loss": 0.16060028076171876, "step": 149660 }, { "epoch": 1.2941090003545148, "grad_norm": 0.7136104029655191, "learning_rate": 1.6628214076848207e-06, "loss": 0.18356781005859374, "step": 149665 }, { "epoch": 1.294152233876058, "grad_norm": 15.42956695872345, "learning_rate": 1.6626390350795629e-06, "loss": 0.1582042694091797, "step": 149670 }, { "epoch": 1.2941954673976013, "grad_norm": 6.287224150019356, "learning_rate": 1.6624566686420943e-06, "loss": 0.20195770263671875, "step": 149675 }, { "epoch": 1.2942387009191447, "grad_norm": 7.845594966351628, "learning_rate": 1.6622743083732588e-06, "loss": 0.02754058837890625, "step": 149680 }, { "epoch": 1.294281934440688, "grad_norm": 0.05602358130402427, "learning_rate": 1.6620919542738949e-06, "loss": 0.10691204071044921, "step": 149685 }, { "epoch": 1.2943251679622312, "grad_norm": 6.0638293538005925, "learning_rate": 1.6619096063448458e-06, "loss": 0.0880218505859375, "step": 149690 }, { "epoch": 1.2943684014837744, "grad_norm": 8.302631415818736, "learning_rate": 1.661727264586951e-06, "loss": 0.018494415283203124, "step": 149695 }, { "epoch": 1.2944116350053176, "grad_norm": 0.39918253980107843, "learning_rate": 1.6615449290010526e-06, "loss": 0.18299179077148436, "step": 149700 }, { "epoch": 1.294454868526861, "grad_norm": 1.4187292206902853, "learning_rate": 1.661362599587989e-06, "loss": 0.019884490966796876, "step": 149705 }, { "epoch": 1.2944981020484043, "grad_norm": 1.989282489994867, "learning_rate": 1.6611802763486035e-06, "loss": 0.06889495849609376, "step": 149710 }, { "epoch": 1.2945413355699475, "grad_norm": 1.077851972288405, "learning_rate": 1.660997959283737e-06, "loss": 0.04152679443359375, "step": 149715 }, { "epoch": 1.2945845690914908, "grad_norm": 0.43624308362332354, "learning_rate": 1.6608156483942293e-06, "loss": 0.010735416412353515, "step": 149720 }, { "epoch": 1.294627802613034, "grad_norm": 3.9326697181615753, "learning_rate": 1.6606333436809217e-06, "loss": 0.32304840087890624, "step": 149725 }, { "epoch": 1.2946710361345772, "grad_norm": 0.47032433265991647, "learning_rate": 1.6604510451446548e-06, "loss": 0.03543434143066406, "step": 149730 }, { "epoch": 1.2947142696561205, "grad_norm": 0.5970054683861883, "learning_rate": 1.6602687527862678e-06, "loss": 0.025723648071289063, "step": 149735 }, { "epoch": 1.294757503177664, "grad_norm": 1.6640038219210656, "learning_rate": 1.6600864666066044e-06, "loss": 0.14097862243652343, "step": 149740 }, { "epoch": 1.2948007366992071, "grad_norm": 6.358509909178701, "learning_rate": 1.6599041866065026e-06, "loss": 0.10410003662109375, "step": 149745 }, { "epoch": 1.2948439702207504, "grad_norm": 5.528549942926756, "learning_rate": 1.6597219127868056e-06, "loss": 0.10851287841796875, "step": 149750 }, { "epoch": 1.2948872037422936, "grad_norm": 28.65550774423757, "learning_rate": 1.659539645148352e-06, "loss": 0.08480339050292969, "step": 149755 }, { "epoch": 1.2949304372638368, "grad_norm": 3.8755347167920373, "learning_rate": 1.659357383691984e-06, "loss": 0.045415496826171874, "step": 149760 }, { "epoch": 1.2949736707853803, "grad_norm": 8.627149013193119, "learning_rate": 1.6591751284185407e-06, "loss": 0.04130859375, "step": 149765 }, { "epoch": 1.2950169043069235, "grad_norm": 0.721067360497266, "learning_rate": 1.6589928793288625e-06, "loss": 0.009613037109375, "step": 149770 }, { "epoch": 1.2950601378284667, "grad_norm": 2.3574664360823827, "learning_rate": 1.6588106364237917e-06, "loss": 0.04425334930419922, "step": 149775 }, { "epoch": 1.29510337135001, "grad_norm": 0.4167743425978865, "learning_rate": 1.6586283997041676e-06, "loss": 0.07025489807128907, "step": 149780 }, { "epoch": 1.2951466048715532, "grad_norm": 0.20282835866698698, "learning_rate": 1.6584461691708304e-06, "loss": 0.013607025146484375, "step": 149785 }, { "epoch": 1.2951898383930964, "grad_norm": 4.169976241677334, "learning_rate": 1.6582639448246212e-06, "loss": 0.07161636352539062, "step": 149790 }, { "epoch": 1.2952330719146397, "grad_norm": 6.929187622505194, "learning_rate": 1.6580817266663793e-06, "loss": 0.05369415283203125, "step": 149795 }, { "epoch": 1.2952763054361829, "grad_norm": 6.207952077685779, "learning_rate": 1.6578995146969455e-06, "loss": 0.1748495101928711, "step": 149800 }, { "epoch": 1.2953195389577263, "grad_norm": 0.6030617285694981, "learning_rate": 1.6577173089171623e-06, "loss": 0.04110221862792969, "step": 149805 }, { "epoch": 1.2953627724792696, "grad_norm": 24.549867796053057, "learning_rate": 1.6575351093278676e-06, "loss": 0.150341796875, "step": 149810 }, { "epoch": 1.2954060060008128, "grad_norm": 11.740376571427978, "learning_rate": 1.6573529159299028e-06, "loss": 0.15035476684570312, "step": 149815 }, { "epoch": 1.295449239522356, "grad_norm": 17.541965652263368, "learning_rate": 1.6571707287241076e-06, "loss": 0.048561477661132814, "step": 149820 }, { "epoch": 1.2954924730438993, "grad_norm": 3.3309700180757016, "learning_rate": 1.6569885477113224e-06, "loss": 0.05113983154296875, "step": 149825 }, { "epoch": 1.2955357065654427, "grad_norm": 0.5203280579048674, "learning_rate": 1.6568063728923873e-06, "loss": 0.025104522705078125, "step": 149830 }, { "epoch": 1.295578940086986, "grad_norm": 0.4247284599119918, "learning_rate": 1.6566242042681413e-06, "loss": 0.031960678100585935, "step": 149835 }, { "epoch": 1.2956221736085292, "grad_norm": 0.043738859348133693, "learning_rate": 1.6564420418394267e-06, "loss": 0.10932693481445313, "step": 149840 }, { "epoch": 1.2956654071300724, "grad_norm": 14.165372137009792, "learning_rate": 1.6562598856070831e-06, "loss": 0.15377655029296874, "step": 149845 }, { "epoch": 1.2957086406516156, "grad_norm": 1.0085194463059508, "learning_rate": 1.656077735571949e-06, "loss": 0.07945480346679687, "step": 149850 }, { "epoch": 1.2957518741731588, "grad_norm": 3.831757892219461, "learning_rate": 1.6558955917348666e-06, "loss": 0.01967315673828125, "step": 149855 }, { "epoch": 1.295795107694702, "grad_norm": 1.166626224315931, "learning_rate": 1.655713454096675e-06, "loss": 0.024536895751953124, "step": 149860 }, { "epoch": 1.2958383412162453, "grad_norm": 9.248403666612438, "learning_rate": 1.6555313226582128e-06, "loss": 0.06712570190429687, "step": 149865 }, { "epoch": 1.2958815747377888, "grad_norm": 0.1732967929354796, "learning_rate": 1.6553491974203226e-06, "loss": 0.0406524658203125, "step": 149870 }, { "epoch": 1.295924808259332, "grad_norm": 0.5038639095664912, "learning_rate": 1.655167078383843e-06, "loss": 0.07544212341308594, "step": 149875 }, { "epoch": 1.2959680417808752, "grad_norm": 9.137409882943743, "learning_rate": 1.6549849655496143e-06, "loss": 0.054032516479492185, "step": 149880 }, { "epoch": 1.2960112753024184, "grad_norm": 18.841993274490413, "learning_rate": 1.6548028589184762e-06, "loss": 0.20511627197265625, "step": 149885 }, { "epoch": 1.2960545088239617, "grad_norm": 1.984636592249191, "learning_rate": 1.6546207584912682e-06, "loss": 0.075970458984375, "step": 149890 }, { "epoch": 1.2960977423455051, "grad_norm": 7.399615215555474, "learning_rate": 1.6544386642688293e-06, "loss": 0.09380645751953125, "step": 149895 }, { "epoch": 1.2961409758670484, "grad_norm": 0.21904305361125043, "learning_rate": 1.6542565762520016e-06, "loss": 0.051878738403320315, "step": 149900 }, { "epoch": 1.2961842093885916, "grad_norm": 1.0876351112748175, "learning_rate": 1.6540744944416235e-06, "loss": 0.038383865356445314, "step": 149905 }, { "epoch": 1.2962274429101348, "grad_norm": 3.9758988191310918, "learning_rate": 1.6538924188385338e-06, "loss": 0.14956531524658204, "step": 149910 }, { "epoch": 1.296270676431678, "grad_norm": 1.7265548870272835, "learning_rate": 1.6537103494435745e-06, "loss": 0.042086029052734376, "step": 149915 }, { "epoch": 1.2963139099532213, "grad_norm": 25.01801734683934, "learning_rate": 1.653528286257584e-06, "loss": 0.06314620971679688, "step": 149920 }, { "epoch": 1.2963571434747645, "grad_norm": 22.513802782532007, "learning_rate": 1.6533462292814021e-06, "loss": 0.04583740234375, "step": 149925 }, { "epoch": 1.2964003769963077, "grad_norm": 17.560396959616778, "learning_rate": 1.6531641785158672e-06, "loss": 0.23916778564453126, "step": 149930 }, { "epoch": 1.2964436105178512, "grad_norm": 0.057736636981306884, "learning_rate": 1.6529821339618216e-06, "loss": 0.14104232788085938, "step": 149935 }, { "epoch": 1.2964868440393944, "grad_norm": 0.42244143804730977, "learning_rate": 1.652800095620103e-06, "loss": 0.029631805419921876, "step": 149940 }, { "epoch": 1.2965300775609376, "grad_norm": 5.279266321427542, "learning_rate": 1.6526180634915515e-06, "loss": 0.0516693115234375, "step": 149945 }, { "epoch": 1.2965733110824809, "grad_norm": 12.737586419918044, "learning_rate": 1.6524360375770065e-06, "loss": 0.08656959533691407, "step": 149950 }, { "epoch": 1.2966165446040243, "grad_norm": 80.86973510365658, "learning_rate": 1.6522540178773072e-06, "loss": 0.12755584716796875, "step": 149955 }, { "epoch": 1.2966597781255675, "grad_norm": 1.7729635968361852, "learning_rate": 1.6520720043932921e-06, "loss": 0.01256704330444336, "step": 149960 }, { "epoch": 1.2967030116471108, "grad_norm": 43.204258897017404, "learning_rate": 1.651889997125803e-06, "loss": 0.18450546264648438, "step": 149965 }, { "epoch": 1.296746245168654, "grad_norm": 40.256580478833605, "learning_rate": 1.6517079960756769e-06, "loss": 0.5141719818115235, "step": 149970 }, { "epoch": 1.2967894786901972, "grad_norm": 0.25965925354551317, "learning_rate": 1.6515260012437561e-06, "loss": 0.022234344482421876, "step": 149975 }, { "epoch": 1.2968327122117405, "grad_norm": 1.9084054335789944, "learning_rate": 1.651344012630878e-06, "loss": 0.3585704803466797, "step": 149980 }, { "epoch": 1.2968759457332837, "grad_norm": 0.17815602477248427, "learning_rate": 1.6511620302378817e-06, "loss": 0.0290802001953125, "step": 149985 }, { "epoch": 1.296919179254827, "grad_norm": 0.8272493241512529, "learning_rate": 1.6509800540656072e-06, "loss": 0.05960655212402344, "step": 149990 }, { "epoch": 1.2969624127763704, "grad_norm": 0.3220208017106812, "learning_rate": 1.6507980841148922e-06, "loss": 0.01963958740234375, "step": 149995 }, { "epoch": 1.2970056462979136, "grad_norm": 2.0379819004531337, "learning_rate": 1.6506161203865784e-06, "loss": 0.12570037841796874, "step": 150000 }, { "epoch": 1.2970488798194568, "grad_norm": 0.9896217604441461, "learning_rate": 1.6504341628815042e-06, "loss": 0.05236663818359375, "step": 150005 }, { "epoch": 1.297092113341, "grad_norm": 3.4771358685173785, "learning_rate": 1.650252211600508e-06, "loss": 0.10167427062988281, "step": 150010 }, { "epoch": 1.2971353468625433, "grad_norm": 12.361564857130553, "learning_rate": 1.6500702665444296e-06, "loss": 0.14815216064453124, "step": 150015 }, { "epoch": 1.2971785803840867, "grad_norm": 1.4515652046045744, "learning_rate": 1.6498883277141064e-06, "loss": 0.08560295104980468, "step": 150020 }, { "epoch": 1.29722181390563, "grad_norm": 1.716305089130653, "learning_rate": 1.649706395110379e-06, "loss": 0.03388137817382812, "step": 150025 }, { "epoch": 1.2972650474271732, "grad_norm": 2.3257573273061793, "learning_rate": 1.6495244687340877e-06, "loss": 0.04341468811035156, "step": 150030 }, { "epoch": 1.2973082809487164, "grad_norm": 6.875856623734084, "learning_rate": 1.64934254858607e-06, "loss": 0.039019012451171876, "step": 150035 }, { "epoch": 1.2973515144702596, "grad_norm": 0.8592584855954809, "learning_rate": 1.6491606346671655e-06, "loss": 0.011513900756835938, "step": 150040 }, { "epoch": 1.2973947479918029, "grad_norm": 1.095705442388164, "learning_rate": 1.6489787269782124e-06, "loss": 0.059848785400390625, "step": 150045 }, { "epoch": 1.297437981513346, "grad_norm": 4.766745166640615, "learning_rate": 1.64879682552005e-06, "loss": 0.04997520446777344, "step": 150050 }, { "epoch": 1.2974812150348893, "grad_norm": 0.34826033015610397, "learning_rate": 1.6486149302935165e-06, "loss": 0.15228118896484374, "step": 150055 }, { "epoch": 1.2975244485564328, "grad_norm": 24.673305357375625, "learning_rate": 1.6484330412994525e-06, "loss": 0.10914554595947265, "step": 150060 }, { "epoch": 1.297567682077976, "grad_norm": 34.123058091843966, "learning_rate": 1.6482511585386956e-06, "loss": 0.26895599365234374, "step": 150065 }, { "epoch": 1.2976109155995192, "grad_norm": 11.835688406963365, "learning_rate": 1.6480692820120854e-06, "loss": 0.11312484741210938, "step": 150070 }, { "epoch": 1.2976541491210625, "grad_norm": 2.499270214900433, "learning_rate": 1.647887411720459e-06, "loss": 0.029417800903320312, "step": 150075 }, { "epoch": 1.2976973826426057, "grad_norm": 0.1793613669575099, "learning_rate": 1.6477055476646575e-06, "loss": 0.060467529296875, "step": 150080 }, { "epoch": 1.2977406161641492, "grad_norm": 24.06028057417178, "learning_rate": 1.647523689845518e-06, "loss": 0.13733901977539062, "step": 150085 }, { "epoch": 1.2977838496856924, "grad_norm": 2.0259920367619144, "learning_rate": 1.6473418382638792e-06, "loss": 0.076129150390625, "step": 150090 }, { "epoch": 1.2978270832072356, "grad_norm": 0.5666037759117215, "learning_rate": 1.6471599929205813e-06, "loss": 0.047998809814453126, "step": 150095 }, { "epoch": 1.2978703167287788, "grad_norm": 14.049213484658676, "learning_rate": 1.6469781538164617e-06, "loss": 0.06700172424316406, "step": 150100 }, { "epoch": 1.297913550250322, "grad_norm": 75.19492685502577, "learning_rate": 1.6467963209523594e-06, "loss": 0.1078329086303711, "step": 150105 }, { "epoch": 1.2979567837718653, "grad_norm": 16.194706833257328, "learning_rate": 1.646614494329113e-06, "loss": 0.05261306762695313, "step": 150110 }, { "epoch": 1.2980000172934085, "grad_norm": 11.446803012488695, "learning_rate": 1.6464326739475612e-06, "loss": 0.023331451416015624, "step": 150115 }, { "epoch": 1.2980432508149518, "grad_norm": 0.14138607154384167, "learning_rate": 1.646250859808541e-06, "loss": 0.060909080505371097, "step": 150120 }, { "epoch": 1.2980864843364952, "grad_norm": 0.25642176722373594, "learning_rate": 1.6460690519128932e-06, "loss": 0.0860504150390625, "step": 150125 }, { "epoch": 1.2981297178580384, "grad_norm": 2.0230214326218605, "learning_rate": 1.6458872502614555e-06, "loss": 0.041520309448242185, "step": 150130 }, { "epoch": 1.2981729513795817, "grad_norm": 1.0007334885540577, "learning_rate": 1.645705454855065e-06, "loss": 0.021724700927734375, "step": 150135 }, { "epoch": 1.298216184901125, "grad_norm": 3.7385452419220746, "learning_rate": 1.645523665694562e-06, "loss": 0.023792266845703125, "step": 150140 }, { "epoch": 1.2982594184226681, "grad_norm": 13.619083491958131, "learning_rate": 1.6453418827807846e-06, "loss": 0.10596771240234375, "step": 150145 }, { "epoch": 1.2983026519442116, "grad_norm": 2.2623317633720545, "learning_rate": 1.6451601061145695e-06, "loss": 0.06705245971679688, "step": 150150 }, { "epoch": 1.2983458854657548, "grad_norm": 1.4807328239345683, "learning_rate": 1.6449783356967574e-06, "loss": 0.070416259765625, "step": 150155 }, { "epoch": 1.298389118987298, "grad_norm": 0.39428949897128623, "learning_rate": 1.6447965715281857e-06, "loss": 0.01303558349609375, "step": 150160 }, { "epoch": 1.2984323525088413, "grad_norm": 25.65204859819754, "learning_rate": 1.644614813609692e-06, "loss": 0.06508827209472656, "step": 150165 }, { "epoch": 1.2984755860303845, "grad_norm": 0.3928361618217788, "learning_rate": 1.6444330619421154e-06, "loss": 0.11479034423828124, "step": 150170 }, { "epoch": 1.2985188195519277, "grad_norm": 1.4927099676637077, "learning_rate": 1.6442513165262934e-06, "loss": 0.0539215087890625, "step": 150175 }, { "epoch": 1.298562053073471, "grad_norm": 1.281801661385891, "learning_rate": 1.644069577363065e-06, "loss": 0.019527435302734375, "step": 150180 }, { "epoch": 1.2986052865950142, "grad_norm": 0.322902562727894, "learning_rate": 1.6438878444532664e-06, "loss": 0.022736740112304688, "step": 150185 }, { "epoch": 1.2986485201165576, "grad_norm": 9.773243924947305, "learning_rate": 1.6437061177977387e-06, "loss": 0.08095464706420899, "step": 150190 }, { "epoch": 1.2986917536381009, "grad_norm": 13.587713395968114, "learning_rate": 1.6435243973973166e-06, "loss": 0.045556640625, "step": 150195 }, { "epoch": 1.298734987159644, "grad_norm": 1.3806000674763832, "learning_rate": 1.643342683252842e-06, "loss": 0.03976783752441406, "step": 150200 }, { "epoch": 1.2987782206811873, "grad_norm": 15.233824283719848, "learning_rate": 1.643160975365151e-06, "loss": 0.059820556640625, "step": 150205 }, { "epoch": 1.2988214542027308, "grad_norm": 49.50252711722484, "learning_rate": 1.6429792737350815e-06, "loss": 0.20825653076171874, "step": 150210 }, { "epoch": 1.298864687724274, "grad_norm": 1.7557949987454518, "learning_rate": 1.6427975783634706e-06, "loss": 0.05567474365234375, "step": 150215 }, { "epoch": 1.2989079212458172, "grad_norm": 1.5051414025203487, "learning_rate": 1.6426158892511585e-06, "loss": 0.02553138732910156, "step": 150220 }, { "epoch": 1.2989511547673604, "grad_norm": 6.388556105138272, "learning_rate": 1.642434206398982e-06, "loss": 0.0664215087890625, "step": 150225 }, { "epoch": 1.2989943882889037, "grad_norm": 0.3948553955694232, "learning_rate": 1.6422525298077793e-06, "loss": 0.15475082397460938, "step": 150230 }, { "epoch": 1.299037621810447, "grad_norm": 0.7681518325839704, "learning_rate": 1.6420708594783872e-06, "loss": 0.03897533416748047, "step": 150235 }, { "epoch": 1.2990808553319901, "grad_norm": 7.4467287935416016, "learning_rate": 1.641889195411645e-06, "loss": 0.059917449951171875, "step": 150240 }, { "epoch": 1.2991240888535334, "grad_norm": 2.8712324483553497, "learning_rate": 1.6417075376083886e-06, "loss": 0.012664794921875, "step": 150245 }, { "epoch": 1.2991673223750768, "grad_norm": 0.2890670667771702, "learning_rate": 1.6415258860694565e-06, "loss": 0.3130035400390625, "step": 150250 }, { "epoch": 1.29921055589662, "grad_norm": 0.07292695389933947, "learning_rate": 1.6413442407956886e-06, "loss": 0.05919036865234375, "step": 150255 }, { "epoch": 1.2992537894181633, "grad_norm": 7.293494043475628, "learning_rate": 1.6411626017879207e-06, "loss": 0.02710895538330078, "step": 150260 }, { "epoch": 1.2992970229397065, "grad_norm": 0.11483875942087612, "learning_rate": 1.640980969046991e-06, "loss": 0.0720733642578125, "step": 150265 }, { "epoch": 1.2993402564612497, "grad_norm": 20.07438094200877, "learning_rate": 1.640799342573737e-06, "loss": 0.06905364990234375, "step": 150270 }, { "epoch": 1.2993834899827932, "grad_norm": 2.7052646678541548, "learning_rate": 1.6406177223689963e-06, "loss": 0.19509925842285156, "step": 150275 }, { "epoch": 1.2994267235043364, "grad_norm": 1.5538591173616891, "learning_rate": 1.6404361084336054e-06, "loss": 0.188287353515625, "step": 150280 }, { "epoch": 1.2994699570258796, "grad_norm": 3.3445659321611245, "learning_rate": 1.6402545007684043e-06, "loss": 0.17649307250976562, "step": 150285 }, { "epoch": 1.2995131905474229, "grad_norm": 1.1566990175625267, "learning_rate": 1.6400728993742291e-06, "loss": 0.019870758056640625, "step": 150290 }, { "epoch": 1.299556424068966, "grad_norm": 0.6248681999110385, "learning_rate": 1.6398913042519177e-06, "loss": 0.025542831420898436, "step": 150295 }, { "epoch": 1.2995996575905093, "grad_norm": 4.005560906055271, "learning_rate": 1.639709715402306e-06, "loss": 0.09077911376953125, "step": 150300 }, { "epoch": 1.2996428911120526, "grad_norm": 1.1848659155271564, "learning_rate": 1.6395281328262342e-06, "loss": 0.03229866027832031, "step": 150305 }, { "epoch": 1.2996861246335958, "grad_norm": 53.66127685626285, "learning_rate": 1.6393465565245377e-06, "loss": 0.1707395553588867, "step": 150310 }, { "epoch": 1.2997293581551392, "grad_norm": 0.12068521555930363, "learning_rate": 1.639164986498055e-06, "loss": 0.0026889801025390624, "step": 150315 }, { "epoch": 1.2997725916766825, "grad_norm": 7.259828179893134, "learning_rate": 1.6389834227476236e-06, "loss": 0.014138412475585938, "step": 150320 }, { "epoch": 1.2998158251982257, "grad_norm": 23.173326245156304, "learning_rate": 1.6388018652740806e-06, "loss": 0.1111480712890625, "step": 150325 }, { "epoch": 1.299859058719769, "grad_norm": 12.531780907573213, "learning_rate": 1.6386203140782625e-06, "loss": 0.22547683715820313, "step": 150330 }, { "epoch": 1.2999022922413122, "grad_norm": 3.344726613104997, "learning_rate": 1.6384387691610076e-06, "loss": 0.029131317138671876, "step": 150335 }, { "epoch": 1.2999455257628556, "grad_norm": 0.297449140462355, "learning_rate": 1.6382572305231527e-06, "loss": 0.024010467529296874, "step": 150340 }, { "epoch": 1.2999887592843988, "grad_norm": 0.20920643649193266, "learning_rate": 1.6380756981655341e-06, "loss": 0.029120254516601562, "step": 150345 }, { "epoch": 1.300031992805942, "grad_norm": 1.4182827029121992, "learning_rate": 1.637894172088991e-06, "loss": 0.06283798217773437, "step": 150350 }, { "epoch": 1.3000752263274853, "grad_norm": 7.688622539910766, "learning_rate": 1.63771265229436e-06, "loss": 0.039075469970703124, "step": 150355 }, { "epoch": 1.3001184598490285, "grad_norm": 1.5878120897314678, "learning_rate": 1.6375311387824764e-06, "loss": 0.03761100769042969, "step": 150360 }, { "epoch": 1.3001616933705717, "grad_norm": 2.792528004216878, "learning_rate": 1.6373496315541801e-06, "loss": 0.06974372863769532, "step": 150365 }, { "epoch": 1.300204926892115, "grad_norm": 0.3973245517004023, "learning_rate": 1.637168130610307e-06, "loss": 0.12474136352539063, "step": 150370 }, { "epoch": 1.3002481604136582, "grad_norm": 0.15688263466783778, "learning_rate": 1.6369866359516922e-06, "loss": 0.126312255859375, "step": 150375 }, { "epoch": 1.3002913939352017, "grad_norm": 0.647340570849146, "learning_rate": 1.6368051475791766e-06, "loss": 0.12424087524414062, "step": 150380 }, { "epoch": 1.3003346274567449, "grad_norm": 0.0963489797372578, "learning_rate": 1.636623665493595e-06, "loss": 0.04253578186035156, "step": 150385 }, { "epoch": 1.3003778609782881, "grad_norm": 1.8893147176834164, "learning_rate": 1.6364421896957844e-06, "loss": 0.02974700927734375, "step": 150390 }, { "epoch": 1.3004210944998313, "grad_norm": 7.545325614432428, "learning_rate": 1.6362607201865818e-06, "loss": 0.06228179931640625, "step": 150395 }, { "epoch": 1.3004643280213746, "grad_norm": 0.8342748956063645, "learning_rate": 1.6360792569668246e-06, "loss": 0.05385894775390625, "step": 150400 }, { "epoch": 1.300507561542918, "grad_norm": 0.05939799369618199, "learning_rate": 1.635897800037348e-06, "loss": 0.048620986938476565, "step": 150405 }, { "epoch": 1.3005507950644613, "grad_norm": 1.7250950494716308, "learning_rate": 1.6357163493989917e-06, "loss": 0.056201171875, "step": 150410 }, { "epoch": 1.3005940285860045, "grad_norm": 10.528871163296541, "learning_rate": 1.6355349050525907e-06, "loss": 0.06981048583984376, "step": 150415 }, { "epoch": 1.3006372621075477, "grad_norm": 31.415601319121876, "learning_rate": 1.6353534669989807e-06, "loss": 0.16994056701660157, "step": 150420 }, { "epoch": 1.300680495629091, "grad_norm": 10.232846018878071, "learning_rate": 1.6351720352390017e-06, "loss": 0.040390396118164064, "step": 150425 }, { "epoch": 1.3007237291506342, "grad_norm": 1.3378430534708439, "learning_rate": 1.6349906097734882e-06, "loss": 0.09294967651367188, "step": 150430 }, { "epoch": 1.3007669626721774, "grad_norm": 15.32429999542437, "learning_rate": 1.6348091906032777e-06, "loss": 0.034853363037109376, "step": 150435 }, { "epoch": 1.3008101961937206, "grad_norm": 1.3659954349156482, "learning_rate": 1.6346277777292054e-06, "loss": 0.04503021240234375, "step": 150440 }, { "epoch": 1.300853429715264, "grad_norm": 1.2239138025031369, "learning_rate": 1.6344463711521101e-06, "loss": 0.017079544067382813, "step": 150445 }, { "epoch": 1.3008966632368073, "grad_norm": 17.70894590986584, "learning_rate": 1.6342649708728276e-06, "loss": 0.09713954925537109, "step": 150450 }, { "epoch": 1.3009398967583505, "grad_norm": 35.25389375142566, "learning_rate": 1.6340835768921945e-06, "loss": 0.0827357292175293, "step": 150455 }, { "epoch": 1.3009831302798938, "grad_norm": 8.426857826040358, "learning_rate": 1.633902189211047e-06, "loss": 0.056927490234375, "step": 150460 }, { "epoch": 1.3010263638014372, "grad_norm": 3.084029582156655, "learning_rate": 1.6337208078302222e-06, "loss": 0.018982505798339842, "step": 150465 }, { "epoch": 1.3010695973229804, "grad_norm": 22.6096223504365, "learning_rate": 1.6335394327505548e-06, "loss": 0.055276679992675784, "step": 150470 }, { "epoch": 1.3011128308445237, "grad_norm": 3.0607390967014423, "learning_rate": 1.6333580639728831e-06, "loss": 0.05587615966796875, "step": 150475 }, { "epoch": 1.301156064366067, "grad_norm": 0.9522518080961293, "learning_rate": 1.6331767014980443e-06, "loss": 0.022728919982910156, "step": 150480 }, { "epoch": 1.3011992978876101, "grad_norm": 62.95896265022272, "learning_rate": 1.6329953453268739e-06, "loss": 0.41491851806640623, "step": 150485 }, { "epoch": 1.3012425314091534, "grad_norm": 2.32921885754602, "learning_rate": 1.6328139954602081e-06, "loss": 0.05126876831054687, "step": 150490 }, { "epoch": 1.3012857649306966, "grad_norm": 0.9184876351116049, "learning_rate": 1.632632651898883e-06, "loss": 0.00803070068359375, "step": 150495 }, { "epoch": 1.3013289984522398, "grad_norm": 0.17019019748284386, "learning_rate": 1.6324513146437343e-06, "loss": 0.03152179718017578, "step": 150500 }, { "epoch": 1.3013722319737833, "grad_norm": 2.413402235956546, "learning_rate": 1.6322699836956007e-06, "loss": 0.08706512451171874, "step": 150505 }, { "epoch": 1.3014154654953265, "grad_norm": 0.39962414858995504, "learning_rate": 1.632088659055317e-06, "loss": 0.039963150024414064, "step": 150510 }, { "epoch": 1.3014586990168697, "grad_norm": 5.31525771909046, "learning_rate": 1.6319073407237197e-06, "loss": 0.04658203125, "step": 150515 }, { "epoch": 1.301501932538413, "grad_norm": 8.032392731343718, "learning_rate": 1.6317260287016446e-06, "loss": 0.03959236145019531, "step": 150520 }, { "epoch": 1.3015451660599562, "grad_norm": 4.569952838790559, "learning_rate": 1.631544722989927e-06, "loss": 0.05558929443359375, "step": 150525 }, { "epoch": 1.3015883995814996, "grad_norm": 23.048124438762326, "learning_rate": 1.6313634235894053e-06, "loss": 0.17575836181640625, "step": 150530 }, { "epoch": 1.3016316331030429, "grad_norm": 11.520262918911836, "learning_rate": 1.6311821305009138e-06, "loss": 0.15151481628417968, "step": 150535 }, { "epoch": 1.301674866624586, "grad_norm": 0.05430976739733612, "learning_rate": 1.6310008437252899e-06, "loss": 0.18578453063964845, "step": 150540 }, { "epoch": 1.3017181001461293, "grad_norm": 8.82030083558362, "learning_rate": 1.6308195632633695e-06, "loss": 0.0728515625, "step": 150545 }, { "epoch": 1.3017613336676725, "grad_norm": 7.867675622962194, "learning_rate": 1.6306382891159884e-06, "loss": 0.06912384033203126, "step": 150550 }, { "epoch": 1.3018045671892158, "grad_norm": 0.34913295510071685, "learning_rate": 1.6304570212839821e-06, "loss": 0.13809890747070314, "step": 150555 }, { "epoch": 1.301847800710759, "grad_norm": 7.410377861357139, "learning_rate": 1.6302757597681874e-06, "loss": 0.0457916259765625, "step": 150560 }, { "epoch": 1.3018910342323022, "grad_norm": 6.781275616047808, "learning_rate": 1.6300945045694387e-06, "loss": 0.016781425476074217, "step": 150565 }, { "epoch": 1.3019342677538457, "grad_norm": 0.670460930853652, "learning_rate": 1.6299132556885742e-06, "loss": 0.011634063720703126, "step": 150570 }, { "epoch": 1.301977501275389, "grad_norm": 0.4817449176035399, "learning_rate": 1.6297320131264288e-06, "loss": 0.03291816711425781, "step": 150575 }, { "epoch": 1.3020207347969321, "grad_norm": 0.36826592201159686, "learning_rate": 1.629550776883838e-06, "loss": 0.03995361328125, "step": 150580 }, { "epoch": 1.3020639683184754, "grad_norm": 1.776341806426739, "learning_rate": 1.6293695469616368e-06, "loss": 0.09564170837402344, "step": 150585 }, { "epoch": 1.3021072018400186, "grad_norm": 5.09803569187428, "learning_rate": 1.6291883233606633e-06, "loss": 0.06446914672851563, "step": 150590 }, { "epoch": 1.302150435361562, "grad_norm": 1.3675177382448918, "learning_rate": 1.6290071060817522e-06, "loss": 0.07957916259765625, "step": 150595 }, { "epoch": 1.3021936688831053, "grad_norm": 123.82108937151284, "learning_rate": 1.6288258951257382e-06, "loss": 0.07635688781738281, "step": 150600 }, { "epoch": 1.3022369024046485, "grad_norm": 0.4249546593610999, "learning_rate": 1.628644690493459e-06, "loss": 0.09461212158203125, "step": 150605 }, { "epoch": 1.3022801359261917, "grad_norm": 46.99365807978653, "learning_rate": 1.6284634921857495e-06, "loss": 0.12617111206054688, "step": 150610 }, { "epoch": 1.302323369447735, "grad_norm": 0.6130443553189442, "learning_rate": 1.6282823002034447e-06, "loss": 0.0677490234375, "step": 150615 }, { "epoch": 1.3023666029692782, "grad_norm": 17.877147846790113, "learning_rate": 1.628101114547381e-06, "loss": 0.07926654815673828, "step": 150620 }, { "epoch": 1.3024098364908214, "grad_norm": 4.487994169903329, "learning_rate": 1.627919935218394e-06, "loss": 0.02909860610961914, "step": 150625 }, { "epoch": 1.3024530700123647, "grad_norm": 0.3187613453529165, "learning_rate": 1.6277387622173176e-06, "loss": 0.02623882293701172, "step": 150630 }, { "epoch": 1.302496303533908, "grad_norm": 0.16301968835968325, "learning_rate": 1.62755759554499e-06, "loss": 0.059336090087890626, "step": 150635 }, { "epoch": 1.3025395370554513, "grad_norm": 6.284792348866784, "learning_rate": 1.6273764352022455e-06, "loss": 0.04613800048828125, "step": 150640 }, { "epoch": 1.3025827705769946, "grad_norm": 2.2525780627372787, "learning_rate": 1.627195281189918e-06, "loss": 0.011185264587402344, "step": 150645 }, { "epoch": 1.3026260040985378, "grad_norm": 1.4983505192388598, "learning_rate": 1.6270141335088463e-06, "loss": 0.024909210205078126, "step": 150650 }, { "epoch": 1.3026692376200812, "grad_norm": 2.7338825171699934, "learning_rate": 1.6268329921598642e-06, "loss": 0.0373992919921875, "step": 150655 }, { "epoch": 1.3027124711416245, "grad_norm": 0.7322553606426144, "learning_rate": 1.6266518571438052e-06, "loss": 0.066815185546875, "step": 150660 }, { "epoch": 1.3027557046631677, "grad_norm": 0.28701769414341805, "learning_rate": 1.6264707284615084e-06, "loss": 0.014434432983398438, "step": 150665 }, { "epoch": 1.302798938184711, "grad_norm": 8.261336241262201, "learning_rate": 1.6262896061138065e-06, "loss": 0.051678466796875, "step": 150670 }, { "epoch": 1.3028421717062542, "grad_norm": 0.40606362749620944, "learning_rate": 1.626108490101536e-06, "loss": 0.02104949951171875, "step": 150675 }, { "epoch": 1.3028854052277974, "grad_norm": 8.581303669491536, "learning_rate": 1.6259273804255318e-06, "loss": 0.07374801635742187, "step": 150680 }, { "epoch": 1.3029286387493406, "grad_norm": 0.8778849866506074, "learning_rate": 1.625746277086629e-06, "loss": 0.2746837615966797, "step": 150685 }, { "epoch": 1.3029718722708838, "grad_norm": 3.2072879378576618, "learning_rate": 1.6255651800856626e-06, "loss": 0.152130126953125, "step": 150690 }, { "epoch": 1.3030151057924273, "grad_norm": 1.7284216796922325, "learning_rate": 1.6253840894234673e-06, "loss": 0.02232227325439453, "step": 150695 }, { "epoch": 1.3030583393139705, "grad_norm": 24.158241935836603, "learning_rate": 1.625203005100879e-06, "loss": 0.1239349365234375, "step": 150700 }, { "epoch": 1.3031015728355138, "grad_norm": 0.17965444074413878, "learning_rate": 1.6250219271187343e-06, "loss": 0.07420234680175782, "step": 150705 }, { "epoch": 1.303144806357057, "grad_norm": 9.565513456995731, "learning_rate": 1.6248408554778667e-06, "loss": 0.026857757568359376, "step": 150710 }, { "epoch": 1.3031880398786002, "grad_norm": 1.3141267328955697, "learning_rate": 1.624659790179112e-06, "loss": 0.0090057373046875, "step": 150715 }, { "epoch": 1.3032312734001437, "grad_norm": 10.8183655859375, "learning_rate": 1.6244787312233045e-06, "loss": 0.15337181091308594, "step": 150720 }, { "epoch": 1.303274506921687, "grad_norm": 3.5603812055917934, "learning_rate": 1.6242976786112784e-06, "loss": 0.014378547668457031, "step": 150725 }, { "epoch": 1.3033177404432301, "grad_norm": 0.2994591109422034, "learning_rate": 1.6241166323438713e-06, "loss": 0.3855918884277344, "step": 150730 }, { "epoch": 1.3033609739647734, "grad_norm": 6.240544165844606, "learning_rate": 1.6239355924219162e-06, "loss": 0.055578994750976565, "step": 150735 }, { "epoch": 1.3034042074863166, "grad_norm": 4.250007411472807, "learning_rate": 1.6237545588462485e-06, "loss": 0.023237228393554688, "step": 150740 }, { "epoch": 1.3034474410078598, "grad_norm": 1.386733235542441, "learning_rate": 1.6235735316177039e-06, "loss": 0.1215606689453125, "step": 150745 }, { "epoch": 1.303490674529403, "grad_norm": 0.7563624046506019, "learning_rate": 1.6233925107371149e-06, "loss": 0.11542816162109375, "step": 150750 }, { "epoch": 1.3035339080509463, "grad_norm": 6.481228214027696, "learning_rate": 1.6232114962053193e-06, "loss": 0.1228729248046875, "step": 150755 }, { "epoch": 1.3035771415724897, "grad_norm": 35.8702070141912, "learning_rate": 1.6230304880231492e-06, "loss": 0.08644294738769531, "step": 150760 }, { "epoch": 1.303620375094033, "grad_norm": 16.867085418124265, "learning_rate": 1.6228494861914425e-06, "loss": 0.0425994873046875, "step": 150765 }, { "epoch": 1.3036636086155762, "grad_norm": 5.047403340318243, "learning_rate": 1.6226684907110318e-06, "loss": 0.018694305419921876, "step": 150770 }, { "epoch": 1.3037068421371194, "grad_norm": 4.931214903632101, "learning_rate": 1.6224875015827527e-06, "loss": 0.022095680236816406, "step": 150775 }, { "epoch": 1.3037500756586626, "grad_norm": 25.028492049720814, "learning_rate": 1.622306518807439e-06, "loss": 0.14488134384155274, "step": 150780 }, { "epoch": 1.303793309180206, "grad_norm": 2.7118310464831596, "learning_rate": 1.6221255423859262e-06, "loss": 0.023859405517578126, "step": 150785 }, { "epoch": 1.3038365427017493, "grad_norm": 0.6001251918459378, "learning_rate": 1.6219445723190477e-06, "loss": 0.04475860595703125, "step": 150790 }, { "epoch": 1.3038797762232925, "grad_norm": 1.1662030741093181, "learning_rate": 1.6217636086076396e-06, "loss": 0.018450164794921876, "step": 150795 }, { "epoch": 1.3039230097448358, "grad_norm": 12.826689523528643, "learning_rate": 1.6215826512525367e-06, "loss": 0.061551666259765624, "step": 150800 }, { "epoch": 1.303966243266379, "grad_norm": 0.4154522703601008, "learning_rate": 1.6214017002545724e-06, "loss": 0.04074249267578125, "step": 150805 }, { "epoch": 1.3040094767879222, "grad_norm": 5.228928312756887, "learning_rate": 1.6212207556145807e-06, "loss": 0.0420501708984375, "step": 150810 }, { "epoch": 1.3040527103094655, "grad_norm": 0.3737618289446209, "learning_rate": 1.621039817333398e-06, "loss": 0.06310806274414063, "step": 150815 }, { "epoch": 1.3040959438310087, "grad_norm": 2.0385456504870154, "learning_rate": 1.6208588854118567e-06, "loss": 0.1012786865234375, "step": 150820 }, { "epoch": 1.3041391773525521, "grad_norm": 37.28878909810849, "learning_rate": 1.6206779598507938e-06, "loss": 0.2127532958984375, "step": 150825 }, { "epoch": 1.3041824108740954, "grad_norm": 1.8480487619451151, "learning_rate": 1.6204970406510419e-06, "loss": 0.06512451171875, "step": 150830 }, { "epoch": 1.3042256443956386, "grad_norm": 2.11293702836034, "learning_rate": 1.620316127813436e-06, "loss": 0.10369110107421875, "step": 150835 }, { "epoch": 1.3042688779171818, "grad_norm": 23.60148377451537, "learning_rate": 1.62013522133881e-06, "loss": 0.07659454345703125, "step": 150840 }, { "epoch": 1.304312111438725, "grad_norm": 20.099524886814404, "learning_rate": 1.6199543212279986e-06, "loss": 0.09384899139404297, "step": 150845 }, { "epoch": 1.3043553449602685, "grad_norm": 0.7153358684368795, "learning_rate": 1.6197734274818347e-06, "loss": 0.22989959716796876, "step": 150850 }, { "epoch": 1.3043985784818117, "grad_norm": 41.30754472788755, "learning_rate": 1.619592540101155e-06, "loss": 0.16332778930664063, "step": 150855 }, { "epoch": 1.304441812003355, "grad_norm": 0.7273264669109547, "learning_rate": 1.6194116590867922e-06, "loss": 0.007501983642578125, "step": 150860 }, { "epoch": 1.3044850455248982, "grad_norm": 1.00368446885181, "learning_rate": 1.619230784439581e-06, "loss": 0.025785446166992188, "step": 150865 }, { "epoch": 1.3045282790464414, "grad_norm": 6.483731366296159, "learning_rate": 1.6190499161603547e-06, "loss": 0.047140884399414065, "step": 150870 }, { "epoch": 1.3045715125679846, "grad_norm": 0.19549469765757904, "learning_rate": 1.6188690542499489e-06, "loss": 0.1739673614501953, "step": 150875 }, { "epoch": 1.3046147460895279, "grad_norm": 1.910773717628271, "learning_rate": 1.618688198709197e-06, "loss": 0.016082763671875, "step": 150880 }, { "epoch": 1.304657979611071, "grad_norm": 0.32224110055592126, "learning_rate": 1.618507349538932e-06, "loss": 0.10434417724609375, "step": 150885 }, { "epoch": 1.3047012131326146, "grad_norm": 1.5695439516870384, "learning_rate": 1.6183265067399903e-06, "loss": 0.16285247802734376, "step": 150890 }, { "epoch": 1.3047444466541578, "grad_norm": 2.119622451620612, "learning_rate": 1.6181456703132043e-06, "loss": 0.0587921142578125, "step": 150895 }, { "epoch": 1.304787680175701, "grad_norm": 1.4504936944466043, "learning_rate": 1.6179648402594087e-06, "loss": 0.026702880859375, "step": 150900 }, { "epoch": 1.3048309136972442, "grad_norm": 46.40169490661402, "learning_rate": 1.6177840165794373e-06, "loss": 0.23432235717773436, "step": 150905 }, { "epoch": 1.3048741472187877, "grad_norm": 7.463841893490926, "learning_rate": 1.6176031992741234e-06, "loss": 0.0777679443359375, "step": 150910 }, { "epoch": 1.304917380740331, "grad_norm": 3.243945676448923, "learning_rate": 1.6174223883443005e-06, "loss": 0.0879547119140625, "step": 150915 }, { "epoch": 1.3049606142618742, "grad_norm": 2.989737412430117, "learning_rate": 1.6172415837908045e-06, "loss": 0.090521240234375, "step": 150920 }, { "epoch": 1.3050038477834174, "grad_norm": 2.593966282306085, "learning_rate": 1.617060785614467e-06, "loss": 0.056917762756347655, "step": 150925 }, { "epoch": 1.3050470813049606, "grad_norm": 1.3875707159168238, "learning_rate": 1.6168799938161244e-06, "loss": 0.01155853271484375, "step": 150930 }, { "epoch": 1.3050903148265038, "grad_norm": 3.120862623067854, "learning_rate": 1.6166992083966084e-06, "loss": 0.0618682861328125, "step": 150935 }, { "epoch": 1.305133548348047, "grad_norm": 0.7584184716339644, "learning_rate": 1.616518429356754e-06, "loss": 0.04011688232421875, "step": 150940 }, { "epoch": 1.3051767818695903, "grad_norm": 68.03172031010482, "learning_rate": 1.6163376566973939e-06, "loss": 0.0503570556640625, "step": 150945 }, { "epoch": 1.3052200153911337, "grad_norm": 59.216863669490095, "learning_rate": 1.6161568904193613e-06, "loss": 0.15907859802246094, "step": 150950 }, { "epoch": 1.305263248912677, "grad_norm": 1.5489600396635652, "learning_rate": 1.6159761305234919e-06, "loss": 0.0734375, "step": 150955 }, { "epoch": 1.3053064824342202, "grad_norm": 14.56165251640919, "learning_rate": 1.6157953770106187e-06, "loss": 0.11630172729492187, "step": 150960 }, { "epoch": 1.3053497159557634, "grad_norm": 0.27152722057506246, "learning_rate": 1.6156146298815746e-06, "loss": 0.19750938415527344, "step": 150965 }, { "epoch": 1.3053929494773067, "grad_norm": 2.090402454271415, "learning_rate": 1.6154338891371935e-06, "loss": 0.0678802490234375, "step": 150970 }, { "epoch": 1.3054361829988501, "grad_norm": 0.2778362018626032, "learning_rate": 1.6152531547783076e-06, "loss": 0.09564056396484374, "step": 150975 }, { "epoch": 1.3054794165203933, "grad_norm": 13.323463722347498, "learning_rate": 1.6150724268057536e-06, "loss": 0.05477752685546875, "step": 150980 }, { "epoch": 1.3055226500419366, "grad_norm": 9.197302305819301, "learning_rate": 1.6148917052203616e-06, "loss": 0.03632850646972656, "step": 150985 }, { "epoch": 1.3055658835634798, "grad_norm": 1.0488164780607911, "learning_rate": 1.6147109900229678e-06, "loss": 0.0600071907043457, "step": 150990 }, { "epoch": 1.305609117085023, "grad_norm": 3.57653941853616, "learning_rate": 1.6145302812144044e-06, "loss": 0.012046623229980468, "step": 150995 }, { "epoch": 1.3056523506065663, "grad_norm": 0.03438108668089431, "learning_rate": 1.6143495787955052e-06, "loss": 0.051924514770507815, "step": 151000 }, { "epoch": 1.3056955841281095, "grad_norm": 10.334760868420272, "learning_rate": 1.614168882767103e-06, "loss": 0.055769729614257815, "step": 151005 }, { "epoch": 1.3057388176496527, "grad_norm": 3.9028078869671288, "learning_rate": 1.6139881931300303e-06, "loss": 0.045926666259765624, "step": 151010 }, { "epoch": 1.3057820511711962, "grad_norm": 6.7053375266139685, "learning_rate": 1.6138075098851233e-06, "loss": 0.0547393798828125, "step": 151015 }, { "epoch": 1.3058252846927394, "grad_norm": 28.57265083772587, "learning_rate": 1.613626833033213e-06, "loss": 0.27319183349609377, "step": 151020 }, { "epoch": 1.3058685182142826, "grad_norm": 0.8988451038796128, "learning_rate": 1.6134461625751333e-06, "loss": 0.016594696044921874, "step": 151025 }, { "epoch": 1.3059117517358259, "grad_norm": 4.37243979247223, "learning_rate": 1.6132654985117179e-06, "loss": 0.0600006103515625, "step": 151030 }, { "epoch": 1.305954985257369, "grad_norm": 15.612786828765914, "learning_rate": 1.6130848408437978e-06, "loss": 0.05211181640625, "step": 151035 }, { "epoch": 1.3059982187789125, "grad_norm": 0.18070796555078986, "learning_rate": 1.6129041895722094e-06, "loss": 0.11561870574951172, "step": 151040 }, { "epoch": 1.3060414523004558, "grad_norm": 0.9818841452689661, "learning_rate": 1.6127235446977827e-06, "loss": 0.014447402954101563, "step": 151045 }, { "epoch": 1.306084685821999, "grad_norm": 4.614110112477666, "learning_rate": 1.6125429062213539e-06, "loss": 0.12794952392578124, "step": 151050 }, { "epoch": 1.3061279193435422, "grad_norm": 2.773654751417588, "learning_rate": 1.6123622741437548e-06, "loss": 0.05580062866210937, "step": 151055 }, { "epoch": 1.3061711528650854, "grad_norm": 1.5413771941919372, "learning_rate": 1.6121816484658176e-06, "loss": 0.017913818359375, "step": 151060 }, { "epoch": 1.3062143863866287, "grad_norm": 34.62715596913033, "learning_rate": 1.6120010291883765e-06, "loss": 0.17231979370117187, "step": 151065 }, { "epoch": 1.306257619908172, "grad_norm": 3.6981886657726153, "learning_rate": 1.611820416312264e-06, "loss": 0.12223358154296875, "step": 151070 }, { "epoch": 1.3063008534297151, "grad_norm": 12.652968229414286, "learning_rate": 1.6116398098383115e-06, "loss": 0.03045654296875, "step": 151075 }, { "epoch": 1.3063440869512586, "grad_norm": 47.31548718299482, "learning_rate": 1.6114592097673554e-06, "loss": 0.057830810546875, "step": 151080 }, { "epoch": 1.3063873204728018, "grad_norm": 1.3222316002143948, "learning_rate": 1.611278616100226e-06, "loss": 0.09449539184570313, "step": 151085 }, { "epoch": 1.306430553994345, "grad_norm": 3.3890640265519707, "learning_rate": 1.6110980288377577e-06, "loss": 0.17758216857910156, "step": 151090 }, { "epoch": 1.3064737875158883, "grad_norm": 3.4654643890152323, "learning_rate": 1.6109174479807807e-06, "loss": 0.04648857116699219, "step": 151095 }, { "epoch": 1.3065170210374315, "grad_norm": 0.5715573810395919, "learning_rate": 1.6107368735301312e-06, "loss": 0.12956085205078124, "step": 151100 }, { "epoch": 1.306560254558975, "grad_norm": 4.002579668051004, "learning_rate": 1.6105563054866394e-06, "loss": 0.04668617248535156, "step": 151105 }, { "epoch": 1.3066034880805182, "grad_norm": 4.497088743911791, "learning_rate": 1.61037574385114e-06, "loss": 0.05596771240234375, "step": 151110 }, { "epoch": 1.3066467216020614, "grad_norm": 21.358592230127698, "learning_rate": 1.6101951886244653e-06, "loss": 0.02241172790527344, "step": 151115 }, { "epoch": 1.3066899551236046, "grad_norm": 3.22379633198422, "learning_rate": 1.6100146398074472e-06, "loss": 0.08241424560546876, "step": 151120 }, { "epoch": 1.3067331886451479, "grad_norm": 2.704783136017928, "learning_rate": 1.6098340974009186e-06, "loss": 0.10725860595703125, "step": 151125 }, { "epoch": 1.306776422166691, "grad_norm": 0.31532920261622327, "learning_rate": 1.6096535614057127e-06, "loss": 0.04160232543945312, "step": 151130 }, { "epoch": 1.3068196556882343, "grad_norm": 7.069255756082889, "learning_rate": 1.6094730318226616e-06, "loss": 0.06071281433105469, "step": 151135 }, { "epoch": 1.3068628892097776, "grad_norm": 7.86752639958653, "learning_rate": 1.6092925086525969e-06, "loss": 0.024553489685058594, "step": 151140 }, { "epoch": 1.306906122731321, "grad_norm": 17.74406828149946, "learning_rate": 1.6091119918963533e-06, "loss": 0.050292587280273436, "step": 151145 }, { "epoch": 1.3069493562528642, "grad_norm": 0.44090499850799364, "learning_rate": 1.6089314815547614e-06, "loss": 0.0174224853515625, "step": 151150 }, { "epoch": 1.3069925897744075, "grad_norm": 3.8439899142821523, "learning_rate": 1.6087509776286557e-06, "loss": 0.15739059448242188, "step": 151155 }, { "epoch": 1.3070358232959507, "grad_norm": 2.83895744867919, "learning_rate": 1.608570480118867e-06, "loss": 0.05735931396484375, "step": 151160 }, { "epoch": 1.3070790568174941, "grad_norm": 0.7832620782850162, "learning_rate": 1.6083899890262287e-06, "loss": 0.0804443359375, "step": 151165 }, { "epoch": 1.3071222903390374, "grad_norm": 3.14590166096122, "learning_rate": 1.608209504351572e-06, "loss": 0.05526123046875, "step": 151170 }, { "epoch": 1.3071655238605806, "grad_norm": 2.2373287500849055, "learning_rate": 1.6080290260957312e-06, "loss": 0.03639678955078125, "step": 151175 }, { "epoch": 1.3072087573821238, "grad_norm": 9.924062711566453, "learning_rate": 1.607848554259537e-06, "loss": 0.1390848159790039, "step": 151180 }, { "epoch": 1.307251990903667, "grad_norm": 0.6174803925992987, "learning_rate": 1.6076680888438226e-06, "loss": 0.036285400390625, "step": 151185 }, { "epoch": 1.3072952244252103, "grad_norm": 4.370314473733095, "learning_rate": 1.6074876298494197e-06, "loss": 0.0602874755859375, "step": 151190 }, { "epoch": 1.3073384579467535, "grad_norm": 0.9957077622632741, "learning_rate": 1.6073071772771608e-06, "loss": 0.01224517822265625, "step": 151195 }, { "epoch": 1.3073816914682967, "grad_norm": 0.2420895521114394, "learning_rate": 1.6071267311278775e-06, "loss": 0.0388458251953125, "step": 151200 }, { "epoch": 1.3074249249898402, "grad_norm": 0.7570189159096458, "learning_rate": 1.6069462914024032e-06, "loss": 0.196319580078125, "step": 151205 }, { "epoch": 1.3074681585113834, "grad_norm": 3.4781318903520777, "learning_rate": 1.6067658581015686e-06, "loss": 0.3320159912109375, "step": 151210 }, { "epoch": 1.3075113920329267, "grad_norm": 2.3080217932466227, "learning_rate": 1.606585431226208e-06, "loss": 0.1372213363647461, "step": 151215 }, { "epoch": 1.3075546255544699, "grad_norm": 0.7966890970495139, "learning_rate": 1.6064050107771523e-06, "loss": 0.01371612548828125, "step": 151220 }, { "epoch": 1.3075978590760131, "grad_norm": 0.3874271035945159, "learning_rate": 1.6062245967552333e-06, "loss": 0.07860984802246093, "step": 151225 }, { "epoch": 1.3076410925975566, "grad_norm": 0.9498420025779583, "learning_rate": 1.6060441891612832e-06, "loss": 0.06559333801269532, "step": 151230 }, { "epoch": 1.3076843261190998, "grad_norm": 2.0157822544931814, "learning_rate": 1.6058637879961336e-06, "loss": 0.05008697509765625, "step": 151235 }, { "epoch": 1.307727559640643, "grad_norm": 7.364672660533332, "learning_rate": 1.6056833932606174e-06, "loss": 0.05461044311523437, "step": 151240 }, { "epoch": 1.3077707931621863, "grad_norm": 3.224726551494698, "learning_rate": 1.6055030049555669e-06, "loss": 0.0298614501953125, "step": 151245 }, { "epoch": 1.3078140266837295, "grad_norm": 1.5963001225522446, "learning_rate": 1.605322623081813e-06, "loss": 0.02995891571044922, "step": 151250 }, { "epoch": 1.3078572602052727, "grad_norm": 0.10307248128418854, "learning_rate": 1.6051422476401876e-06, "loss": 0.035259246826171875, "step": 151255 }, { "epoch": 1.307900493726816, "grad_norm": 1.3820999815705237, "learning_rate": 1.6049618786315218e-06, "loss": 0.06750278472900391, "step": 151260 }, { "epoch": 1.3079437272483592, "grad_norm": 34.44411362768398, "learning_rate": 1.6047815160566501e-06, "loss": 0.2260843276977539, "step": 151265 }, { "epoch": 1.3079869607699026, "grad_norm": 3.7741859818946804, "learning_rate": 1.6046011599164015e-06, "loss": 0.02202606201171875, "step": 151270 }, { "epoch": 1.3080301942914458, "grad_norm": 2.2112708938402514, "learning_rate": 1.6044208102116104e-06, "loss": 0.066986083984375, "step": 151275 }, { "epoch": 1.308073427812989, "grad_norm": 4.340365347675398, "learning_rate": 1.604240466943107e-06, "loss": 0.0576019287109375, "step": 151280 }, { "epoch": 1.3081166613345323, "grad_norm": 5.765430919323493, "learning_rate": 1.604060130111723e-06, "loss": 0.0256683349609375, "step": 151285 }, { "epoch": 1.3081598948560755, "grad_norm": 0.09473618988401868, "learning_rate": 1.6038797997182906e-06, "loss": 0.13529567718505858, "step": 151290 }, { "epoch": 1.308203128377619, "grad_norm": 2.1364909047094387, "learning_rate": 1.6036994757636414e-06, "loss": 0.039325714111328125, "step": 151295 }, { "epoch": 1.3082463618991622, "grad_norm": 1.29992843883233, "learning_rate": 1.6035191582486052e-06, "loss": 0.13358154296875, "step": 151300 }, { "epoch": 1.3082895954207054, "grad_norm": 1.0292860016758174, "learning_rate": 1.603338847174017e-06, "loss": 0.006549835205078125, "step": 151305 }, { "epoch": 1.3083328289422487, "grad_norm": 2.228495341368066, "learning_rate": 1.6031585425407057e-06, "loss": 0.0426788330078125, "step": 151310 }, { "epoch": 1.308376062463792, "grad_norm": 0.8754559203452636, "learning_rate": 1.6029782443495036e-06, "loss": 0.14989051818847657, "step": 151315 }, { "epoch": 1.3084192959853351, "grad_norm": 4.3927750981576, "learning_rate": 1.6027979526012427e-06, "loss": 0.06921920776367188, "step": 151320 }, { "epoch": 1.3084625295068784, "grad_norm": 0.21827140031784315, "learning_rate": 1.602617667296755e-06, "loss": 0.0582183837890625, "step": 151325 }, { "epoch": 1.3085057630284216, "grad_norm": 0.1816164360754049, "learning_rate": 1.6024373884368693e-06, "loss": 0.09812202453613281, "step": 151330 }, { "epoch": 1.308548996549965, "grad_norm": 4.374987939698495, "learning_rate": 1.6022571160224205e-06, "loss": 0.03185958862304687, "step": 151335 }, { "epoch": 1.3085922300715083, "grad_norm": 18.19342985751828, "learning_rate": 1.6020768500542384e-06, "loss": 0.05804367065429687, "step": 151340 }, { "epoch": 1.3086354635930515, "grad_norm": 0.3170393790126797, "learning_rate": 1.6018965905331539e-06, "loss": 0.040726661682128906, "step": 151345 }, { "epoch": 1.3086786971145947, "grad_norm": 0.6492471610518752, "learning_rate": 1.601716337459999e-06, "loss": 0.008047103881835938, "step": 151350 }, { "epoch": 1.308721930636138, "grad_norm": 6.7663627492889225, "learning_rate": 1.6015360908356049e-06, "loss": 0.038208389282226564, "step": 151355 }, { "epoch": 1.3087651641576814, "grad_norm": 3.055039510996253, "learning_rate": 1.6013558506608017e-06, "loss": 0.0545135498046875, "step": 151360 }, { "epoch": 1.3088083976792246, "grad_norm": 4.058920279754956, "learning_rate": 1.6011756169364227e-06, "loss": 0.036133956909179685, "step": 151365 }, { "epoch": 1.3088516312007679, "grad_norm": 4.804349609930499, "learning_rate": 1.6009953896632983e-06, "loss": 0.0198760986328125, "step": 151370 }, { "epoch": 1.308894864722311, "grad_norm": 10.48311441507501, "learning_rate": 1.6008151688422582e-06, "loss": 0.6080337524414062, "step": 151375 }, { "epoch": 1.3089380982438543, "grad_norm": 3.8172418426029515, "learning_rate": 1.6006349544741361e-06, "loss": 0.09991111755371093, "step": 151380 }, { "epoch": 1.3089813317653975, "grad_norm": 4.237970666575575, "learning_rate": 1.6004547465597619e-06, "loss": 0.04264812469482422, "step": 151385 }, { "epoch": 1.3090245652869408, "grad_norm": 2.209397668590351, "learning_rate": 1.6002745450999669e-06, "loss": 0.09562835693359376, "step": 151390 }, { "epoch": 1.3090677988084842, "grad_norm": 0.32300335525796836, "learning_rate": 1.6000943500955807e-06, "loss": 0.10752410888671875, "step": 151395 }, { "epoch": 1.3091110323300275, "grad_norm": 1.3214656974631012, "learning_rate": 1.5999141615474371e-06, "loss": 0.08355941772460937, "step": 151400 }, { "epoch": 1.3091542658515707, "grad_norm": 0.21331063844737785, "learning_rate": 1.5997339794563653e-06, "loss": 0.6226108551025391, "step": 151405 }, { "epoch": 1.309197499373114, "grad_norm": 3.0377429170431616, "learning_rate": 1.5995538038231972e-06, "loss": 0.03983383178710938, "step": 151410 }, { "epoch": 1.3092407328946571, "grad_norm": 15.031662482346043, "learning_rate": 1.5993736346487626e-06, "loss": 0.2759437561035156, "step": 151415 }, { "epoch": 1.3092839664162006, "grad_norm": 3.362535480585031, "learning_rate": 1.5991934719338934e-06, "loss": 0.06575088500976563, "step": 151420 }, { "epoch": 1.3093271999377438, "grad_norm": 3.605684574114557, "learning_rate": 1.5990133156794189e-06, "loss": 0.092578125, "step": 151425 }, { "epoch": 1.309370433459287, "grad_norm": 8.310358178953802, "learning_rate": 1.598833165886172e-06, "loss": 0.03687267303466797, "step": 151430 }, { "epoch": 1.3094136669808303, "grad_norm": 25.840479456253362, "learning_rate": 1.5986530225549818e-06, "loss": 0.07878189086914063, "step": 151435 }, { "epoch": 1.3094569005023735, "grad_norm": 2.988543906486208, "learning_rate": 1.5984728856866812e-06, "loss": 0.0437255859375, "step": 151440 }, { "epoch": 1.3095001340239167, "grad_norm": 20.01847716443029, "learning_rate": 1.5982927552821002e-06, "loss": 0.051794815063476565, "step": 151445 }, { "epoch": 1.30954336754546, "grad_norm": 1.906300360604537, "learning_rate": 1.598112631342069e-06, "loss": 0.04039955139160156, "step": 151450 }, { "epoch": 1.3095866010670032, "grad_norm": 12.713375827763146, "learning_rate": 1.597932513867417e-06, "loss": 0.15527496337890626, "step": 151455 }, { "epoch": 1.3096298345885466, "grad_norm": 1.628343321565928, "learning_rate": 1.5977524028589777e-06, "loss": 0.10699615478515626, "step": 151460 }, { "epoch": 1.3096730681100899, "grad_norm": 47.02044430923352, "learning_rate": 1.5975722983175802e-06, "loss": 0.13260421752929688, "step": 151465 }, { "epoch": 1.309716301631633, "grad_norm": 2.0078357481657925, "learning_rate": 1.5973922002440556e-06, "loss": 0.28376617431640627, "step": 151470 }, { "epoch": 1.3097595351531763, "grad_norm": 1.2334984487847138, "learning_rate": 1.5972121086392342e-06, "loss": 0.076873779296875, "step": 151475 }, { "epoch": 1.3098027686747196, "grad_norm": 0.17496464364358008, "learning_rate": 1.5970320235039466e-06, "loss": 0.02318229675292969, "step": 151480 }, { "epoch": 1.309846002196263, "grad_norm": 4.613919154275265, "learning_rate": 1.5968519448390222e-06, "loss": 0.06610107421875, "step": 151485 }, { "epoch": 1.3098892357178062, "grad_norm": 10.561010828047527, "learning_rate": 1.5966718726452938e-06, "loss": 0.12246932983398437, "step": 151490 }, { "epoch": 1.3099324692393495, "grad_norm": 4.609422859776215, "learning_rate": 1.5964918069235895e-06, "loss": 0.054426383972167966, "step": 151495 }, { "epoch": 1.3099757027608927, "grad_norm": 0.2890224536009811, "learning_rate": 1.596311747674742e-06, "loss": 0.027016067504882814, "step": 151500 }, { "epoch": 1.310018936282436, "grad_norm": 13.317700497176425, "learning_rate": 1.596131694899581e-06, "loss": 0.06106243133544922, "step": 151505 }, { "epoch": 1.3100621698039792, "grad_norm": 0.594204882906749, "learning_rate": 1.5959516485989366e-06, "loss": 0.012603759765625, "step": 151510 }, { "epoch": 1.3101054033255224, "grad_norm": 5.321321304920421, "learning_rate": 1.5957716087736386e-06, "loss": 0.023484516143798827, "step": 151515 }, { "epoch": 1.3101486368470656, "grad_norm": 2.078518449080969, "learning_rate": 1.5955915754245169e-06, "loss": 0.017476367950439452, "step": 151520 }, { "epoch": 1.310191870368609, "grad_norm": 10.040124503889754, "learning_rate": 1.5954115485524042e-06, "loss": 0.03264923095703125, "step": 151525 }, { "epoch": 1.3102351038901523, "grad_norm": 1.2128522082656052, "learning_rate": 1.5952315281581292e-06, "loss": 0.019483184814453124, "step": 151530 }, { "epoch": 1.3102783374116955, "grad_norm": 3.680843958715573, "learning_rate": 1.5950515142425223e-06, "loss": 0.03412933349609375, "step": 151535 }, { "epoch": 1.3103215709332388, "grad_norm": 7.882822131823875, "learning_rate": 1.5948715068064124e-06, "loss": 0.06630706787109375, "step": 151540 }, { "epoch": 1.310364804454782, "grad_norm": 1.3769596790734546, "learning_rate": 1.5946915058506324e-06, "loss": 0.0164154052734375, "step": 151545 }, { "epoch": 1.3104080379763254, "grad_norm": 2.720635156209762, "learning_rate": 1.5945115113760107e-06, "loss": 0.023099899291992188, "step": 151550 }, { "epoch": 1.3104512714978687, "grad_norm": 54.32868868961584, "learning_rate": 1.5943315233833767e-06, "loss": 0.20586681365966797, "step": 151555 }, { "epoch": 1.310494505019412, "grad_norm": 11.023951143844657, "learning_rate": 1.5941515418735624e-06, "loss": 0.04578742980957031, "step": 151560 }, { "epoch": 1.3105377385409551, "grad_norm": 20.009746238461325, "learning_rate": 1.5939715668473973e-06, "loss": 0.032358741760253905, "step": 151565 }, { "epoch": 1.3105809720624984, "grad_norm": 2.650530976254585, "learning_rate": 1.5937915983057108e-06, "loss": 0.07065773010253906, "step": 151570 }, { "epoch": 1.3106242055840416, "grad_norm": 6.8668225021581675, "learning_rate": 1.5936116362493334e-06, "loss": 0.1080850601196289, "step": 151575 }, { "epoch": 1.3106674391055848, "grad_norm": 15.130019160242762, "learning_rate": 1.593431680679095e-06, "loss": 0.0753265380859375, "step": 151580 }, { "epoch": 1.310710672627128, "grad_norm": 0.32749582189140247, "learning_rate": 1.5932517315958241e-06, "loss": 0.023952484130859375, "step": 151585 }, { "epoch": 1.3107539061486715, "grad_norm": 29.9219018345403, "learning_rate": 1.5930717890003527e-06, "loss": 0.05236968994140625, "step": 151590 }, { "epoch": 1.3107971396702147, "grad_norm": 0.080135822024403, "learning_rate": 1.5928918528935104e-06, "loss": 0.06029796600341797, "step": 151595 }, { "epoch": 1.310840373191758, "grad_norm": 11.228753197474395, "learning_rate": 1.5927119232761253e-06, "loss": 0.0350189208984375, "step": 151600 }, { "epoch": 1.3108836067133012, "grad_norm": 0.37751419421179544, "learning_rate": 1.5925320001490293e-06, "loss": 0.01734466552734375, "step": 151605 }, { "epoch": 1.3109268402348446, "grad_norm": 37.25476475372405, "learning_rate": 1.5923520835130516e-06, "loss": 0.22205581665039062, "step": 151610 }, { "epoch": 1.3109700737563879, "grad_norm": 42.188959541363836, "learning_rate": 1.5921721733690204e-06, "loss": 0.1262969970703125, "step": 151615 }, { "epoch": 1.311013307277931, "grad_norm": 1.4739874062795983, "learning_rate": 1.5919922697177683e-06, "loss": 0.021468734741210936, "step": 151620 }, { "epoch": 1.3110565407994743, "grad_norm": 0.12191799234082257, "learning_rate": 1.591812372560123e-06, "loss": 0.003964614868164062, "step": 151625 }, { "epoch": 1.3110997743210175, "grad_norm": 0.1811506425499748, "learning_rate": 1.5916324818969148e-06, "loss": 0.010439300537109375, "step": 151630 }, { "epoch": 1.3111430078425608, "grad_norm": 4.157077918819581, "learning_rate": 1.5914525977289733e-06, "loss": 0.11413745880126953, "step": 151635 }, { "epoch": 1.311186241364104, "grad_norm": 44.2458282805503, "learning_rate": 1.5912727200571276e-06, "loss": 0.2645378112792969, "step": 151640 }, { "epoch": 1.3112294748856472, "grad_norm": 0.4857162882606747, "learning_rate": 1.591092848882208e-06, "loss": 0.061956024169921874, "step": 151645 }, { "epoch": 1.3112727084071907, "grad_norm": 0.19319483785648744, "learning_rate": 1.590912984205042e-06, "loss": 0.12308502197265625, "step": 151650 }, { "epoch": 1.311315941928734, "grad_norm": 30.94253812643786, "learning_rate": 1.5907331260264625e-06, "loss": 0.21147289276123046, "step": 151655 }, { "epoch": 1.3113591754502771, "grad_norm": 11.731055850949074, "learning_rate": 1.590553274347296e-06, "loss": 0.14264602661132814, "step": 151660 }, { "epoch": 1.3114024089718204, "grad_norm": 0.5781760277604457, "learning_rate": 1.5903734291683744e-06, "loss": 0.1863616943359375, "step": 151665 }, { "epoch": 1.3114456424933636, "grad_norm": 6.770833974145914, "learning_rate": 1.590193590490526e-06, "loss": 0.05878238677978516, "step": 151670 }, { "epoch": 1.311488876014907, "grad_norm": 11.907366350419132, "learning_rate": 1.5900137583145802e-06, "loss": 0.21862106323242186, "step": 151675 }, { "epoch": 1.3115321095364503, "grad_norm": 2.403796148206565, "learning_rate": 1.589833932641365e-06, "loss": 0.07560520172119141, "step": 151680 }, { "epoch": 1.3115753430579935, "grad_norm": 2.6482283830444153, "learning_rate": 1.5896541134717123e-06, "loss": 0.009873199462890624, "step": 151685 }, { "epoch": 1.3116185765795367, "grad_norm": 1.6043638941093825, "learning_rate": 1.5894743008064503e-06, "loss": 0.024767684936523437, "step": 151690 }, { "epoch": 1.31166181010108, "grad_norm": 0.7860462074950532, "learning_rate": 1.5892944946464084e-06, "loss": 0.0437957763671875, "step": 151695 }, { "epoch": 1.3117050436226232, "grad_norm": 0.7221407454792204, "learning_rate": 1.589114694992415e-06, "loss": 0.01945152282714844, "step": 151700 }, { "epoch": 1.3117482771441664, "grad_norm": 0.30080401685554564, "learning_rate": 1.5889349018453008e-06, "loss": 0.01224365234375, "step": 151705 }, { "epoch": 1.3117915106657096, "grad_norm": 14.81816367080631, "learning_rate": 1.5887551152058924e-06, "loss": 0.1436767578125, "step": 151710 }, { "epoch": 1.311834744187253, "grad_norm": 0.13003053685054564, "learning_rate": 1.5885753350750212e-06, "loss": 0.037335968017578124, "step": 151715 }, { "epoch": 1.3118779777087963, "grad_norm": 20.65297497680979, "learning_rate": 1.5883955614535167e-06, "loss": 0.04430580139160156, "step": 151720 }, { "epoch": 1.3119212112303396, "grad_norm": 1.8087259512154898, "learning_rate": 1.588215794342207e-06, "loss": 0.062188720703125, "step": 151725 }, { "epoch": 1.3119644447518828, "grad_norm": 3.940975207953724, "learning_rate": 1.5880360337419213e-06, "loss": 0.12493820190429687, "step": 151730 }, { "epoch": 1.312007678273426, "grad_norm": 1.4562432212465624, "learning_rate": 1.5878562796534889e-06, "loss": 0.04122848510742187, "step": 151735 }, { "epoch": 1.3120509117949695, "grad_norm": 0.7909025687038072, "learning_rate": 1.5876765320777382e-06, "loss": 0.041259765625, "step": 151740 }, { "epoch": 1.3120941453165127, "grad_norm": 1.2184315349119414, "learning_rate": 1.5874967910154977e-06, "loss": 0.08976306915283203, "step": 151745 }, { "epoch": 1.312137378838056, "grad_norm": 4.438779605892292, "learning_rate": 1.5873170564675985e-06, "loss": 0.07066650390625, "step": 151750 }, { "epoch": 1.3121806123595992, "grad_norm": 0.5112328876789247, "learning_rate": 1.5871373284348679e-06, "loss": 0.029346466064453125, "step": 151755 }, { "epoch": 1.3122238458811424, "grad_norm": 23.0264177455198, "learning_rate": 1.5869576069181352e-06, "loss": 0.0612091064453125, "step": 151760 }, { "epoch": 1.3122670794026856, "grad_norm": 34.43952478129223, "learning_rate": 1.586777891918228e-06, "loss": 0.2069478988647461, "step": 151765 }, { "epoch": 1.3123103129242288, "grad_norm": 2.1131971430418584, "learning_rate": 1.5865981834359772e-06, "loss": 0.05022125244140625, "step": 151770 }, { "epoch": 1.312353546445772, "grad_norm": 2.4352419769409526, "learning_rate": 1.5864184814722098e-06, "loss": 0.0432464599609375, "step": 151775 }, { "epoch": 1.3123967799673155, "grad_norm": 0.8786207683586917, "learning_rate": 1.5862387860277567e-06, "loss": 0.041469669342041014, "step": 151780 }, { "epoch": 1.3124400134888587, "grad_norm": 36.90623674145658, "learning_rate": 1.5860590971034455e-06, "loss": 0.08445167541503906, "step": 151785 }, { "epoch": 1.312483247010402, "grad_norm": 2.5543365329541747, "learning_rate": 1.5858794147001045e-06, "loss": 0.016294479370117188, "step": 151790 }, { "epoch": 1.3125264805319452, "grad_norm": 2.6844360832426175, "learning_rate": 1.5856997388185627e-06, "loss": 0.04178314208984375, "step": 151795 }, { "epoch": 1.3125697140534884, "grad_norm": 0.8199620882734382, "learning_rate": 1.5855200694596487e-06, "loss": 0.035884857177734375, "step": 151800 }, { "epoch": 1.3126129475750319, "grad_norm": 21.50193693590633, "learning_rate": 1.5853404066241901e-06, "loss": 0.06779212951660156, "step": 151805 }, { "epoch": 1.3126561810965751, "grad_norm": 1.9876144939696834, "learning_rate": 1.5851607503130179e-06, "loss": 0.049782943725585935, "step": 151810 }, { "epoch": 1.3126994146181183, "grad_norm": 3.015901876572468, "learning_rate": 1.5849811005269588e-06, "loss": 0.06024093627929687, "step": 151815 }, { "epoch": 1.3127426481396616, "grad_norm": 184.7329416879093, "learning_rate": 1.5848014572668422e-06, "loss": 0.1129547119140625, "step": 151820 }, { "epoch": 1.3127858816612048, "grad_norm": 1.5130522044155594, "learning_rate": 1.5846218205334952e-06, "loss": 0.0150054931640625, "step": 151825 }, { "epoch": 1.312829115182748, "grad_norm": 1.0470652841668988, "learning_rate": 1.5844421903277481e-06, "loss": 0.047135353088378906, "step": 151830 }, { "epoch": 1.3128723487042913, "grad_norm": 5.975491111104145, "learning_rate": 1.5842625666504287e-06, "loss": 0.11672286987304688, "step": 151835 }, { "epoch": 1.3129155822258345, "grad_norm": 0.09190122469481786, "learning_rate": 1.5840829495023643e-06, "loss": 0.007984066009521484, "step": 151840 }, { "epoch": 1.312958815747378, "grad_norm": 1.147344564864233, "learning_rate": 1.583903338884385e-06, "loss": 0.024808502197265624, "step": 151845 }, { "epoch": 1.3130020492689212, "grad_norm": 5.420384548697312, "learning_rate": 1.5837237347973183e-06, "loss": 0.03432159423828125, "step": 151850 }, { "epoch": 1.3130452827904644, "grad_norm": 2.2523412812089014, "learning_rate": 1.5835441372419932e-06, "loss": 0.10586929321289062, "step": 151855 }, { "epoch": 1.3130885163120076, "grad_norm": 2.1520989454422077, "learning_rate": 1.583364546219237e-06, "loss": 0.020029830932617187, "step": 151860 }, { "epoch": 1.313131749833551, "grad_norm": 0.3086725883443723, "learning_rate": 1.5831849617298785e-06, "loss": 0.13280754089355468, "step": 151865 }, { "epoch": 1.3131749833550943, "grad_norm": 1.5386453478888948, "learning_rate": 1.5830053837747443e-06, "loss": 0.384869384765625, "step": 151870 }, { "epoch": 1.3132182168766375, "grad_norm": 1.018705074506805, "learning_rate": 1.5828258123546653e-06, "loss": 0.069915771484375, "step": 151875 }, { "epoch": 1.3132614503981808, "grad_norm": 0.12919299694338196, "learning_rate": 1.5826462474704684e-06, "loss": 0.03248672485351563, "step": 151880 }, { "epoch": 1.313304683919724, "grad_norm": 0.36603384544732026, "learning_rate": 1.5824666891229808e-06, "loss": 0.05302848815917969, "step": 151885 }, { "epoch": 1.3133479174412672, "grad_norm": 1.8496549956181234, "learning_rate": 1.5822871373130328e-06, "loss": 0.067706298828125, "step": 151890 }, { "epoch": 1.3133911509628104, "grad_norm": 2.487890839408293, "learning_rate": 1.5821075920414511e-06, "loss": 0.0299102783203125, "step": 151895 }, { "epoch": 1.3134343844843537, "grad_norm": 27.681766671476137, "learning_rate": 1.5819280533090638e-06, "loss": 0.13193511962890625, "step": 151900 }, { "epoch": 1.3134776180058971, "grad_norm": 0.028300819883810977, "learning_rate": 1.5817485211166978e-06, "loss": 0.022272300720214844, "step": 151905 }, { "epoch": 1.3135208515274404, "grad_norm": 5.616684407176967, "learning_rate": 1.5815689954651836e-06, "loss": 0.0280853271484375, "step": 151910 }, { "epoch": 1.3135640850489836, "grad_norm": 5.995880755548782, "learning_rate": 1.581389476355348e-06, "loss": 0.045836257934570315, "step": 151915 }, { "epoch": 1.3136073185705268, "grad_norm": 0.718333142445733, "learning_rate": 1.5812099637880187e-06, "loss": 0.0871124267578125, "step": 151920 }, { "epoch": 1.31365055209207, "grad_norm": 7.158100927149557, "learning_rate": 1.5810304577640238e-06, "loss": 0.0728912353515625, "step": 151925 }, { "epoch": 1.3136937856136135, "grad_norm": 0.5448606698567299, "learning_rate": 1.5808509582841907e-06, "loss": 0.006397056579589844, "step": 151930 }, { "epoch": 1.3137370191351567, "grad_norm": 9.48100776252044, "learning_rate": 1.5806714653493468e-06, "loss": 0.0274627685546875, "step": 151935 }, { "epoch": 1.3137802526567, "grad_norm": 1.5345764305892289, "learning_rate": 1.58049197896032e-06, "loss": 0.024095535278320312, "step": 151940 }, { "epoch": 1.3138234861782432, "grad_norm": 1.115758508603782, "learning_rate": 1.5803124991179405e-06, "loss": 0.014604949951171875, "step": 151945 }, { "epoch": 1.3138667196997864, "grad_norm": 1.4906223703177262, "learning_rate": 1.5801330258230341e-06, "loss": 0.03775634765625, "step": 151950 }, { "epoch": 1.3139099532213296, "grad_norm": 3.5855465179138344, "learning_rate": 1.5799535590764292e-06, "loss": 0.024735260009765624, "step": 151955 }, { "epoch": 1.3139531867428729, "grad_norm": 0.2782994510895442, "learning_rate": 1.5797740988789527e-06, "loss": 0.1625732421875, "step": 151960 }, { "epoch": 1.313996420264416, "grad_norm": 11.16164062201688, "learning_rate": 1.5795946452314315e-06, "loss": 0.07446517944335937, "step": 151965 }, { "epoch": 1.3140396537859595, "grad_norm": 2.7129538369169106, "learning_rate": 1.5794151981346953e-06, "loss": 0.10825576782226562, "step": 151970 }, { "epoch": 1.3140828873075028, "grad_norm": 121.6108316419711, "learning_rate": 1.5792357575895706e-06, "loss": 0.18534622192382813, "step": 151975 }, { "epoch": 1.314126120829046, "grad_norm": 1.2551121477299463, "learning_rate": 1.5790563235968852e-06, "loss": 0.0905181884765625, "step": 151980 }, { "epoch": 1.3141693543505892, "grad_norm": 23.013865568742233, "learning_rate": 1.5788768961574663e-06, "loss": 0.10165510177612305, "step": 151985 }, { "epoch": 1.3142125878721325, "grad_norm": 2.4054435187673855, "learning_rate": 1.5786974752721405e-06, "loss": 0.28734779357910156, "step": 151990 }, { "epoch": 1.314255821393676, "grad_norm": 54.1256447135161, "learning_rate": 1.5785180609417375e-06, "loss": 0.16319961547851564, "step": 151995 }, { "epoch": 1.3142990549152191, "grad_norm": 5.2452702258064825, "learning_rate": 1.5783386531670825e-06, "loss": 0.034978485107421874, "step": 152000 }, { "epoch": 1.3143422884367624, "grad_norm": 1.6504130081830555, "learning_rate": 1.5781592519490054e-06, "loss": 0.09116744995117188, "step": 152005 }, { "epoch": 1.3143855219583056, "grad_norm": 8.539636770086602, "learning_rate": 1.5779798572883318e-06, "loss": 0.3071174621582031, "step": 152010 }, { "epoch": 1.3144287554798488, "grad_norm": 0.6670616092757611, "learning_rate": 1.5778004691858897e-06, "loss": 0.1625091552734375, "step": 152015 }, { "epoch": 1.314471989001392, "grad_norm": 30.526457778167515, "learning_rate": 1.577621087642506e-06, "loss": 0.1322540283203125, "step": 152020 }, { "epoch": 1.3145152225229353, "grad_norm": 2.732678394133529, "learning_rate": 1.5774417126590084e-06, "loss": 0.09359016418457031, "step": 152025 }, { "epoch": 1.3145584560444785, "grad_norm": 0.2032633346152163, "learning_rate": 1.5772623442362224e-06, "loss": 0.019998931884765626, "step": 152030 }, { "epoch": 1.314601689566022, "grad_norm": 0.39821809251296997, "learning_rate": 1.577082982374978e-06, "loss": 0.1924114227294922, "step": 152035 }, { "epoch": 1.3146449230875652, "grad_norm": 0.5397559226182749, "learning_rate": 1.5769036270761013e-06, "loss": 0.049325180053710935, "step": 152040 }, { "epoch": 1.3146881566091084, "grad_norm": 2.4205701429840305, "learning_rate": 1.5767242783404194e-06, "loss": 0.20328617095947266, "step": 152045 }, { "epoch": 1.3147313901306517, "grad_norm": 0.1417699847495697, "learning_rate": 1.5765449361687582e-06, "loss": 0.3419347763061523, "step": 152050 }, { "epoch": 1.3147746236521949, "grad_norm": 2.3107600591352595, "learning_rate": 1.5763656005619469e-06, "loss": 0.048067474365234376, "step": 152055 }, { "epoch": 1.3148178571737383, "grad_norm": 0.5822963482878619, "learning_rate": 1.5761862715208108e-06, "loss": 0.06541824340820312, "step": 152060 }, { "epoch": 1.3148610906952816, "grad_norm": 19.949681666567056, "learning_rate": 1.576006949046179e-06, "loss": 0.2583930969238281, "step": 152065 }, { "epoch": 1.3149043242168248, "grad_norm": 2.298082999740221, "learning_rate": 1.5758276331388768e-06, "loss": 0.0617828369140625, "step": 152070 }, { "epoch": 1.314947557738368, "grad_norm": 0.14892575517833048, "learning_rate": 1.5756483237997321e-06, "loss": 0.02427825927734375, "step": 152075 }, { "epoch": 1.3149907912599113, "grad_norm": 0.4215803714375237, "learning_rate": 1.5754690210295716e-06, "loss": 0.012052345275878906, "step": 152080 }, { "epoch": 1.3150340247814545, "grad_norm": 2.2663305606592927, "learning_rate": 1.5752897248292215e-06, "loss": 0.01739654541015625, "step": 152085 }, { "epoch": 1.3150772583029977, "grad_norm": 1.679160053097355, "learning_rate": 1.5751104351995095e-06, "loss": 0.02400970458984375, "step": 152090 }, { "epoch": 1.315120491824541, "grad_norm": 20.53387850308098, "learning_rate": 1.5749311521412617e-06, "loss": 0.07822589874267578, "step": 152095 }, { "epoch": 1.3151637253460844, "grad_norm": 6.21970359872083, "learning_rate": 1.5747518756553062e-06, "loss": 0.07083587646484375, "step": 152100 }, { "epoch": 1.3152069588676276, "grad_norm": 5.736375721000998, "learning_rate": 1.5745726057424695e-06, "loss": 0.0177093505859375, "step": 152105 }, { "epoch": 1.3152501923891708, "grad_norm": 1.8874117177508989, "learning_rate": 1.5743933424035764e-06, "loss": 0.043710517883300784, "step": 152110 }, { "epoch": 1.315293425910714, "grad_norm": 6.184523651272415, "learning_rate": 1.5742140856394564e-06, "loss": 0.13819808959960939, "step": 152115 }, { "epoch": 1.3153366594322575, "grad_norm": 0.19607012690787565, "learning_rate": 1.574034835450935e-06, "loss": 0.13453407287597657, "step": 152120 }, { "epoch": 1.3153798929538008, "grad_norm": 0.40520840695037774, "learning_rate": 1.5738555918388381e-06, "loss": 0.053424644470214847, "step": 152125 }, { "epoch": 1.315423126475344, "grad_norm": 0.3985327383284561, "learning_rate": 1.573676354803994e-06, "loss": 0.03902626037597656, "step": 152130 }, { "epoch": 1.3154663599968872, "grad_norm": 5.438702826092252, "learning_rate": 1.5734971243472287e-06, "loss": 0.027715301513671874, "step": 152135 }, { "epoch": 1.3155095935184304, "grad_norm": 3.0303978722185696, "learning_rate": 1.5733179004693687e-06, "loss": 0.027649307250976564, "step": 152140 }, { "epoch": 1.3155528270399737, "grad_norm": 21.43231858137849, "learning_rate": 1.5731386831712405e-06, "loss": 0.07090263366699219, "step": 152145 }, { "epoch": 1.315596060561517, "grad_norm": 12.05509249313325, "learning_rate": 1.5729594724536704e-06, "loss": 0.06162109375, "step": 152150 }, { "epoch": 1.3156392940830601, "grad_norm": 1.1628293820378215, "learning_rate": 1.572780268317484e-06, "loss": 0.016829681396484376, "step": 152155 }, { "epoch": 1.3156825276046036, "grad_norm": 9.256418549489243, "learning_rate": 1.5726010707635105e-06, "loss": 0.0509552001953125, "step": 152160 }, { "epoch": 1.3157257611261468, "grad_norm": 6.312014671531323, "learning_rate": 1.5724218797925732e-06, "loss": 0.12855987548828124, "step": 152165 }, { "epoch": 1.31576899464769, "grad_norm": 0.6512589456389655, "learning_rate": 1.572242695405501e-06, "loss": 0.03226776123046875, "step": 152170 }, { "epoch": 1.3158122281692333, "grad_norm": 1.6572799896283468, "learning_rate": 1.5720635176031197e-06, "loss": 0.09043655395507813, "step": 152175 }, { "epoch": 1.3158554616907765, "grad_norm": 0.3048648174166387, "learning_rate": 1.571884346386255e-06, "loss": 0.05089302062988281, "step": 152180 }, { "epoch": 1.31589869521232, "grad_norm": 0.037701200068251076, "learning_rate": 1.571705181755734e-06, "loss": 0.031035327911376955, "step": 152185 }, { "epoch": 1.3159419287338632, "grad_norm": 10.915677243956795, "learning_rate": 1.5715260237123805e-06, "loss": 0.0608184814453125, "step": 152190 }, { "epoch": 1.3159851622554064, "grad_norm": 5.658567190343147, "learning_rate": 1.5713468722570246e-06, "loss": 0.07476091384887695, "step": 152195 }, { "epoch": 1.3160283957769496, "grad_norm": 0.7338144117446045, "learning_rate": 1.5711677273904905e-06, "loss": 0.06176128387451172, "step": 152200 }, { "epoch": 1.3160716292984929, "grad_norm": 0.25130304747366894, "learning_rate": 1.5709885891136044e-06, "loss": 0.034906005859375, "step": 152205 }, { "epoch": 1.316114862820036, "grad_norm": 1.5959507298211333, "learning_rate": 1.570809457427193e-06, "loss": 0.025309371948242187, "step": 152210 }, { "epoch": 1.3161580963415793, "grad_norm": 1.0863623340581845, "learning_rate": 1.570630332332081e-06, "loss": 0.03702545166015625, "step": 152215 }, { "epoch": 1.3162013298631225, "grad_norm": 0.4724276052707621, "learning_rate": 1.5704512138290966e-06, "loss": 0.06441421508789062, "step": 152220 }, { "epoch": 1.316244563384666, "grad_norm": 13.623942914564298, "learning_rate": 1.5702721019190633e-06, "loss": 0.03204822540283203, "step": 152225 }, { "epoch": 1.3162877969062092, "grad_norm": 1.5312151976835366, "learning_rate": 1.57009299660281e-06, "loss": 0.1181793212890625, "step": 152230 }, { "epoch": 1.3163310304277525, "grad_norm": 0.31581407482369356, "learning_rate": 1.569913897881162e-06, "loss": 0.11917457580566407, "step": 152235 }, { "epoch": 1.3163742639492957, "grad_norm": 29.98281285030237, "learning_rate": 1.5697348057549445e-06, "loss": 0.19539966583251953, "step": 152240 }, { "epoch": 1.316417497470839, "grad_norm": 0.09491938032492418, "learning_rate": 1.5695557202249837e-06, "loss": 0.27411575317382814, "step": 152245 }, { "epoch": 1.3164607309923824, "grad_norm": 15.22522166006912, "learning_rate": 1.569376641292105e-06, "loss": 0.026416015625, "step": 152250 }, { "epoch": 1.3165039645139256, "grad_norm": 79.2284089792237, "learning_rate": 1.5691975689571343e-06, "loss": 0.3258544921875, "step": 152255 }, { "epoch": 1.3165471980354688, "grad_norm": 3.3249770579105506, "learning_rate": 1.5690185032208992e-06, "loss": 0.13208236694335937, "step": 152260 }, { "epoch": 1.316590431557012, "grad_norm": 5.743940014733337, "learning_rate": 1.568839444084224e-06, "loss": 0.1973175048828125, "step": 152265 }, { "epoch": 1.3166336650785553, "grad_norm": 4.516853413593825, "learning_rate": 1.568660391547935e-06, "loss": 0.02650146484375, "step": 152270 }, { "epoch": 1.3166768986000985, "grad_norm": 4.909301449458443, "learning_rate": 1.5684813456128562e-06, "loss": 0.08056125640869141, "step": 152275 }, { "epoch": 1.3167201321216417, "grad_norm": 1.7599733024686752, "learning_rate": 1.5683023062798167e-06, "loss": 0.029415988922119142, "step": 152280 }, { "epoch": 1.316763365643185, "grad_norm": 0.8745029793850505, "learning_rate": 1.5681232735496387e-06, "loss": 0.02644500732421875, "step": 152285 }, { "epoch": 1.3168065991647284, "grad_norm": 16.919914547996434, "learning_rate": 1.5679442474231513e-06, "loss": 0.1284820556640625, "step": 152290 }, { "epoch": 1.3168498326862716, "grad_norm": 0.6045086176647204, "learning_rate": 1.5677652279011783e-06, "loss": 0.045951461791992186, "step": 152295 }, { "epoch": 1.3168930662078149, "grad_norm": 21.07752066730287, "learning_rate": 1.5675862149845454e-06, "loss": 0.08455581665039062, "step": 152300 }, { "epoch": 1.316936299729358, "grad_norm": 0.9227351603302895, "learning_rate": 1.5674072086740789e-06, "loss": 0.022568511962890624, "step": 152305 }, { "epoch": 1.3169795332509016, "grad_norm": 58.9349306596266, "learning_rate": 1.5672282089706035e-06, "loss": 0.263037109375, "step": 152310 }, { "epoch": 1.3170227667724448, "grad_norm": 22.2223902583581, "learning_rate": 1.5670492158749434e-06, "loss": 0.18143081665039062, "step": 152315 }, { "epoch": 1.317066000293988, "grad_norm": 3.3350748145194062, "learning_rate": 1.5668702293879272e-06, "loss": 0.036569976806640626, "step": 152320 }, { "epoch": 1.3171092338155312, "grad_norm": 0.32211416472566434, "learning_rate": 1.5666912495103785e-06, "loss": 0.04156150817871094, "step": 152325 }, { "epoch": 1.3171524673370745, "grad_norm": 25.964524952811807, "learning_rate": 1.5665122762431235e-06, "loss": 0.20359649658203124, "step": 152330 }, { "epoch": 1.3171957008586177, "grad_norm": 1.7154109541026283, "learning_rate": 1.5663333095869862e-06, "loss": 0.0492462158203125, "step": 152335 }, { "epoch": 1.317238934380161, "grad_norm": 2.3009186949789147, "learning_rate": 1.5661543495427938e-06, "loss": 0.06526393890380859, "step": 152340 }, { "epoch": 1.3172821679017042, "grad_norm": 0.43980786170398073, "learning_rate": 1.5659753961113709e-06, "loss": 0.07907905578613281, "step": 152345 }, { "epoch": 1.3173254014232476, "grad_norm": 19.664197887568232, "learning_rate": 1.5657964492935416e-06, "loss": 0.5332122802734375, "step": 152350 }, { "epoch": 1.3173686349447908, "grad_norm": 5.635033078414079, "learning_rate": 1.5656175090901336e-06, "loss": 0.0415191650390625, "step": 152355 }, { "epoch": 1.317411868466334, "grad_norm": 1.4453674772539669, "learning_rate": 1.565438575501971e-06, "loss": 0.06419677734375, "step": 152360 }, { "epoch": 1.3174551019878773, "grad_norm": 2.0945319958955455, "learning_rate": 1.5652596485298786e-06, "loss": 0.14501953125, "step": 152365 }, { "epoch": 1.3174983355094205, "grad_norm": 6.19817166223782, "learning_rate": 1.565080728174682e-06, "loss": 0.017782211303710938, "step": 152370 }, { "epoch": 1.317541569030964, "grad_norm": 27.964587414059626, "learning_rate": 1.5649018144372067e-06, "loss": 0.04012374877929688, "step": 152375 }, { "epoch": 1.3175848025525072, "grad_norm": 13.24249716242571, "learning_rate": 1.5647229073182758e-06, "loss": 0.034923553466796875, "step": 152380 }, { "epoch": 1.3176280360740504, "grad_norm": 6.00442800198598, "learning_rate": 1.5645440068187173e-06, "loss": 0.13099594116210939, "step": 152385 }, { "epoch": 1.3176712695955937, "grad_norm": 5.8693252856168465, "learning_rate": 1.564365112939354e-06, "loss": 0.057787322998046876, "step": 152390 }, { "epoch": 1.317714503117137, "grad_norm": 6.326596809962504, "learning_rate": 1.564186225681013e-06, "loss": 0.12438087463378907, "step": 152395 }, { "epoch": 1.3177577366386801, "grad_norm": 2.157911796798849, "learning_rate": 1.5640073450445182e-06, "loss": 0.020705413818359376, "step": 152400 }, { "epoch": 1.3178009701602234, "grad_norm": 1.1683997290574593, "learning_rate": 1.5638284710306948e-06, "loss": 0.02581920623779297, "step": 152405 }, { "epoch": 1.3178442036817666, "grad_norm": 3.6960111061424445, "learning_rate": 1.5636496036403662e-06, "loss": 0.017249584197998047, "step": 152410 }, { "epoch": 1.31788743720331, "grad_norm": 2.4506197115907162, "learning_rate": 1.5634707428743602e-06, "loss": 0.03304862976074219, "step": 152415 }, { "epoch": 1.3179306707248533, "grad_norm": 0.262687808010096, "learning_rate": 1.5632918887335e-06, "loss": 0.004334926605224609, "step": 152420 }, { "epoch": 1.3179739042463965, "grad_norm": 2.297716291309709, "learning_rate": 1.5631130412186104e-06, "loss": 0.07421340942382812, "step": 152425 }, { "epoch": 1.3180171377679397, "grad_norm": 0.5239883093984141, "learning_rate": 1.5629342003305168e-06, "loss": 0.00486907958984375, "step": 152430 }, { "epoch": 1.318060371289483, "grad_norm": 0.7433431146674693, "learning_rate": 1.5627553660700437e-06, "loss": 0.015561676025390625, "step": 152435 }, { "epoch": 1.3181036048110264, "grad_norm": 2.5994187379076936, "learning_rate": 1.5625765384380146e-06, "loss": 0.15190467834472657, "step": 152440 }, { "epoch": 1.3181468383325696, "grad_norm": 3.1618114412185565, "learning_rate": 1.5623977174352567e-06, "loss": 0.02584228515625, "step": 152445 }, { "epoch": 1.3181900718541129, "grad_norm": 1.4177727453305593, "learning_rate": 1.5622189030625925e-06, "loss": 0.11842117309570313, "step": 152450 }, { "epoch": 1.318233305375656, "grad_norm": 9.75022359950523, "learning_rate": 1.5620400953208488e-06, "loss": 0.2000865936279297, "step": 152455 }, { "epoch": 1.3182765388971993, "grad_norm": 0.0858453850252436, "learning_rate": 1.5618612942108488e-06, "loss": 0.013673782348632812, "step": 152460 }, { "epoch": 1.3183197724187425, "grad_norm": 2.2958619424650077, "learning_rate": 1.5616824997334174e-06, "loss": 0.22938995361328124, "step": 152465 }, { "epoch": 1.3183630059402858, "grad_norm": 0.27202020915047626, "learning_rate": 1.5615037118893795e-06, "loss": 0.02124481201171875, "step": 152470 }, { "epoch": 1.318406239461829, "grad_norm": 5.628440479793541, "learning_rate": 1.561324930679558e-06, "loss": 0.03455429077148438, "step": 152475 }, { "epoch": 1.3184494729833725, "grad_norm": 36.39015393227431, "learning_rate": 1.5611461561047802e-06, "loss": 0.30242919921875, "step": 152480 }, { "epoch": 1.3184927065049157, "grad_norm": 6.557602016630547, "learning_rate": 1.5609673881658692e-06, "loss": 0.02700386047363281, "step": 152485 }, { "epoch": 1.318535940026459, "grad_norm": 1.176608037447066, "learning_rate": 1.5607886268636494e-06, "loss": 0.04820747375488281, "step": 152490 }, { "epoch": 1.3185791735480021, "grad_norm": 8.19715380069865, "learning_rate": 1.5606098721989451e-06, "loss": 0.022072601318359374, "step": 152495 }, { "epoch": 1.3186224070695454, "grad_norm": 12.46457793959423, "learning_rate": 1.56043112417258e-06, "loss": 0.05503578186035156, "step": 152500 }, { "epoch": 1.3186656405910888, "grad_norm": 1.0272262960474807, "learning_rate": 1.5602523827853801e-06, "loss": 0.02639350891113281, "step": 152505 }, { "epoch": 1.318708874112632, "grad_norm": 0.7553259455772673, "learning_rate": 1.5600736480381682e-06, "loss": 0.12000656127929688, "step": 152510 }, { "epoch": 1.3187521076341753, "grad_norm": 8.629937570732272, "learning_rate": 1.5598949199317703e-06, "loss": 0.09207019805908204, "step": 152515 }, { "epoch": 1.3187953411557185, "grad_norm": 15.943380917642306, "learning_rate": 1.5597161984670099e-06, "loss": 0.024298095703125, "step": 152520 }, { "epoch": 1.3188385746772617, "grad_norm": 1.426896965315559, "learning_rate": 1.5595374836447113e-06, "loss": 0.14613723754882812, "step": 152525 }, { "epoch": 1.318881808198805, "grad_norm": 2.004588457961694, "learning_rate": 1.5593587754656986e-06, "loss": 0.14477157592773438, "step": 152530 }, { "epoch": 1.3189250417203482, "grad_norm": 0.051036501651860106, "learning_rate": 1.5591800739307955e-06, "loss": 0.06764183044433594, "step": 152535 }, { "epoch": 1.3189682752418914, "grad_norm": 2.6521305817563685, "learning_rate": 1.5590013790408258e-06, "loss": 0.03925952911376953, "step": 152540 }, { "epoch": 1.3190115087634349, "grad_norm": 4.565830698990238, "learning_rate": 1.5588226907966152e-06, "loss": 0.01755943298339844, "step": 152545 }, { "epoch": 1.319054742284978, "grad_norm": 12.202730171637977, "learning_rate": 1.5586440091989874e-06, "loss": 0.126617431640625, "step": 152550 }, { "epoch": 1.3190979758065213, "grad_norm": 7.178231046536973, "learning_rate": 1.558465334248766e-06, "loss": 0.03969764709472656, "step": 152555 }, { "epoch": 1.3191412093280646, "grad_norm": 15.712032524250661, "learning_rate": 1.5582866659467738e-06, "loss": 0.07881927490234375, "step": 152560 }, { "epoch": 1.319184442849608, "grad_norm": 14.077098340849577, "learning_rate": 1.5581080042938374e-06, "loss": 0.08099899291992188, "step": 152565 }, { "epoch": 1.3192276763711512, "grad_norm": 55.794326481500846, "learning_rate": 1.557929349290778e-06, "loss": 0.1520862579345703, "step": 152570 }, { "epoch": 1.3192709098926945, "grad_norm": 30.167472845970366, "learning_rate": 1.5577507009384225e-06, "loss": 0.13230438232421876, "step": 152575 }, { "epoch": 1.3193141434142377, "grad_norm": 1.1758744330165196, "learning_rate": 1.5575720592375933e-06, "loss": 0.0276397705078125, "step": 152580 }, { "epoch": 1.319357376935781, "grad_norm": 1.6145117485704188, "learning_rate": 1.5573934241891143e-06, "loss": 0.03637237548828125, "step": 152585 }, { "epoch": 1.3194006104573242, "grad_norm": 8.526833408579497, "learning_rate": 1.5572147957938093e-06, "loss": 0.04436569213867188, "step": 152590 }, { "epoch": 1.3194438439788674, "grad_norm": 0.31670501855379146, "learning_rate": 1.5570361740525019e-06, "loss": 0.03557090759277344, "step": 152595 }, { "epoch": 1.3194870775004106, "grad_norm": 1.024916267215275, "learning_rate": 1.5568575589660166e-06, "loss": 0.0644287109375, "step": 152600 }, { "epoch": 1.319530311021954, "grad_norm": 0.6970819978893847, "learning_rate": 1.5566789505351753e-06, "loss": 0.04480438232421875, "step": 152605 }, { "epoch": 1.3195735445434973, "grad_norm": 0.6380679637068443, "learning_rate": 1.5565003487608046e-06, "loss": 0.07563600540161133, "step": 152610 }, { "epoch": 1.3196167780650405, "grad_norm": 6.076061442202149, "learning_rate": 1.5563217536437256e-06, "loss": 0.04620027542114258, "step": 152615 }, { "epoch": 1.3196600115865837, "grad_norm": 5.5328481348206955, "learning_rate": 1.5561431651847637e-06, "loss": 0.03959465026855469, "step": 152620 }, { "epoch": 1.319703245108127, "grad_norm": 17.453681889387234, "learning_rate": 1.5559645833847424e-06, "loss": 0.15285797119140626, "step": 152625 }, { "epoch": 1.3197464786296704, "grad_norm": 1.017919924789706, "learning_rate": 1.5557860082444847e-06, "loss": 0.00920257568359375, "step": 152630 }, { "epoch": 1.3197897121512137, "grad_norm": 8.777349050591702, "learning_rate": 1.5556074397648131e-06, "loss": 0.14060211181640625, "step": 152635 }, { "epoch": 1.3198329456727569, "grad_norm": 15.253352941182206, "learning_rate": 1.5554288779465536e-06, "loss": 0.22054882049560548, "step": 152640 }, { "epoch": 1.3198761791943001, "grad_norm": 61.969052595354356, "learning_rate": 1.5552503227905286e-06, "loss": 0.15147552490234376, "step": 152645 }, { "epoch": 1.3199194127158433, "grad_norm": 0.14702538802244244, "learning_rate": 1.5550717742975612e-06, "loss": 0.022515487670898438, "step": 152650 }, { "epoch": 1.3199626462373866, "grad_norm": 0.18841301159250223, "learning_rate": 1.5548932324684754e-06, "loss": 0.07511634826660156, "step": 152655 }, { "epoch": 1.3200058797589298, "grad_norm": 0.7990665196435461, "learning_rate": 1.5547146973040941e-06, "loss": 0.0358154296875, "step": 152660 }, { "epoch": 1.320049113280473, "grad_norm": 11.507511982183217, "learning_rate": 1.55453616880524e-06, "loss": 0.11559600830078125, "step": 152665 }, { "epoch": 1.3200923468020165, "grad_norm": 9.86119193123559, "learning_rate": 1.5543576469727381e-06, "loss": 0.304986572265625, "step": 152670 }, { "epoch": 1.3201355803235597, "grad_norm": 0.20026724987327132, "learning_rate": 1.55417913180741e-06, "loss": 0.23333892822265626, "step": 152675 }, { "epoch": 1.320178813845103, "grad_norm": 2.0184916554062937, "learning_rate": 1.5540006233100813e-06, "loss": 0.03837890625, "step": 152680 }, { "epoch": 1.3202220473666462, "grad_norm": 0.8707762256735817, "learning_rate": 1.553822121481574e-06, "loss": 0.010417556762695313, "step": 152685 }, { "epoch": 1.3202652808881894, "grad_norm": 44.130229498778874, "learning_rate": 1.5536436263227112e-06, "loss": 0.22360153198242189, "step": 152690 }, { "epoch": 1.3203085144097328, "grad_norm": 1.5399761231718347, "learning_rate": 1.553465137834316e-06, "loss": 0.016607666015625, "step": 152695 }, { "epoch": 1.320351747931276, "grad_norm": 3.3155689441907774, "learning_rate": 1.5532866560172108e-06, "loss": 0.01085042953491211, "step": 152700 }, { "epoch": 1.3203949814528193, "grad_norm": 50.89404303241499, "learning_rate": 1.5531081808722208e-06, "loss": 0.09222278594970704, "step": 152705 }, { "epoch": 1.3204382149743625, "grad_norm": 0.6972015625750438, "learning_rate": 1.5529297124001682e-06, "loss": 0.19018020629882812, "step": 152710 }, { "epoch": 1.3204814484959058, "grad_norm": 2.221225542288808, "learning_rate": 1.5527512506018755e-06, "loss": 0.172515869140625, "step": 152715 }, { "epoch": 1.320524682017449, "grad_norm": 18.7240596657642, "learning_rate": 1.5525727954781663e-06, "loss": 0.07178678512573242, "step": 152720 }, { "epoch": 1.3205679155389922, "grad_norm": 42.31368668603859, "learning_rate": 1.5523943470298627e-06, "loss": 0.1260395050048828, "step": 152725 }, { "epoch": 1.3206111490605354, "grad_norm": 12.776718907471729, "learning_rate": 1.5522159052577891e-06, "loss": 0.022127342224121094, "step": 152730 }, { "epoch": 1.320654382582079, "grad_norm": 19.726320899942237, "learning_rate": 1.5520374701627668e-06, "loss": 0.07715377807617188, "step": 152735 }, { "epoch": 1.3206976161036221, "grad_norm": 3.458506199969005, "learning_rate": 1.5518590417456216e-06, "loss": 0.02572479248046875, "step": 152740 }, { "epoch": 1.3207408496251654, "grad_norm": 4.690507927001124, "learning_rate": 1.5516806200071738e-06, "loss": 0.10495071411132813, "step": 152745 }, { "epoch": 1.3207840831467086, "grad_norm": 0.816517872116034, "learning_rate": 1.551502204948247e-06, "loss": 0.01616363525390625, "step": 152750 }, { "epoch": 1.3208273166682518, "grad_norm": 1.0044859723882922, "learning_rate": 1.5513237965696642e-06, "loss": 0.05395793914794922, "step": 152755 }, { "epoch": 1.3208705501897953, "grad_norm": 0.41872528739294784, "learning_rate": 1.551145394872247e-06, "loss": 0.021931838989257813, "step": 152760 }, { "epoch": 1.3209137837113385, "grad_norm": 1.5178213072558617, "learning_rate": 1.55096699985682e-06, "loss": 0.02534027099609375, "step": 152765 }, { "epoch": 1.3209570172328817, "grad_norm": 0.8610376843990324, "learning_rate": 1.5507886115242053e-06, "loss": 0.0254180908203125, "step": 152770 }, { "epoch": 1.321000250754425, "grad_norm": 1.9998513781772405, "learning_rate": 1.5506102298752258e-06, "loss": 0.02740325927734375, "step": 152775 }, { "epoch": 1.3210434842759682, "grad_norm": 20.347161014908725, "learning_rate": 1.5504318549107037e-06, "loss": 0.049028778076171876, "step": 152780 }, { "epoch": 1.3210867177975114, "grad_norm": 9.614329649285212, "learning_rate": 1.5502534866314604e-06, "loss": 0.11060829162597656, "step": 152785 }, { "epoch": 1.3211299513190546, "grad_norm": 0.8424321901508295, "learning_rate": 1.5500751250383213e-06, "loss": 0.009237289428710938, "step": 152790 }, { "epoch": 1.3211731848405979, "grad_norm": 26.088102946992095, "learning_rate": 1.5498967701321061e-06, "loss": 0.18087501525878907, "step": 152795 }, { "epoch": 1.3212164183621413, "grad_norm": 0.5886695510105039, "learning_rate": 1.5497184219136404e-06, "loss": 0.019448280334472656, "step": 152800 }, { "epoch": 1.3212596518836845, "grad_norm": 25.17663289439202, "learning_rate": 1.5495400803837452e-06, "loss": 0.03967971801757812, "step": 152805 }, { "epoch": 1.3213028854052278, "grad_norm": 1.3182017839699314, "learning_rate": 1.5493617455432429e-06, "loss": 0.15793266296386718, "step": 152810 }, { "epoch": 1.321346118926771, "grad_norm": 1.623430986450361, "learning_rate": 1.5491834173929557e-06, "loss": 0.0534423828125, "step": 152815 }, { "epoch": 1.3213893524483145, "grad_norm": 0.6379952592309583, "learning_rate": 1.549005095933707e-06, "loss": 0.18452835083007812, "step": 152820 }, { "epoch": 1.3214325859698577, "grad_norm": 8.659305206898155, "learning_rate": 1.5488267811663172e-06, "loss": 0.03643989562988281, "step": 152825 }, { "epoch": 1.321475819491401, "grad_norm": 2.748064340885023, "learning_rate": 1.5486484730916113e-06, "loss": 0.1172454833984375, "step": 152830 }, { "epoch": 1.3215190530129441, "grad_norm": 0.5370131733386214, "learning_rate": 1.5484701717104101e-06, "loss": 0.05399169921875, "step": 152835 }, { "epoch": 1.3215622865344874, "grad_norm": 5.6814756847282, "learning_rate": 1.5482918770235352e-06, "loss": 0.023886871337890626, "step": 152840 }, { "epoch": 1.3216055200560306, "grad_norm": 1.485901640179087, "learning_rate": 1.5481135890318109e-06, "loss": 0.09782943725585938, "step": 152845 }, { "epoch": 1.3216487535775738, "grad_norm": 1.5123065263181918, "learning_rate": 1.5479353077360586e-06, "loss": 0.02829456329345703, "step": 152850 }, { "epoch": 1.321691987099117, "grad_norm": 0.15666009400041137, "learning_rate": 1.5477570331370992e-06, "loss": 0.04832115173339844, "step": 152855 }, { "epoch": 1.3217352206206605, "grad_norm": 8.374335903327943, "learning_rate": 1.5475787652357572e-06, "loss": 0.07229347229003906, "step": 152860 }, { "epoch": 1.3217784541422037, "grad_norm": 0.22312270805309078, "learning_rate": 1.5474005040328536e-06, "loss": 0.05551300048828125, "step": 152865 }, { "epoch": 1.321821687663747, "grad_norm": 22.335782492910557, "learning_rate": 1.5472222495292106e-06, "loss": 0.1848724365234375, "step": 152870 }, { "epoch": 1.3218649211852902, "grad_norm": 2.651571514180417, "learning_rate": 1.54704400172565e-06, "loss": 0.10564498901367188, "step": 152875 }, { "epoch": 1.3219081547068334, "grad_norm": 14.484003272415237, "learning_rate": 1.5468657606229943e-06, "loss": 0.08565826416015625, "step": 152880 }, { "epoch": 1.3219513882283769, "grad_norm": 3.3330304908117205, "learning_rate": 1.546687526222065e-06, "loss": 0.024423599243164062, "step": 152885 }, { "epoch": 1.32199462174992, "grad_norm": 1.3525206352884191, "learning_rate": 1.5465092985236832e-06, "loss": 0.018304824829101562, "step": 152890 }, { "epoch": 1.3220378552714633, "grad_norm": 7.271690496320895, "learning_rate": 1.5463310775286738e-06, "loss": 0.05911750793457031, "step": 152895 }, { "epoch": 1.3220810887930066, "grad_norm": 7.197564286359323, "learning_rate": 1.5461528632378554e-06, "loss": 0.13899078369140624, "step": 152900 }, { "epoch": 1.3221243223145498, "grad_norm": 39.2836675333205, "learning_rate": 1.545974655652053e-06, "loss": 0.13672027587890626, "step": 152905 }, { "epoch": 1.322167555836093, "grad_norm": 4.355086430418073, "learning_rate": 1.545796454772087e-06, "loss": 0.026558303833007814, "step": 152910 }, { "epoch": 1.3222107893576363, "grad_norm": 1.1396017797153997, "learning_rate": 1.5456182605987788e-06, "loss": 0.030021286010742186, "step": 152915 }, { "epoch": 1.3222540228791795, "grad_norm": 1.4915653605209658, "learning_rate": 1.54544007313295e-06, "loss": 0.07476654052734374, "step": 152920 }, { "epoch": 1.322297256400723, "grad_norm": 0.7350245062073909, "learning_rate": 1.5452618923754237e-06, "loss": 0.027513504028320312, "step": 152925 }, { "epoch": 1.3223404899222662, "grad_norm": 0.34595980258757497, "learning_rate": 1.545083718327022e-06, "loss": 0.15535850524902345, "step": 152930 }, { "epoch": 1.3223837234438094, "grad_norm": 32.7359178917641, "learning_rate": 1.5449055509885646e-06, "loss": 0.13914260864257813, "step": 152935 }, { "epoch": 1.3224269569653526, "grad_norm": 12.510661685306513, "learning_rate": 1.5447273903608744e-06, "loss": 0.08705024719238282, "step": 152940 }, { "epoch": 1.3224701904868958, "grad_norm": 29.127670983426974, "learning_rate": 1.5445492364447735e-06, "loss": 0.3252056121826172, "step": 152945 }, { "epoch": 1.3225134240084393, "grad_norm": 13.903881039967882, "learning_rate": 1.5443710892410813e-06, "loss": 0.08038864135742188, "step": 152950 }, { "epoch": 1.3225566575299825, "grad_norm": 0.18179550813428183, "learning_rate": 1.5441929487506222e-06, "loss": 0.018462371826171876, "step": 152955 }, { "epoch": 1.3225998910515258, "grad_norm": 0.8481174317949128, "learning_rate": 1.5440148149742153e-06, "loss": 0.13921852111816407, "step": 152960 }, { "epoch": 1.322643124573069, "grad_norm": 0.22055901530419333, "learning_rate": 1.5438366879126851e-06, "loss": 0.14294157028198243, "step": 152965 }, { "epoch": 1.3226863580946122, "grad_norm": 0.24673213032397176, "learning_rate": 1.5436585675668508e-06, "loss": 0.03125, "step": 152970 }, { "epoch": 1.3227295916161554, "grad_norm": 0.5662168449024917, "learning_rate": 1.5434804539375347e-06, "loss": 0.022694778442382813, "step": 152975 }, { "epoch": 1.3227728251376987, "grad_norm": 1.451957896481104, "learning_rate": 1.5433023470255582e-06, "loss": 0.18695831298828125, "step": 152980 }, { "epoch": 1.322816058659242, "grad_norm": 2.117063876811621, "learning_rate": 1.543124246831741e-06, "loss": 0.014298534393310547, "step": 152985 }, { "epoch": 1.3228592921807854, "grad_norm": 3.488882525663137, "learning_rate": 1.5429461533569076e-06, "loss": 0.05434837341308594, "step": 152990 }, { "epoch": 1.3229025257023286, "grad_norm": 28.286356899491818, "learning_rate": 1.5427680666018778e-06, "loss": 0.13121414184570312, "step": 152995 }, { "epoch": 1.3229457592238718, "grad_norm": 1.4579446417043842, "learning_rate": 1.5425899865674728e-06, "loss": 0.09334945678710938, "step": 153000 }, { "epoch": 1.322988992745415, "grad_norm": 19.518064906705664, "learning_rate": 1.5424119132545143e-06, "loss": 0.06629486083984375, "step": 153005 }, { "epoch": 1.3230322262669583, "grad_norm": 14.715020977967823, "learning_rate": 1.5422338466638218e-06, "loss": 0.27045440673828125, "step": 153010 }, { "epoch": 1.3230754597885017, "grad_norm": 0.16274336627335775, "learning_rate": 1.542055786796218e-06, "loss": 0.032085418701171875, "step": 153015 }, { "epoch": 1.323118693310045, "grad_norm": 2.2495006417320726, "learning_rate": 1.541877733652525e-06, "loss": 0.033912277221679686, "step": 153020 }, { "epoch": 1.3231619268315882, "grad_norm": 0.45794069038679797, "learning_rate": 1.5416996872335634e-06, "loss": 0.0062652587890625, "step": 153025 }, { "epoch": 1.3232051603531314, "grad_norm": 0.5096196822762497, "learning_rate": 1.5415216475401538e-06, "loss": 0.07885208129882812, "step": 153030 }, { "epoch": 1.3232483938746746, "grad_norm": 4.4458340489542705, "learning_rate": 1.5413436145731175e-06, "loss": 0.031444549560546875, "step": 153035 }, { "epoch": 1.3232916273962179, "grad_norm": 6.137219577375443, "learning_rate": 1.5411655883332758e-06, "loss": 0.0624847412109375, "step": 153040 }, { "epoch": 1.323334860917761, "grad_norm": 0.30140640323226264, "learning_rate": 1.5409875688214493e-06, "loss": 0.0367401123046875, "step": 153045 }, { "epoch": 1.3233780944393045, "grad_norm": 0.36568956952366427, "learning_rate": 1.5408095560384581e-06, "loss": 0.01506805419921875, "step": 153050 }, { "epoch": 1.3234213279608478, "grad_norm": 0.14548234781946986, "learning_rate": 1.5406315499851253e-06, "loss": 0.0988250732421875, "step": 153055 }, { "epoch": 1.323464561482391, "grad_norm": 11.304638235544822, "learning_rate": 1.540453550662271e-06, "loss": 0.045781707763671874, "step": 153060 }, { "epoch": 1.3235077950039342, "grad_norm": 5.295755284350345, "learning_rate": 1.5402755580707151e-06, "loss": 0.233929443359375, "step": 153065 }, { "epoch": 1.3235510285254775, "grad_norm": 9.176020086018966, "learning_rate": 1.5400975722112798e-06, "loss": 0.07273101806640625, "step": 153070 }, { "epoch": 1.323594262047021, "grad_norm": 4.573018353241741, "learning_rate": 1.5399195930847862e-06, "loss": 0.08069629669189453, "step": 153075 }, { "epoch": 1.3236374955685641, "grad_norm": 7.363465975300969, "learning_rate": 1.5397416206920529e-06, "loss": 0.028348541259765624, "step": 153080 }, { "epoch": 1.3236807290901074, "grad_norm": 2.573115736571992, "learning_rate": 1.5395636550339034e-06, "loss": 0.0067535400390625, "step": 153085 }, { "epoch": 1.3237239626116506, "grad_norm": 16.674300357312884, "learning_rate": 1.539385696111157e-06, "loss": 0.04853038787841797, "step": 153090 }, { "epoch": 1.3237671961331938, "grad_norm": 0.9810100196806669, "learning_rate": 1.5392077439246352e-06, "loss": 0.02301311492919922, "step": 153095 }, { "epoch": 1.323810429654737, "grad_norm": 1.9172340847576805, "learning_rate": 1.5390297984751578e-06, "loss": 0.0102508544921875, "step": 153100 }, { "epoch": 1.3238536631762803, "grad_norm": 6.285244325431569, "learning_rate": 1.5388518597635462e-06, "loss": 0.08039875030517578, "step": 153105 }, { "epoch": 1.3238968966978235, "grad_norm": 2.160149511954359, "learning_rate": 1.5386739277906197e-06, "loss": 0.0441192626953125, "step": 153110 }, { "epoch": 1.323940130219367, "grad_norm": 5.403740293664527, "learning_rate": 1.5384960025572006e-06, "loss": 0.02183361053466797, "step": 153115 }, { "epoch": 1.3239833637409102, "grad_norm": 0.8288692832356134, "learning_rate": 1.538318084064109e-06, "loss": 0.07773590087890625, "step": 153120 }, { "epoch": 1.3240265972624534, "grad_norm": 17.538696482193725, "learning_rate": 1.538140172312164e-06, "loss": 0.06129875183105469, "step": 153125 }, { "epoch": 1.3240698307839966, "grad_norm": 4.089154952494509, "learning_rate": 1.537962267302189e-06, "loss": 0.03570404052734375, "step": 153130 }, { "epoch": 1.3241130643055399, "grad_norm": 3.6115517734462195, "learning_rate": 1.5377843690350022e-06, "loss": 0.10149154663085938, "step": 153135 }, { "epoch": 1.3241562978270833, "grad_norm": 7.517766248921559, "learning_rate": 1.5376064775114251e-06, "loss": 0.1089385986328125, "step": 153140 }, { "epoch": 1.3241995313486266, "grad_norm": 0.18351141300711662, "learning_rate": 1.5374285927322761e-06, "loss": 0.037348175048828126, "step": 153145 }, { "epoch": 1.3242427648701698, "grad_norm": 1.38302815982523, "learning_rate": 1.5372507146983787e-06, "loss": 0.03813877105712891, "step": 153150 }, { "epoch": 1.324285998391713, "grad_norm": 1.2361137796495378, "learning_rate": 1.5370728434105517e-06, "loss": 0.17302360534667968, "step": 153155 }, { "epoch": 1.3243292319132562, "grad_norm": 0.16743826044613203, "learning_rate": 1.5368949788696155e-06, "loss": 0.05686235427856445, "step": 153160 }, { "epoch": 1.3243724654347995, "grad_norm": 1.972208121230099, "learning_rate": 1.5367171210763903e-06, "loss": 0.029798507690429688, "step": 153165 }, { "epoch": 1.3244156989563427, "grad_norm": 3.3217286572244094, "learning_rate": 1.5365392700316966e-06, "loss": 0.015826416015625, "step": 153170 }, { "epoch": 1.324458932477886, "grad_norm": 1.3687974747960847, "learning_rate": 1.536361425736353e-06, "loss": 0.0534637451171875, "step": 153175 }, { "epoch": 1.3245021659994294, "grad_norm": 1.4617057599454548, "learning_rate": 1.5361835881911827e-06, "loss": 0.2300537109375, "step": 153180 }, { "epoch": 1.3245453995209726, "grad_norm": 0.22551090837420967, "learning_rate": 1.5360057573970029e-06, "loss": 0.3300056457519531, "step": 153185 }, { "epoch": 1.3245886330425158, "grad_norm": 11.557006904805561, "learning_rate": 1.5358279333546364e-06, "loss": 0.11405715942382813, "step": 153190 }, { "epoch": 1.324631866564059, "grad_norm": 0.9827091247342106, "learning_rate": 1.535650116064902e-06, "loss": 0.21887741088867188, "step": 153195 }, { "epoch": 1.3246751000856023, "grad_norm": 30.389952660537496, "learning_rate": 1.5354723055286203e-06, "loss": 0.096234130859375, "step": 153200 }, { "epoch": 1.3247183336071457, "grad_norm": 4.614922926784919, "learning_rate": 1.5352945017466106e-06, "loss": 0.06632919311523437, "step": 153205 }, { "epoch": 1.324761567128689, "grad_norm": 3.1884692068754754, "learning_rate": 1.5351167047196923e-06, "loss": 0.0542205810546875, "step": 153210 }, { "epoch": 1.3248048006502322, "grad_norm": 57.67636039069822, "learning_rate": 1.534938914448687e-06, "loss": 0.16937179565429689, "step": 153215 }, { "epoch": 1.3248480341717754, "grad_norm": 5.485030522124122, "learning_rate": 1.5347611309344145e-06, "loss": 0.10878925323486328, "step": 153220 }, { "epoch": 1.3248912676933187, "grad_norm": 2.053651250055462, "learning_rate": 1.5345833541776936e-06, "loss": 0.030457305908203124, "step": 153225 }, { "epoch": 1.324934501214862, "grad_norm": 6.011863080385255, "learning_rate": 1.5344055841793442e-06, "loss": 0.05770721435546875, "step": 153230 }, { "epoch": 1.3249777347364051, "grad_norm": 2.912427412691498, "learning_rate": 1.534227820940188e-06, "loss": 0.14876861572265626, "step": 153235 }, { "epoch": 1.3250209682579484, "grad_norm": 0.24081186321071033, "learning_rate": 1.5340500644610417e-06, "loss": 0.0146636962890625, "step": 153240 }, { "epoch": 1.3250642017794918, "grad_norm": 13.594390026357564, "learning_rate": 1.5338723147427287e-06, "loss": 0.3975128173828125, "step": 153245 }, { "epoch": 1.325107435301035, "grad_norm": 12.619052594197392, "learning_rate": 1.5336945717860666e-06, "loss": 0.04400348663330078, "step": 153250 }, { "epoch": 1.3251506688225783, "grad_norm": 0.9806540195847236, "learning_rate": 1.5335168355918763e-06, "loss": 0.026232147216796876, "step": 153255 }, { "epoch": 1.3251939023441215, "grad_norm": 57.93723776501511, "learning_rate": 1.5333391061609763e-06, "loss": 0.5218307495117187, "step": 153260 }, { "epoch": 1.325237135865665, "grad_norm": 1.6836553144643398, "learning_rate": 1.5331613834941867e-06, "loss": 0.09278068542480469, "step": 153265 }, { "epoch": 1.3252803693872082, "grad_norm": 6.4273440701404585, "learning_rate": 1.532983667592326e-06, "loss": 0.02205047607421875, "step": 153270 }, { "epoch": 1.3253236029087514, "grad_norm": 47.83962789171917, "learning_rate": 1.5328059584562162e-06, "loss": 0.154656982421875, "step": 153275 }, { "epoch": 1.3253668364302946, "grad_norm": 20.892617754376968, "learning_rate": 1.5326282560866756e-06, "loss": 0.055496597290039064, "step": 153280 }, { "epoch": 1.3254100699518379, "grad_norm": 0.3951091938315099, "learning_rate": 1.5324505604845242e-06, "loss": 0.025465774536132812, "step": 153285 }, { "epoch": 1.325453303473381, "grad_norm": 1.3370262272781004, "learning_rate": 1.5322728716505796e-06, "loss": 0.01973247528076172, "step": 153290 }, { "epoch": 1.3254965369949243, "grad_norm": 1.968406252894912, "learning_rate": 1.5320951895856643e-06, "loss": 0.028011322021484375, "step": 153295 }, { "epoch": 1.3255397705164675, "grad_norm": 25.498773779640977, "learning_rate": 1.5319175142905957e-06, "loss": 0.06065511703491211, "step": 153300 }, { "epoch": 1.325583004038011, "grad_norm": 1.095911912501784, "learning_rate": 1.531739845766193e-06, "loss": 0.0848114013671875, "step": 153305 }, { "epoch": 1.3256262375595542, "grad_norm": 16.094229387822466, "learning_rate": 1.5315621840132778e-06, "loss": 0.054149627685546875, "step": 153310 }, { "epoch": 1.3256694710810975, "grad_norm": 0.2007486418982078, "learning_rate": 1.5313845290326681e-06, "loss": 0.1242898941040039, "step": 153315 }, { "epoch": 1.3257127046026407, "grad_norm": 1.6038284645682452, "learning_rate": 1.531206880825183e-06, "loss": 0.05048370361328125, "step": 153320 }, { "epoch": 1.325755938124184, "grad_norm": 7.501905288571798, "learning_rate": 1.5310292393916415e-06, "loss": 0.05077362060546875, "step": 153325 }, { "epoch": 1.3257991716457274, "grad_norm": 1.056075666759724, "learning_rate": 1.530851604732864e-06, "loss": 0.07628173828125, "step": 153330 }, { "epoch": 1.3258424051672706, "grad_norm": 0.7825355261885897, "learning_rate": 1.5306739768496673e-06, "loss": 0.027968978881835936, "step": 153335 }, { "epoch": 1.3258856386888138, "grad_norm": 28.574634124189533, "learning_rate": 1.530496355742874e-06, "loss": 0.22996578216552735, "step": 153340 }, { "epoch": 1.325928872210357, "grad_norm": 1.1720529234371675, "learning_rate": 1.530318741413302e-06, "loss": 0.052520370483398436, "step": 153345 }, { "epoch": 1.3259721057319003, "grad_norm": 2.4187582134145442, "learning_rate": 1.5301411338617681e-06, "loss": 0.01807861328125, "step": 153350 }, { "epoch": 1.3260153392534435, "grad_norm": 1.445717039825211, "learning_rate": 1.529963533089095e-06, "loss": 0.1340362548828125, "step": 153355 }, { "epoch": 1.3260585727749867, "grad_norm": 0.9681509977442302, "learning_rate": 1.5297859390961002e-06, "loss": 0.031304931640625, "step": 153360 }, { "epoch": 1.32610180629653, "grad_norm": 6.551861097201005, "learning_rate": 1.5296083518836015e-06, "loss": 0.030214309692382812, "step": 153365 }, { "epoch": 1.3261450398180734, "grad_norm": 28.095236288424047, "learning_rate": 1.5294307714524207e-06, "loss": 0.22952861785888673, "step": 153370 }, { "epoch": 1.3261882733396166, "grad_norm": 2.9574175302665258, "learning_rate": 1.5292531978033746e-06, "loss": 0.07210540771484375, "step": 153375 }, { "epoch": 1.3262315068611599, "grad_norm": 5.4819845648477905, "learning_rate": 1.529075630937283e-06, "loss": 0.083941650390625, "step": 153380 }, { "epoch": 1.326274740382703, "grad_norm": 1.5738267909231796, "learning_rate": 1.528898070854965e-06, "loss": 0.05930023193359375, "step": 153385 }, { "epoch": 1.3263179739042463, "grad_norm": 6.774142786286407, "learning_rate": 1.5287205175572387e-06, "loss": 0.06987724304199219, "step": 153390 }, { "epoch": 1.3263612074257898, "grad_norm": 18.588402166530987, "learning_rate": 1.5285429710449239e-06, "loss": 0.10036487579345703, "step": 153395 }, { "epoch": 1.326404440947333, "grad_norm": 1.1118110074804892, "learning_rate": 1.5283654313188373e-06, "loss": 0.0341949462890625, "step": 153400 }, { "epoch": 1.3264476744688762, "grad_norm": 5.363545572994041, "learning_rate": 1.5281878983798002e-06, "loss": 0.0204193115234375, "step": 153405 }, { "epoch": 1.3264909079904195, "grad_norm": 1.9470680008381154, "learning_rate": 1.5280103722286296e-06, "loss": 0.029816055297851564, "step": 153410 }, { "epoch": 1.3265341415119627, "grad_norm": 8.230286807478052, "learning_rate": 1.5278328528661464e-06, "loss": 0.115362548828125, "step": 153415 }, { "epoch": 1.326577375033506, "grad_norm": 0.720797568579249, "learning_rate": 1.5276553402931676e-06, "loss": 0.02724876403808594, "step": 153420 }, { "epoch": 1.3266206085550492, "grad_norm": 0.4245605559152116, "learning_rate": 1.5274778345105127e-06, "loss": 0.0225982666015625, "step": 153425 }, { "epoch": 1.3266638420765924, "grad_norm": 6.563061137866838, "learning_rate": 1.527300335518999e-06, "loss": 0.058250808715820314, "step": 153430 }, { "epoch": 1.3267070755981358, "grad_norm": 2.963868016169811, "learning_rate": 1.5271228433194463e-06, "loss": 0.03507919311523437, "step": 153435 }, { "epoch": 1.326750309119679, "grad_norm": 0.7014101857564657, "learning_rate": 1.5269453579126736e-06, "loss": 0.08049545288085938, "step": 153440 }, { "epoch": 1.3267935426412223, "grad_norm": 0.8069195613865667, "learning_rate": 1.5267678792994983e-06, "loss": 0.1055877685546875, "step": 153445 }, { "epoch": 1.3268367761627655, "grad_norm": 6.196111230598891, "learning_rate": 1.5265904074807394e-06, "loss": 0.0439361572265625, "step": 153450 }, { "epoch": 1.3268800096843087, "grad_norm": 8.636455632073174, "learning_rate": 1.5264129424572147e-06, "loss": 0.1429075241088867, "step": 153455 }, { "epoch": 1.3269232432058522, "grad_norm": 1.195473829326737, "learning_rate": 1.5262354842297444e-06, "loss": 0.01696434020996094, "step": 153460 }, { "epoch": 1.3269664767273954, "grad_norm": 0.2230754863153913, "learning_rate": 1.526058032799144e-06, "loss": 0.04346771240234375, "step": 153465 }, { "epoch": 1.3270097102489387, "grad_norm": 1.049228141399678, "learning_rate": 1.5258805881662355e-06, "loss": 0.03542041778564453, "step": 153470 }, { "epoch": 1.3270529437704819, "grad_norm": 17.23571411939535, "learning_rate": 1.5257031503318354e-06, "loss": 0.200177001953125, "step": 153475 }, { "epoch": 1.3270961772920251, "grad_norm": 3.3051088581011947, "learning_rate": 1.5255257192967614e-06, "loss": 0.02825775146484375, "step": 153480 }, { "epoch": 1.3271394108135683, "grad_norm": 2.4790203256447536, "learning_rate": 1.5253482950618332e-06, "loss": 0.08621902465820312, "step": 153485 }, { "epoch": 1.3271826443351116, "grad_norm": 14.85966927991959, "learning_rate": 1.5251708776278682e-06, "loss": 0.12917022705078124, "step": 153490 }, { "epoch": 1.3272258778566548, "grad_norm": 0.9220064213575022, "learning_rate": 1.5249934669956837e-06, "loss": 0.09240646362304687, "step": 153495 }, { "epoch": 1.3272691113781983, "grad_norm": 3.4888168783280133, "learning_rate": 1.5248160631661003e-06, "loss": 0.034136962890625, "step": 153500 }, { "epoch": 1.3273123448997415, "grad_norm": 7.2352908973487065, "learning_rate": 1.5246386661399346e-06, "loss": 0.02341766357421875, "step": 153505 }, { "epoch": 1.3273555784212847, "grad_norm": 0.6359369092306513, "learning_rate": 1.524461275918005e-06, "loss": 0.009459686279296876, "step": 153510 }, { "epoch": 1.327398811942828, "grad_norm": 3.7100364507031403, "learning_rate": 1.5242838925011286e-06, "loss": 0.06058006286621094, "step": 153515 }, { "epoch": 1.3274420454643714, "grad_norm": 4.572621189950719, "learning_rate": 1.5241065158901255e-06, "loss": 0.0480560302734375, "step": 153520 }, { "epoch": 1.3274852789859146, "grad_norm": 32.24160237423337, "learning_rate": 1.5239291460858116e-06, "loss": 0.18934135437011718, "step": 153525 }, { "epoch": 1.3275285125074578, "grad_norm": 0.20816630795181737, "learning_rate": 1.523751783089007e-06, "loss": 0.020804595947265626, "step": 153530 }, { "epoch": 1.327571746029001, "grad_norm": 1.2318592962113515, "learning_rate": 1.5235744269005293e-06, "loss": 0.02618064880371094, "step": 153535 }, { "epoch": 1.3276149795505443, "grad_norm": 2.7003106942761357, "learning_rate": 1.5233970775211954e-06, "loss": 0.030568313598632813, "step": 153540 }, { "epoch": 1.3276582130720875, "grad_norm": 4.096274043283599, "learning_rate": 1.5232197349518241e-06, "loss": 0.08293533325195312, "step": 153545 }, { "epoch": 1.3277014465936308, "grad_norm": 0.8761410051242822, "learning_rate": 1.5230423991932326e-06, "loss": 0.04794464111328125, "step": 153550 }, { "epoch": 1.327744680115174, "grad_norm": 0.23425574566302115, "learning_rate": 1.522865070246239e-06, "loss": 0.130523681640625, "step": 153555 }, { "epoch": 1.3277879136367174, "grad_norm": 16.749235227308194, "learning_rate": 1.5226877481116604e-06, "loss": 0.037921142578125, "step": 153560 }, { "epoch": 1.3278311471582607, "grad_norm": 1.0148528038522133, "learning_rate": 1.5225104327903158e-06, "loss": 0.02272186279296875, "step": 153565 }, { "epoch": 1.327874380679804, "grad_norm": 1.9886400808044506, "learning_rate": 1.5223331242830235e-06, "loss": 0.07417678833007812, "step": 153570 }, { "epoch": 1.3279176142013471, "grad_norm": 2.130171043938672, "learning_rate": 1.5221558225905982e-06, "loss": 0.047010040283203124, "step": 153575 }, { "epoch": 1.3279608477228904, "grad_norm": 11.32492172896484, "learning_rate": 1.521978527713861e-06, "loss": 0.064044189453125, "step": 153580 }, { "epoch": 1.3280040812444338, "grad_norm": 6.991060064988096, "learning_rate": 1.5218012396536288e-06, "loss": 0.10847702026367187, "step": 153585 }, { "epoch": 1.328047314765977, "grad_norm": 5.355350346428732, "learning_rate": 1.521623958410717e-06, "loss": 0.037396240234375, "step": 153590 }, { "epoch": 1.3280905482875203, "grad_norm": 16.009732227799763, "learning_rate": 1.5214466839859463e-06, "loss": 0.10503692626953125, "step": 153595 }, { "epoch": 1.3281337818090635, "grad_norm": 31.12832537442552, "learning_rate": 1.5212694163801327e-06, "loss": 0.18903961181640624, "step": 153600 }, { "epoch": 1.3281770153306067, "grad_norm": 3.247290953500932, "learning_rate": 1.5210921555940943e-06, "loss": 0.009611892700195312, "step": 153605 }, { "epoch": 1.32822024885215, "grad_norm": 0.3298317129056736, "learning_rate": 1.5209149016286474e-06, "loss": 0.039813995361328125, "step": 153610 }, { "epoch": 1.3282634823736932, "grad_norm": 0.19968106202028782, "learning_rate": 1.5207376544846107e-06, "loss": 0.046387100219726564, "step": 153615 }, { "epoch": 1.3283067158952364, "grad_norm": 2.6480379233468336, "learning_rate": 1.5205604141628002e-06, "loss": 0.018910598754882813, "step": 153620 }, { "epoch": 1.3283499494167799, "grad_norm": 79.53562897854503, "learning_rate": 1.5203831806640351e-06, "loss": 0.037212371826171875, "step": 153625 }, { "epoch": 1.328393182938323, "grad_norm": 0.07272385984854203, "learning_rate": 1.5202059539891316e-06, "loss": 0.00980377197265625, "step": 153630 }, { "epoch": 1.3284364164598663, "grad_norm": 46.661399833299555, "learning_rate": 1.520028734138908e-06, "loss": 0.12668533325195314, "step": 153635 }, { "epoch": 1.3284796499814095, "grad_norm": 0.4222329614707868, "learning_rate": 1.5198515211141814e-06, "loss": 0.11612281799316407, "step": 153640 }, { "epoch": 1.3285228835029528, "grad_norm": 4.469101864671914, "learning_rate": 1.5196743149157684e-06, "loss": 0.046377944946289065, "step": 153645 }, { "epoch": 1.3285661170244962, "grad_norm": 12.180815610065574, "learning_rate": 1.5194971155444872e-06, "loss": 0.06791610717773437, "step": 153650 }, { "epoch": 1.3286093505460395, "grad_norm": 3.949469399882159, "learning_rate": 1.5193199230011528e-06, "loss": 0.05848236083984375, "step": 153655 }, { "epoch": 1.3286525840675827, "grad_norm": 4.401521553122873, "learning_rate": 1.5191427372865857e-06, "loss": 0.045654296875, "step": 153660 }, { "epoch": 1.328695817589126, "grad_norm": 15.156419182540054, "learning_rate": 1.5189655584016014e-06, "loss": 0.05524024963378906, "step": 153665 }, { "epoch": 1.3287390511106691, "grad_norm": 1.3355569439580395, "learning_rate": 1.5187883863470167e-06, "loss": 0.035532379150390626, "step": 153670 }, { "epoch": 1.3287822846322124, "grad_norm": 3.6605548780767294, "learning_rate": 1.5186112211236496e-06, "loss": 0.09083976745605468, "step": 153675 }, { "epoch": 1.3288255181537556, "grad_norm": 16.951345777042675, "learning_rate": 1.5184340627323149e-06, "loss": 0.05099411010742187, "step": 153680 }, { "epoch": 1.3288687516752988, "grad_norm": 47.26133163928967, "learning_rate": 1.518256911173833e-06, "loss": 0.31214752197265627, "step": 153685 }, { "epoch": 1.3289119851968423, "grad_norm": 0.8981008899714035, "learning_rate": 1.518079766449018e-06, "loss": 0.036038780212402345, "step": 153690 }, { "epoch": 1.3289552187183855, "grad_norm": 1.0691157772951698, "learning_rate": 1.5179026285586895e-06, "loss": 0.02906646728515625, "step": 153695 }, { "epoch": 1.3289984522399287, "grad_norm": 6.913271179567352, "learning_rate": 1.5177254975036625e-06, "loss": 0.023842239379882814, "step": 153700 }, { "epoch": 1.329041685761472, "grad_norm": 1.187168203750374, "learning_rate": 1.5175483732847552e-06, "loss": 0.04741668701171875, "step": 153705 }, { "epoch": 1.3290849192830152, "grad_norm": 12.23603004385438, "learning_rate": 1.5173712559027837e-06, "loss": 0.19625930786132811, "step": 153710 }, { "epoch": 1.3291281528045586, "grad_norm": 2.170407606275472, "learning_rate": 1.5171941453585635e-06, "loss": 0.20490188598632814, "step": 153715 }, { "epoch": 1.3291713863261019, "grad_norm": 0.683634248111826, "learning_rate": 1.5170170416529143e-06, "loss": 0.010210800170898437, "step": 153720 }, { "epoch": 1.329214619847645, "grad_norm": 17.720284001776722, "learning_rate": 1.5168399447866512e-06, "loss": 0.06984405517578125, "step": 153725 }, { "epoch": 1.3292578533691883, "grad_norm": 2.875222372982151, "learning_rate": 1.5166628547605915e-06, "loss": 0.0588623046875, "step": 153730 }, { "epoch": 1.3293010868907316, "grad_norm": 2.5233977125479092, "learning_rate": 1.5164857715755516e-06, "loss": 0.02636260986328125, "step": 153735 }, { "epoch": 1.3293443204122748, "grad_norm": 0.9957966748835838, "learning_rate": 1.5163086952323472e-06, "loss": 0.06948013305664062, "step": 153740 }, { "epoch": 1.329387553933818, "grad_norm": 26.793717618918816, "learning_rate": 1.516131625731797e-06, "loss": 0.09941940307617188, "step": 153745 }, { "epoch": 1.3294307874553613, "grad_norm": 27.55725835113865, "learning_rate": 1.5159545630747153e-06, "loss": 0.133551025390625, "step": 153750 }, { "epoch": 1.3294740209769047, "grad_norm": 13.507104923551971, "learning_rate": 1.5157775072619212e-06, "loss": 0.20963172912597655, "step": 153755 }, { "epoch": 1.329517254498448, "grad_norm": 0.3050481769419768, "learning_rate": 1.5156004582942301e-06, "loss": 0.036998748779296875, "step": 153760 }, { "epoch": 1.3295604880199912, "grad_norm": 0.22490126076273967, "learning_rate": 1.5154234161724588e-06, "loss": 0.0374664306640625, "step": 153765 }, { "epoch": 1.3296037215415344, "grad_norm": 1.806783846615591, "learning_rate": 1.5152463808974234e-06, "loss": 0.04953079223632813, "step": 153770 }, { "epoch": 1.3296469550630778, "grad_norm": 0.14446033251875706, "learning_rate": 1.5150693524699406e-06, "loss": 0.10175018310546875, "step": 153775 }, { "epoch": 1.329690188584621, "grad_norm": 1.1078602235412243, "learning_rate": 1.5148923308908254e-06, "loss": 0.07197723388671876, "step": 153780 }, { "epoch": 1.3297334221061643, "grad_norm": 6.4204799928281995, "learning_rate": 1.5147153161608966e-06, "loss": 0.06644821166992188, "step": 153785 }, { "epoch": 1.3297766556277075, "grad_norm": 0.22411048037188563, "learning_rate": 1.5145383082809698e-06, "loss": 0.038361740112304685, "step": 153790 }, { "epoch": 1.3298198891492508, "grad_norm": 9.97964439801211, "learning_rate": 1.514361307251861e-06, "loss": 0.13999290466308595, "step": 153795 }, { "epoch": 1.329863122670794, "grad_norm": 10.245349347444824, "learning_rate": 1.5141843130743854e-06, "loss": 0.020547103881835938, "step": 153800 }, { "epoch": 1.3299063561923372, "grad_norm": 18.00389759118056, "learning_rate": 1.5140073257493614e-06, "loss": 0.186724853515625, "step": 153805 }, { "epoch": 1.3299495897138804, "grad_norm": 2.078876152690025, "learning_rate": 1.5138303452776033e-06, "loss": 0.041058349609375, "step": 153810 }, { "epoch": 1.329992823235424, "grad_norm": 1.196980094206103, "learning_rate": 1.5136533716599295e-06, "loss": 0.08608245849609375, "step": 153815 }, { "epoch": 1.3300360567569671, "grad_norm": 25.010484071020624, "learning_rate": 1.513476404897155e-06, "loss": 0.14558792114257812, "step": 153820 }, { "epoch": 1.3300792902785104, "grad_norm": 6.678373975086585, "learning_rate": 1.5132994449900958e-06, "loss": 0.061008453369140625, "step": 153825 }, { "epoch": 1.3301225238000536, "grad_norm": 11.851938629731844, "learning_rate": 1.5131224919395687e-06, "loss": 0.039171600341796876, "step": 153830 }, { "epoch": 1.3301657573215968, "grad_norm": 2.4793826867379094, "learning_rate": 1.512945545746389e-06, "loss": 0.052342700958251956, "step": 153835 }, { "epoch": 1.3302089908431403, "grad_norm": 0.3163471229957937, "learning_rate": 1.5127686064113725e-06, "loss": 0.018248748779296876, "step": 153840 }, { "epoch": 1.3302522243646835, "grad_norm": 2.12886829499327, "learning_rate": 1.5125916739353356e-06, "loss": 0.026589202880859374, "step": 153845 }, { "epoch": 1.3302954578862267, "grad_norm": 40.375234930563174, "learning_rate": 1.512414748319095e-06, "loss": 0.14654293060302734, "step": 153850 }, { "epoch": 1.33033869140777, "grad_norm": 9.678295714595613, "learning_rate": 1.5122378295634652e-06, "loss": 0.024263572692871094, "step": 153855 }, { "epoch": 1.3303819249293132, "grad_norm": 1.503283055556335, "learning_rate": 1.5120609176692641e-06, "loss": 0.07875595092773438, "step": 153860 }, { "epoch": 1.3304251584508564, "grad_norm": 0.09313936965207562, "learning_rate": 1.5118840126373064e-06, "loss": 0.01284637451171875, "step": 153865 }, { "epoch": 1.3304683919723996, "grad_norm": 0.0872292414027696, "learning_rate": 1.5117071144684084e-06, "loss": 0.024358367919921874, "step": 153870 }, { "epoch": 1.3305116254939429, "grad_norm": 0.7402275075351641, "learning_rate": 1.5115302231633847e-06, "loss": 0.25926895141601564, "step": 153875 }, { "epoch": 1.3305548590154863, "grad_norm": 0.2940480354606005, "learning_rate": 1.5113533387230531e-06, "loss": 0.10696029663085938, "step": 153880 }, { "epoch": 1.3305980925370295, "grad_norm": 18.45891704534537, "learning_rate": 1.5111764611482283e-06, "loss": 0.08922538757324219, "step": 153885 }, { "epoch": 1.3306413260585728, "grad_norm": 45.34802241982177, "learning_rate": 1.5109995904397257e-06, "loss": 0.07801513671875, "step": 153890 }, { "epoch": 1.330684559580116, "grad_norm": 0.761342878023663, "learning_rate": 1.5108227265983621e-06, "loss": 0.1137298583984375, "step": 153895 }, { "epoch": 1.3307277931016592, "grad_norm": 0.8734854704799662, "learning_rate": 1.5106458696249522e-06, "loss": 0.0191619873046875, "step": 153900 }, { "epoch": 1.3307710266232027, "grad_norm": 2.5730296795817895, "learning_rate": 1.5104690195203109e-06, "loss": 0.021502685546875, "step": 153905 }, { "epoch": 1.330814260144746, "grad_norm": 0.6000652759906747, "learning_rate": 1.5102921762852559e-06, "loss": 0.023626708984375, "step": 153910 }, { "epoch": 1.3308574936662891, "grad_norm": 36.029404744485056, "learning_rate": 1.5101153399206007e-06, "loss": 0.19572677612304687, "step": 153915 }, { "epoch": 1.3309007271878324, "grad_norm": 0.27271361920201265, "learning_rate": 1.5099385104271628e-06, "loss": 0.02842559814453125, "step": 153920 }, { "epoch": 1.3309439607093756, "grad_norm": 0.10004467396848242, "learning_rate": 1.5097616878057572e-06, "loss": 0.03453369140625, "step": 153925 }, { "epoch": 1.3309871942309188, "grad_norm": 35.877417286330804, "learning_rate": 1.5095848720571985e-06, "loss": 0.09937992095947265, "step": 153930 }, { "epoch": 1.331030427752462, "grad_norm": 7.393398623629917, "learning_rate": 1.5094080631823031e-06, "loss": 0.05233478546142578, "step": 153935 }, { "epoch": 1.3310736612740053, "grad_norm": 12.632875586542704, "learning_rate": 1.5092312611818848e-06, "loss": 0.14473514556884765, "step": 153940 }, { "epoch": 1.3311168947955487, "grad_norm": 1.8309326850563428, "learning_rate": 1.5090544660567615e-06, "loss": 0.01845073699951172, "step": 153945 }, { "epoch": 1.331160128317092, "grad_norm": 40.922442586891215, "learning_rate": 1.508877677807747e-06, "loss": 0.26105804443359376, "step": 153950 }, { "epoch": 1.3312033618386352, "grad_norm": 3.8856307074354413, "learning_rate": 1.5087008964356567e-06, "loss": 0.01076507568359375, "step": 153955 }, { "epoch": 1.3312465953601784, "grad_norm": 0.9096924982179792, "learning_rate": 1.5085241219413064e-06, "loss": 0.018122482299804687, "step": 153960 }, { "epoch": 1.3312898288817216, "grad_norm": 0.34241123941027757, "learning_rate": 1.50834735432551e-06, "loss": 0.027406692504882812, "step": 153965 }, { "epoch": 1.331333062403265, "grad_norm": 0.5509985357195599, "learning_rate": 1.508170593589085e-06, "loss": 0.038421249389648436, "step": 153970 }, { "epoch": 1.3313762959248083, "grad_norm": 1.6281993999661455, "learning_rate": 1.507993839732844e-06, "loss": 0.030514907836914063, "step": 153975 }, { "epoch": 1.3314195294463516, "grad_norm": 91.90804997160132, "learning_rate": 1.5078170927576044e-06, "loss": 0.022439956665039062, "step": 153980 }, { "epoch": 1.3314627629678948, "grad_norm": 7.273705307824817, "learning_rate": 1.5076403526641812e-06, "loss": 0.09468841552734375, "step": 153985 }, { "epoch": 1.331505996489438, "grad_norm": 6.704458533565849, "learning_rate": 1.5074636194533886e-06, "loss": 0.045463180541992186, "step": 153990 }, { "epoch": 1.3315492300109812, "grad_norm": 0.17225586207494636, "learning_rate": 1.5072868931260418e-06, "loss": 0.019073867797851564, "step": 153995 }, { "epoch": 1.3315924635325245, "grad_norm": 0.3769181265802722, "learning_rate": 1.507110173682956e-06, "loss": 0.03246803283691406, "step": 154000 }, { "epoch": 1.331635697054068, "grad_norm": 0.4771508005032464, "learning_rate": 1.5069334611249448e-06, "loss": 0.029842376708984375, "step": 154005 }, { "epoch": 1.3316789305756112, "grad_norm": 1.5504404745418436, "learning_rate": 1.506756755452826e-06, "loss": 0.024833297729492186, "step": 154010 }, { "epoch": 1.3317221640971544, "grad_norm": 3.680859186712284, "learning_rate": 1.5065800566674132e-06, "loss": 0.41749114990234376, "step": 154015 }, { "epoch": 1.3317653976186976, "grad_norm": 11.481650992666918, "learning_rate": 1.506403364769521e-06, "loss": 0.10538558959960938, "step": 154020 }, { "epoch": 1.3318086311402408, "grad_norm": 0.6698478902261996, "learning_rate": 1.5062266797599632e-06, "loss": 0.07127761840820312, "step": 154025 }, { "epoch": 1.3318518646617843, "grad_norm": 20.7264215044209, "learning_rate": 1.5060500016395576e-06, "loss": 0.10850753784179687, "step": 154030 }, { "epoch": 1.3318950981833275, "grad_norm": 8.617079627979761, "learning_rate": 1.5058733304091157e-06, "loss": 0.0590728759765625, "step": 154035 }, { "epoch": 1.3319383317048707, "grad_norm": 0.045593089305657905, "learning_rate": 1.505696666069455e-06, "loss": 0.09391307830810547, "step": 154040 }, { "epoch": 1.331981565226414, "grad_norm": 5.960791544039895, "learning_rate": 1.50552000862139e-06, "loss": 0.03685054779052734, "step": 154045 }, { "epoch": 1.3320247987479572, "grad_norm": 0.9174356974837302, "learning_rate": 1.505343358065734e-06, "loss": 0.10533599853515625, "step": 154050 }, { "epoch": 1.3320680322695004, "grad_norm": 0.5432206596161527, "learning_rate": 1.5051667144033026e-06, "loss": 0.08588829040527343, "step": 154055 }, { "epoch": 1.3321112657910437, "grad_norm": 3.554228673391443, "learning_rate": 1.5049900776349106e-06, "loss": 0.018822860717773438, "step": 154060 }, { "epoch": 1.332154499312587, "grad_norm": 0.04041265575060118, "learning_rate": 1.5048134477613705e-06, "loss": 0.01224517822265625, "step": 154065 }, { "epoch": 1.3321977328341303, "grad_norm": 8.709196089077246, "learning_rate": 1.5046368247835e-06, "loss": 0.05068283081054688, "step": 154070 }, { "epoch": 1.3322409663556736, "grad_norm": 2.7286943977704934, "learning_rate": 1.5044602087021122e-06, "loss": 0.0493194580078125, "step": 154075 }, { "epoch": 1.3322841998772168, "grad_norm": 12.767852335896938, "learning_rate": 1.504283599518021e-06, "loss": 0.04248619079589844, "step": 154080 }, { "epoch": 1.33232743339876, "grad_norm": 11.689008025971132, "learning_rate": 1.5041069972320428e-06, "loss": 0.1084320068359375, "step": 154085 }, { "epoch": 1.3323706669203033, "grad_norm": 0.5050660408859156, "learning_rate": 1.5039304018449907e-06, "loss": 0.018231582641601563, "step": 154090 }, { "epoch": 1.3324139004418467, "grad_norm": 7.191626044625786, "learning_rate": 1.5037538133576795e-06, "loss": 0.04182281494140625, "step": 154095 }, { "epoch": 1.33245713396339, "grad_norm": 29.837372329813558, "learning_rate": 1.5035772317709221e-06, "loss": 0.035348701477050784, "step": 154100 }, { "epoch": 1.3325003674849332, "grad_norm": 4.496964625452378, "learning_rate": 1.5034006570855357e-06, "loss": 0.22185115814208983, "step": 154105 }, { "epoch": 1.3325436010064764, "grad_norm": 0.42340492648892114, "learning_rate": 1.5032240893023333e-06, "loss": 0.020043563842773438, "step": 154110 }, { "epoch": 1.3325868345280196, "grad_norm": 0.4907718315204619, "learning_rate": 1.503047528422129e-06, "loss": 0.0531463623046875, "step": 154115 }, { "epoch": 1.3326300680495629, "grad_norm": 2.156795409139491, "learning_rate": 1.5028709744457373e-06, "loss": 0.323150634765625, "step": 154120 }, { "epoch": 1.332673301571106, "grad_norm": 9.1384668654419, "learning_rate": 1.5026944273739725e-06, "loss": 0.10375633239746093, "step": 154125 }, { "epoch": 1.3327165350926493, "grad_norm": 54.24142217262909, "learning_rate": 1.5025178872076476e-06, "loss": 0.29833526611328126, "step": 154130 }, { "epoch": 1.3327597686141928, "grad_norm": 2.5446060282745284, "learning_rate": 1.5023413539475785e-06, "loss": 0.02516632080078125, "step": 154135 }, { "epoch": 1.332803002135736, "grad_norm": 1.992023329163269, "learning_rate": 1.5021648275945777e-06, "loss": 0.03056793212890625, "step": 154140 }, { "epoch": 1.3328462356572792, "grad_norm": 4.3062256779004775, "learning_rate": 1.501988308149462e-06, "loss": 0.07427139282226562, "step": 154145 }, { "epoch": 1.3328894691788225, "grad_norm": 25.21816868780618, "learning_rate": 1.5018117956130436e-06, "loss": 0.09435882568359374, "step": 154150 }, { "epoch": 1.3329327027003657, "grad_norm": 2.2141085679339207, "learning_rate": 1.5016352899861365e-06, "loss": 0.033519744873046875, "step": 154155 }, { "epoch": 1.3329759362219091, "grad_norm": 6.531776947468478, "learning_rate": 1.5014587912695545e-06, "loss": 0.03130035400390625, "step": 154160 }, { "epoch": 1.3330191697434524, "grad_norm": 1.25191839502031, "learning_rate": 1.5012822994641128e-06, "loss": 0.045511817932128905, "step": 154165 }, { "epoch": 1.3330624032649956, "grad_norm": 0.5241339292597922, "learning_rate": 1.501105814570625e-06, "loss": 0.02612590789794922, "step": 154170 }, { "epoch": 1.3331056367865388, "grad_norm": 0.9918893375336519, "learning_rate": 1.5009293365899047e-06, "loss": 0.34767837524414064, "step": 154175 }, { "epoch": 1.333148870308082, "grad_norm": 10.855875567192935, "learning_rate": 1.5007528655227658e-06, "loss": 0.0669158935546875, "step": 154180 }, { "epoch": 1.3331921038296253, "grad_norm": 2.972394430225806, "learning_rate": 1.5005764013700218e-06, "loss": 0.0263763427734375, "step": 154185 }, { "epoch": 1.3332353373511685, "grad_norm": 0.7843319287224373, "learning_rate": 1.5003999441324864e-06, "loss": 0.03238983154296875, "step": 154190 }, { "epoch": 1.3332785708727117, "grad_norm": 2.602253670953705, "learning_rate": 1.5002234938109746e-06, "loss": 0.023597526550292968, "step": 154195 }, { "epoch": 1.3333218043942552, "grad_norm": 2.0398238097185404, "learning_rate": 1.500047050406299e-06, "loss": 0.06490859985351563, "step": 154200 }, { "epoch": 1.3333650379157984, "grad_norm": 1.1783899730797769, "learning_rate": 1.4998706139192745e-06, "loss": 0.0128509521484375, "step": 154205 }, { "epoch": 1.3334082714373416, "grad_norm": 45.1272478995974, "learning_rate": 1.4996941843507143e-06, "loss": 0.1361988067626953, "step": 154210 }, { "epoch": 1.3334515049588849, "grad_norm": 3.6579550191645738, "learning_rate": 1.4995177617014322e-06, "loss": 0.060747528076171876, "step": 154215 }, { "epoch": 1.3334947384804283, "grad_norm": 0.07154957850553124, "learning_rate": 1.4993413459722413e-06, "loss": 0.02466754913330078, "step": 154220 }, { "epoch": 1.3335379720019715, "grad_norm": 17.57883273169394, "learning_rate": 1.4991649371639546e-06, "loss": 0.11867179870605468, "step": 154225 }, { "epoch": 1.3335812055235148, "grad_norm": 0.2897491716321119, "learning_rate": 1.4989885352773876e-06, "loss": 0.008595657348632813, "step": 154230 }, { "epoch": 1.333624439045058, "grad_norm": 0.7331393887270999, "learning_rate": 1.4988121403133528e-06, "loss": 0.6341289520263672, "step": 154235 }, { "epoch": 1.3336676725666012, "grad_norm": 0.8955987961752875, "learning_rate": 1.4986357522726635e-06, "loss": 0.04168853759765625, "step": 154240 }, { "epoch": 1.3337109060881445, "grad_norm": 1.5539380349419853, "learning_rate": 1.498459371156134e-06, "loss": 0.08086509704589843, "step": 154245 }, { "epoch": 1.3337541396096877, "grad_norm": 0.1673747847744616, "learning_rate": 1.4982829969645756e-06, "loss": 0.008821868896484375, "step": 154250 }, { "epoch": 1.333797373131231, "grad_norm": 11.482325968973345, "learning_rate": 1.4981066296988048e-06, "loss": 0.11534614562988281, "step": 154255 }, { "epoch": 1.3338406066527744, "grad_norm": 0.148772388091711, "learning_rate": 1.497930269359632e-06, "loss": 0.19204330444335938, "step": 154260 }, { "epoch": 1.3338838401743176, "grad_norm": 0.9405779157878089, "learning_rate": 1.4977539159478736e-06, "loss": 0.0715372085571289, "step": 154265 }, { "epoch": 1.3339270736958608, "grad_norm": 1.6765768540580492, "learning_rate": 1.4975775694643415e-06, "loss": 0.155706787109375, "step": 154270 }, { "epoch": 1.333970307217404, "grad_norm": 4.2158000770090185, "learning_rate": 1.4974012299098487e-06, "loss": 0.027581024169921874, "step": 154275 }, { "epoch": 1.3340135407389473, "grad_norm": 3.1911163870985093, "learning_rate": 1.4972248972852085e-06, "loss": 0.240301513671875, "step": 154280 }, { "epoch": 1.3340567742604907, "grad_norm": 6.882665073404321, "learning_rate": 1.4970485715912344e-06, "loss": 0.034366607666015625, "step": 154285 }, { "epoch": 1.334100007782034, "grad_norm": 13.127391180954955, "learning_rate": 1.496872252828738e-06, "loss": 0.14827728271484375, "step": 154290 }, { "epoch": 1.3341432413035772, "grad_norm": 3.4879929514882986, "learning_rate": 1.496695940998536e-06, "loss": 0.05786972045898438, "step": 154295 }, { "epoch": 1.3341864748251204, "grad_norm": 0.2860018387930128, "learning_rate": 1.4965196361014386e-06, "loss": 0.019550132751464843, "step": 154300 }, { "epoch": 1.3342297083466637, "grad_norm": 4.971866432349843, "learning_rate": 1.496343338138259e-06, "loss": 0.15086097717285157, "step": 154305 }, { "epoch": 1.3342729418682069, "grad_norm": 3.9926799736848766, "learning_rate": 1.4961670471098123e-06, "loss": 0.0232421875, "step": 154310 }, { "epoch": 1.3343161753897501, "grad_norm": 6.205798733564392, "learning_rate": 1.4959907630169102e-06, "loss": 0.10832557678222657, "step": 154315 }, { "epoch": 1.3343594089112933, "grad_norm": 0.5478477777819095, "learning_rate": 1.4958144858603645e-06, "loss": 0.0305267333984375, "step": 154320 }, { "epoch": 1.3344026424328368, "grad_norm": 0.4186932237083513, "learning_rate": 1.4956382156409906e-06, "loss": 0.08507614135742188, "step": 154325 }, { "epoch": 1.33444587595438, "grad_norm": 1.5226341624499053, "learning_rate": 1.4954619523596006e-06, "loss": 0.052032852172851564, "step": 154330 }, { "epoch": 1.3344891094759233, "grad_norm": 3.319165725379521, "learning_rate": 1.4952856960170068e-06, "loss": 0.03784027099609375, "step": 154335 }, { "epoch": 1.3345323429974665, "grad_norm": 4.322692752576187, "learning_rate": 1.4951094466140225e-06, "loss": 0.032237815856933597, "step": 154340 }, { "epoch": 1.3345755765190097, "grad_norm": 4.470176136100723, "learning_rate": 1.4949332041514605e-06, "loss": 0.032086944580078124, "step": 154345 }, { "epoch": 1.3346188100405532, "grad_norm": 1.523957285620746, "learning_rate": 1.4947569686301339e-06, "loss": 0.040390396118164064, "step": 154350 }, { "epoch": 1.3346620435620964, "grad_norm": 0.21634135433486423, "learning_rate": 1.4945807400508538e-06, "loss": 0.05765380859375, "step": 154355 }, { "epoch": 1.3347052770836396, "grad_norm": 0.6517800249507719, "learning_rate": 1.4944045184144353e-06, "loss": 0.12837982177734375, "step": 154360 }, { "epoch": 1.3347485106051828, "grad_norm": 0.804820007498172, "learning_rate": 1.4942283037216891e-06, "loss": 0.1313568115234375, "step": 154365 }, { "epoch": 1.334791744126726, "grad_norm": 5.476177633366165, "learning_rate": 1.4940520959734302e-06, "loss": 0.040330123901367185, "step": 154370 }, { "epoch": 1.3348349776482693, "grad_norm": 14.949867029020938, "learning_rate": 1.4938758951704695e-06, "loss": 0.08298587799072266, "step": 154375 }, { "epoch": 1.3348782111698125, "grad_norm": 0.1601401341092215, "learning_rate": 1.4936997013136204e-06, "loss": 0.24593048095703124, "step": 154380 }, { "epoch": 1.3349214446913558, "grad_norm": 19.056703934250518, "learning_rate": 1.4935235144036944e-06, "loss": 0.1047576904296875, "step": 154385 }, { "epoch": 1.3349646782128992, "grad_norm": 19.065387227927978, "learning_rate": 1.4933473344415055e-06, "loss": 0.04359283447265625, "step": 154390 }, { "epoch": 1.3350079117344424, "grad_norm": 3.2197347101987255, "learning_rate": 1.4931711614278654e-06, "loss": 0.0930419921875, "step": 154395 }, { "epoch": 1.3350511452559857, "grad_norm": 0.502381467976837, "learning_rate": 1.492994995363587e-06, "loss": 0.04354095458984375, "step": 154400 }, { "epoch": 1.335094378777529, "grad_norm": 4.920357976194676, "learning_rate": 1.4928188362494826e-06, "loss": 0.035405731201171874, "step": 154405 }, { "epoch": 1.3351376122990721, "grad_norm": 3.321282206896038, "learning_rate": 1.4926426840863647e-06, "loss": 0.2402057647705078, "step": 154410 }, { "epoch": 1.3351808458206156, "grad_norm": 6.005005047907991, "learning_rate": 1.492466538875044e-06, "loss": 0.029328155517578124, "step": 154415 }, { "epoch": 1.3352240793421588, "grad_norm": 31.955809514615662, "learning_rate": 1.492290400616336e-06, "loss": 0.2931316375732422, "step": 154420 }, { "epoch": 1.335267312863702, "grad_norm": 9.53924902748506, "learning_rate": 1.49211426931105e-06, "loss": 0.16492538452148436, "step": 154425 }, { "epoch": 1.3353105463852453, "grad_norm": 11.717151429860966, "learning_rate": 1.4919381449600008e-06, "loss": 0.03864898681640625, "step": 154430 }, { "epoch": 1.3353537799067885, "grad_norm": 6.387770294697392, "learning_rate": 1.4917620275640001e-06, "loss": 0.18163604736328126, "step": 154435 }, { "epoch": 1.3353970134283317, "grad_norm": 0.311772824690002, "learning_rate": 1.4915859171238594e-06, "loss": 0.13384590148925782, "step": 154440 }, { "epoch": 1.335440246949875, "grad_norm": 2.4303222184988305, "learning_rate": 1.491409813640391e-06, "loss": 0.1548320770263672, "step": 154445 }, { "epoch": 1.3354834804714182, "grad_norm": 4.529250170722712, "learning_rate": 1.4912337171144062e-06, "loss": 0.04521598815917969, "step": 154450 }, { "epoch": 1.3355267139929616, "grad_norm": 2.4927110447694787, "learning_rate": 1.4910576275467192e-06, "loss": 0.15088882446289062, "step": 154455 }, { "epoch": 1.3355699475145049, "grad_norm": 0.6971764780137693, "learning_rate": 1.4908815449381412e-06, "loss": 0.0219390869140625, "step": 154460 }, { "epoch": 1.335613181036048, "grad_norm": 34.529629712881636, "learning_rate": 1.4907054692894843e-06, "loss": 0.10481452941894531, "step": 154465 }, { "epoch": 1.3356564145575913, "grad_norm": 4.620103874152867, "learning_rate": 1.4905294006015604e-06, "loss": 0.12135162353515624, "step": 154470 }, { "epoch": 1.3356996480791348, "grad_norm": 0.49344315226781393, "learning_rate": 1.4903533388751802e-06, "loss": 0.03179931640625, "step": 154475 }, { "epoch": 1.335742881600678, "grad_norm": 3.3350603476124245, "learning_rate": 1.490177284111157e-06, "loss": 0.023823928833007813, "step": 154480 }, { "epoch": 1.3357861151222212, "grad_norm": 5.171935955443788, "learning_rate": 1.490001236310304e-06, "loss": 0.027690887451171875, "step": 154485 }, { "epoch": 1.3358293486437645, "grad_norm": 13.757523984728346, "learning_rate": 1.4898251954734318e-06, "loss": 0.12071113586425782, "step": 154490 }, { "epoch": 1.3358725821653077, "grad_norm": 39.41976938808743, "learning_rate": 1.4896491616013528e-06, "loss": 0.2436288833618164, "step": 154495 }, { "epoch": 1.335915815686851, "grad_norm": 0.33791131151133547, "learning_rate": 1.4894731346948777e-06, "loss": 0.024784088134765625, "step": 154500 }, { "epoch": 1.3359590492083941, "grad_norm": 10.47149940807726, "learning_rate": 1.4892971147548194e-06, "loss": 0.02978363037109375, "step": 154505 }, { "epoch": 1.3360022827299374, "grad_norm": 11.13482606935822, "learning_rate": 1.4891211017819893e-06, "loss": 0.082318115234375, "step": 154510 }, { "epoch": 1.3360455162514808, "grad_norm": 7.036729700428086, "learning_rate": 1.488945095777198e-06, "loss": 0.051715087890625, "step": 154515 }, { "epoch": 1.336088749773024, "grad_norm": 0.6876329360056052, "learning_rate": 1.4887690967412597e-06, "loss": 0.0038166046142578125, "step": 154520 }, { "epoch": 1.3361319832945673, "grad_norm": 1.9118793489025625, "learning_rate": 1.4885931046749843e-06, "loss": 0.06270599365234375, "step": 154525 }, { "epoch": 1.3361752168161105, "grad_norm": 2.1371550548486242, "learning_rate": 1.4884171195791833e-06, "loss": 0.2053913116455078, "step": 154530 }, { "epoch": 1.3362184503376537, "grad_norm": 0.14212615677144935, "learning_rate": 1.48824114145467e-06, "loss": 0.22019615173339843, "step": 154535 }, { "epoch": 1.3362616838591972, "grad_norm": 1.206222576334264, "learning_rate": 1.488065170302255e-06, "loss": 0.008356094360351562, "step": 154540 }, { "epoch": 1.3363049173807404, "grad_norm": 2.3789223092180745, "learning_rate": 1.4878892061227486e-06, "loss": 0.06374979019165039, "step": 154545 }, { "epoch": 1.3363481509022836, "grad_norm": 1.3746709137194426, "learning_rate": 1.4877132489169646e-06, "loss": 0.24777870178222655, "step": 154550 }, { "epoch": 1.3363913844238269, "grad_norm": 1.6770165731132052, "learning_rate": 1.4875372986857138e-06, "loss": 0.01068563461303711, "step": 154555 }, { "epoch": 1.33643461794537, "grad_norm": 5.19627935183762, "learning_rate": 1.4873613554298072e-06, "loss": 0.031490516662597653, "step": 154560 }, { "epoch": 1.3364778514669133, "grad_norm": 16.27319152398594, "learning_rate": 1.4871854191500564e-06, "loss": 0.059468841552734374, "step": 154565 }, { "epoch": 1.3365210849884566, "grad_norm": 1.8404438713111224, "learning_rate": 1.4870094898472726e-06, "loss": 0.012003707885742187, "step": 154570 }, { "epoch": 1.3365643185099998, "grad_norm": 6.034607069551665, "learning_rate": 1.4868335675222663e-06, "loss": 0.04560546875, "step": 154575 }, { "epoch": 1.3366075520315432, "grad_norm": 5.5026955602128815, "learning_rate": 1.4866576521758508e-06, "loss": 0.04385528564453125, "step": 154580 }, { "epoch": 1.3366507855530865, "grad_norm": 4.121703045759026, "learning_rate": 1.4864817438088367e-06, "loss": 0.02152252197265625, "step": 154585 }, { "epoch": 1.3366940190746297, "grad_norm": 6.639179152343139, "learning_rate": 1.486305842422034e-06, "loss": 0.05113067626953125, "step": 154590 }, { "epoch": 1.336737252596173, "grad_norm": 0.07169253111173661, "learning_rate": 1.486129948016256e-06, "loss": 0.03246116638183594, "step": 154595 }, { "epoch": 1.3367804861177162, "grad_norm": 0.2817609320103208, "learning_rate": 1.4859540605923133e-06, "loss": 0.03642349243164063, "step": 154600 }, { "epoch": 1.3368237196392596, "grad_norm": 4.466064897228114, "learning_rate": 1.4857781801510163e-06, "loss": 0.037685394287109375, "step": 154605 }, { "epoch": 1.3368669531608028, "grad_norm": 2.492381899381708, "learning_rate": 1.4856023066931755e-06, "loss": 0.04269905090332031, "step": 154610 }, { "epoch": 1.336910186682346, "grad_norm": 0.3531452803760253, "learning_rate": 1.485426440219604e-06, "loss": 0.11563262939453126, "step": 154615 }, { "epoch": 1.3369534202038893, "grad_norm": 1.7572230837967902, "learning_rate": 1.4852505807311121e-06, "loss": 0.03402862548828125, "step": 154620 }, { "epoch": 1.3369966537254325, "grad_norm": 1.480579650039574, "learning_rate": 1.4850747282285106e-06, "loss": 0.05415496826171875, "step": 154625 }, { "epoch": 1.3370398872469758, "grad_norm": 3.4349655790371085, "learning_rate": 1.4848988827126109e-06, "loss": 0.008905029296875, "step": 154630 }, { "epoch": 1.337083120768519, "grad_norm": 1.9180653073383334, "learning_rate": 1.4847230441842232e-06, "loss": 0.022117996215820314, "step": 154635 }, { "epoch": 1.3371263542900622, "grad_norm": 40.21520171107032, "learning_rate": 1.4845472126441578e-06, "loss": 0.14779434204101563, "step": 154640 }, { "epoch": 1.3371695878116057, "grad_norm": 1.6701562177483784, "learning_rate": 1.4843713880932282e-06, "loss": 0.027615737915039063, "step": 154645 }, { "epoch": 1.337212821333149, "grad_norm": 0.19391811686018137, "learning_rate": 1.4841955705322428e-06, "loss": 0.28924999237060545, "step": 154650 }, { "epoch": 1.3372560548546921, "grad_norm": 7.927366382111063, "learning_rate": 1.4840197599620144e-06, "loss": 0.13800621032714844, "step": 154655 }, { "epoch": 1.3372992883762354, "grad_norm": 3.7340497697648387, "learning_rate": 1.483843956383353e-06, "loss": 0.035953330993652347, "step": 154660 }, { "epoch": 1.3373425218977786, "grad_norm": 23.28863259759145, "learning_rate": 1.4836681597970693e-06, "loss": 0.1410888671875, "step": 154665 }, { "epoch": 1.337385755419322, "grad_norm": 0.458268353517065, "learning_rate": 1.4834923702039729e-06, "loss": 0.10491790771484374, "step": 154670 }, { "epoch": 1.3374289889408653, "grad_norm": 0.5874587452144818, "learning_rate": 1.4833165876048766e-06, "loss": 0.04404144287109375, "step": 154675 }, { "epoch": 1.3374722224624085, "grad_norm": 29.529115321683978, "learning_rate": 1.4831408120005907e-06, "loss": 0.08048667907714843, "step": 154680 }, { "epoch": 1.3375154559839517, "grad_norm": 41.349504335451655, "learning_rate": 1.4829650433919246e-06, "loss": 0.16887779235839845, "step": 154685 }, { "epoch": 1.337558689505495, "grad_norm": 2.1696895963163954, "learning_rate": 1.4827892817796902e-06, "loss": 0.10202178955078126, "step": 154690 }, { "epoch": 1.3376019230270382, "grad_norm": 0.8320540356742417, "learning_rate": 1.4826135271646978e-06, "loss": 0.04380779266357422, "step": 154695 }, { "epoch": 1.3376451565485814, "grad_norm": 36.216255456249776, "learning_rate": 1.4824377795477558e-06, "loss": 0.09051437377929687, "step": 154700 }, { "epoch": 1.3376883900701246, "grad_norm": 4.0048117842192905, "learning_rate": 1.4822620389296772e-06, "loss": 0.1702850341796875, "step": 154705 }, { "epoch": 1.337731623591668, "grad_norm": 0.3208554275973777, "learning_rate": 1.4820863053112733e-06, "loss": 0.03528289794921875, "step": 154710 }, { "epoch": 1.3377748571132113, "grad_norm": 41.9476418149857, "learning_rate": 1.4819105786933526e-06, "loss": 0.1868682861328125, "step": 154715 }, { "epoch": 1.3378180906347545, "grad_norm": 2.1952406730617486, "learning_rate": 1.4817348590767269e-06, "loss": 0.03572998046875, "step": 154720 }, { "epoch": 1.3378613241562978, "grad_norm": 6.373131259132169, "learning_rate": 1.4815591464622053e-06, "loss": 0.144732666015625, "step": 154725 }, { "epoch": 1.3379045576778412, "grad_norm": 0.23772866224505534, "learning_rate": 1.4813834408505991e-06, "loss": 0.07615509033203124, "step": 154730 }, { "epoch": 1.3379477911993845, "grad_norm": 0.16353300785255073, "learning_rate": 1.4812077422427171e-06, "loss": 0.059252166748046876, "step": 154735 }, { "epoch": 1.3379910247209277, "grad_norm": 23.53091282006315, "learning_rate": 1.4810320506393717e-06, "loss": 0.09421615600585938, "step": 154740 }, { "epoch": 1.338034258242471, "grad_norm": 1.0312539934435085, "learning_rate": 1.4808563660413723e-06, "loss": 0.04340705871582031, "step": 154745 }, { "epoch": 1.3380774917640141, "grad_norm": 0.258346426729721, "learning_rate": 1.4806806884495294e-06, "loss": 0.03585662841796875, "step": 154750 }, { "epoch": 1.3381207252855574, "grad_norm": 0.969230775807896, "learning_rate": 1.4805050178646514e-06, "loss": 0.38551025390625, "step": 154755 }, { "epoch": 1.3381639588071006, "grad_norm": 14.843555557700215, "learning_rate": 1.4803293542875512e-06, "loss": 0.05178632736206055, "step": 154760 }, { "epoch": 1.3382071923286438, "grad_norm": 0.25100068675076653, "learning_rate": 1.4801536977190368e-06, "loss": 0.44011611938476564, "step": 154765 }, { "epoch": 1.3382504258501873, "grad_norm": 1.4316680304524623, "learning_rate": 1.4799780481599203e-06, "loss": 0.025581741333007814, "step": 154770 }, { "epoch": 1.3382936593717305, "grad_norm": 7.105362099589503, "learning_rate": 1.4798024056110108e-06, "loss": 0.20441436767578125, "step": 154775 }, { "epoch": 1.3383368928932737, "grad_norm": 2.4142983107513976, "learning_rate": 1.4796267700731181e-06, "loss": 0.17441368103027344, "step": 154780 }, { "epoch": 1.338380126414817, "grad_norm": 4.666249629393732, "learning_rate": 1.4794511415470525e-06, "loss": 0.02039642333984375, "step": 154785 }, { "epoch": 1.3384233599363602, "grad_norm": 15.709838830999397, "learning_rate": 1.479275520033624e-06, "loss": 0.06494331359863281, "step": 154790 }, { "epoch": 1.3384665934579036, "grad_norm": 6.74704666379422, "learning_rate": 1.4790999055336423e-06, "loss": 0.2584390640258789, "step": 154795 }, { "epoch": 1.3385098269794469, "grad_norm": 2.837654247714479, "learning_rate": 1.4789242980479164e-06, "loss": 0.021439361572265624, "step": 154800 }, { "epoch": 1.33855306050099, "grad_norm": 10.940340298989398, "learning_rate": 1.478748697577258e-06, "loss": 0.06381568908691407, "step": 154805 }, { "epoch": 1.3385962940225333, "grad_norm": 0.21664236760037134, "learning_rate": 1.4785731041224767e-06, "loss": 0.045428466796875, "step": 154810 }, { "epoch": 1.3386395275440766, "grad_norm": 0.3119497627812495, "learning_rate": 1.4783975176843803e-06, "loss": 0.0388458251953125, "step": 154815 }, { "epoch": 1.3386827610656198, "grad_norm": 0.3836493292823851, "learning_rate": 1.4782219382637814e-06, "loss": 0.015858268737792967, "step": 154820 }, { "epoch": 1.338725994587163, "grad_norm": 11.131995670757332, "learning_rate": 1.4780463658614884e-06, "loss": 0.0468048095703125, "step": 154825 }, { "epoch": 1.3387692281087062, "grad_norm": 16.990583462985324, "learning_rate": 1.47787080047831e-06, "loss": 0.13541183471679688, "step": 154830 }, { "epoch": 1.3388124616302497, "grad_norm": 33.98136856121851, "learning_rate": 1.4776952421150582e-06, "loss": 0.230047607421875, "step": 154835 }, { "epoch": 1.338855695151793, "grad_norm": 10.00671083411242, "learning_rate": 1.4775196907725414e-06, "loss": 0.1692403793334961, "step": 154840 }, { "epoch": 1.3388989286733362, "grad_norm": 3.0719271328366515, "learning_rate": 1.4773441464515688e-06, "loss": 0.08949928283691407, "step": 154845 }, { "epoch": 1.3389421621948794, "grad_norm": 5.928956977687067, "learning_rate": 1.4771686091529512e-06, "loss": 0.08334407806396485, "step": 154850 }, { "epoch": 1.3389853957164226, "grad_norm": 0.970939889927882, "learning_rate": 1.4769930788774968e-06, "loss": 0.04299583435058594, "step": 154855 }, { "epoch": 1.339028629237966, "grad_norm": 25.149735480431524, "learning_rate": 1.4768175556260163e-06, "loss": 0.10296192169189453, "step": 154860 }, { "epoch": 1.3390718627595093, "grad_norm": 6.362917313527253, "learning_rate": 1.476642039399317e-06, "loss": 0.05310535430908203, "step": 154865 }, { "epoch": 1.3391150962810525, "grad_norm": 3.484905389530127, "learning_rate": 1.4764665301982111e-06, "loss": 0.12776336669921876, "step": 154870 }, { "epoch": 1.3391583298025957, "grad_norm": 43.79685270317767, "learning_rate": 1.4762910280235062e-06, "loss": 0.212200927734375, "step": 154875 }, { "epoch": 1.339201563324139, "grad_norm": 44.64092610503977, "learning_rate": 1.4761155328760135e-06, "loss": 0.14923858642578125, "step": 154880 }, { "epoch": 1.3392447968456822, "grad_norm": 7.12923200612016, "learning_rate": 1.4759400447565412e-06, "loss": 0.0726531982421875, "step": 154885 }, { "epoch": 1.3392880303672254, "grad_norm": 2.3267466549413096, "learning_rate": 1.4757645636658986e-06, "loss": 0.0357940673828125, "step": 154890 }, { "epoch": 1.3393312638887687, "grad_norm": 10.045029044438854, "learning_rate": 1.4755890896048944e-06, "loss": 0.015412521362304688, "step": 154895 }, { "epoch": 1.3393744974103121, "grad_norm": 1.466435575148169, "learning_rate": 1.4754136225743398e-06, "loss": 0.08686599731445313, "step": 154900 }, { "epoch": 1.3394177309318553, "grad_norm": 4.6454058489797685, "learning_rate": 1.4752381625750422e-06, "loss": 0.01990509033203125, "step": 154905 }, { "epoch": 1.3394609644533986, "grad_norm": 3.119567108846554, "learning_rate": 1.475062709607812e-06, "loss": 0.09128875732421875, "step": 154910 }, { "epoch": 1.3395041979749418, "grad_norm": 0.09646182454475859, "learning_rate": 1.4748872636734578e-06, "loss": 0.057195281982421874, "step": 154915 }, { "epoch": 1.3395474314964853, "grad_norm": 1.939365469710864, "learning_rate": 1.4747118247727883e-06, "loss": 0.06698951721191407, "step": 154920 }, { "epoch": 1.3395906650180285, "grad_norm": 2.022635374070198, "learning_rate": 1.4745363929066125e-06, "loss": 0.01697196960449219, "step": 154925 }, { "epoch": 1.3396338985395717, "grad_norm": 1.9819707753118165, "learning_rate": 1.4743609680757399e-06, "loss": 0.028018951416015625, "step": 154930 }, { "epoch": 1.339677132061115, "grad_norm": 5.2020720723475, "learning_rate": 1.4741855502809808e-06, "loss": 0.07740144729614258, "step": 154935 }, { "epoch": 1.3397203655826582, "grad_norm": 2.800288802133129, "learning_rate": 1.4740101395231432e-06, "loss": 0.013615036010742187, "step": 154940 }, { "epoch": 1.3397635991042014, "grad_norm": 13.009132277252707, "learning_rate": 1.4738347358030358e-06, "loss": 0.036791229248046876, "step": 154945 }, { "epoch": 1.3398068326257446, "grad_norm": 1.230794525897408, "learning_rate": 1.473659339121468e-06, "loss": 0.011458301544189453, "step": 154950 }, { "epoch": 1.3398500661472879, "grad_norm": 1.6204536866239991, "learning_rate": 1.4734839494792486e-06, "loss": 0.1780078887939453, "step": 154955 }, { "epoch": 1.3398932996688313, "grad_norm": 25.795654275524207, "learning_rate": 1.4733085668771849e-06, "loss": 0.09168510437011719, "step": 154960 }, { "epoch": 1.3399365331903745, "grad_norm": 31.114783884132738, "learning_rate": 1.4731331913160883e-06, "loss": 0.2102447509765625, "step": 154965 }, { "epoch": 1.3399797667119178, "grad_norm": 1.5662572996475683, "learning_rate": 1.4729578227967666e-06, "loss": 0.025554656982421875, "step": 154970 }, { "epoch": 1.340023000233461, "grad_norm": 0.8756038595575003, "learning_rate": 1.4727824613200284e-06, "loss": 0.09767951965332031, "step": 154975 }, { "epoch": 1.3400662337550042, "grad_norm": 7.60533713794251, "learning_rate": 1.4726071068866815e-06, "loss": 0.0352783203125, "step": 154980 }, { "epoch": 1.3401094672765477, "grad_norm": 0.3351894226554814, "learning_rate": 1.472431759497537e-06, "loss": 0.017325210571289062, "step": 154985 }, { "epoch": 1.340152700798091, "grad_norm": 3.8757736158583405, "learning_rate": 1.472256419153401e-06, "loss": 0.2689788818359375, "step": 154990 }, { "epoch": 1.3401959343196341, "grad_norm": 6.068123487081106, "learning_rate": 1.4720810858550842e-06, "loss": 0.05219821929931641, "step": 154995 }, { "epoch": 1.3402391678411774, "grad_norm": 26.59615202995806, "learning_rate": 1.4719057596033946e-06, "loss": 0.19564895629882811, "step": 155000 }, { "epoch": 1.3402824013627206, "grad_norm": 5.122522381887554, "learning_rate": 1.471730440399141e-06, "loss": 0.03131732940673828, "step": 155005 }, { "epoch": 1.3403256348842638, "grad_norm": 10.572371284874846, "learning_rate": 1.471555128243131e-06, "loss": 0.045270538330078124, "step": 155010 }, { "epoch": 1.340368868405807, "grad_norm": 0.19899893166136545, "learning_rate": 1.4713798231361739e-06, "loss": 0.2320720672607422, "step": 155015 }, { "epoch": 1.3404121019273503, "grad_norm": 3.8078896955804793, "learning_rate": 1.471204525079077e-06, "loss": 0.08816909790039062, "step": 155020 }, { "epoch": 1.3404553354488937, "grad_norm": 0.42718973089257983, "learning_rate": 1.4710292340726506e-06, "loss": 0.17553234100341797, "step": 155025 }, { "epoch": 1.340498568970437, "grad_norm": 0.7268948644432727, "learning_rate": 1.4708539501177024e-06, "loss": 0.04511184692382812, "step": 155030 }, { "epoch": 1.3405418024919802, "grad_norm": 0.5752163966710295, "learning_rate": 1.470678673215041e-06, "loss": 0.024797916412353516, "step": 155035 }, { "epoch": 1.3405850360135234, "grad_norm": 11.377255973752602, "learning_rate": 1.4705034033654728e-06, "loss": 0.1037567138671875, "step": 155040 }, { "epoch": 1.3406282695350666, "grad_norm": 2.2342665545292064, "learning_rate": 1.470328140569809e-06, "loss": 0.1914997100830078, "step": 155045 }, { "epoch": 1.34067150305661, "grad_norm": 1.4541640382280903, "learning_rate": 1.4701528848288568e-06, "loss": 0.19435806274414064, "step": 155050 }, { "epoch": 1.3407147365781533, "grad_norm": 4.590077409353259, "learning_rate": 1.469977636143423e-06, "loss": 0.02777099609375, "step": 155055 }, { "epoch": 1.3407579700996965, "grad_norm": 1.4376697462620551, "learning_rate": 1.4698023945143185e-06, "loss": 0.01396484375, "step": 155060 }, { "epoch": 1.3408012036212398, "grad_norm": 3.9757157632909648, "learning_rate": 1.4696271599423498e-06, "loss": 0.19019088745117188, "step": 155065 }, { "epoch": 1.340844437142783, "grad_norm": 6.714312054222894, "learning_rate": 1.4694519324283257e-06, "loss": 0.03302278518676758, "step": 155070 }, { "epoch": 1.3408876706643262, "grad_norm": 9.255280498024065, "learning_rate": 1.4692767119730543e-06, "loss": 0.03257827758789063, "step": 155075 }, { "epoch": 1.3409309041858695, "grad_norm": 1.7549256664135846, "learning_rate": 1.469101498577343e-06, "loss": 0.024259185791015624, "step": 155080 }, { "epoch": 1.3409741377074127, "grad_norm": 0.08971447830009148, "learning_rate": 1.4689262922419993e-06, "loss": 0.026309967041015625, "step": 155085 }, { "epoch": 1.3410173712289561, "grad_norm": 0.5322231633781214, "learning_rate": 1.4687510929678335e-06, "loss": 0.014638328552246093, "step": 155090 }, { "epoch": 1.3410606047504994, "grad_norm": 3.4663053924408405, "learning_rate": 1.4685759007556521e-06, "loss": 0.043777847290039064, "step": 155095 }, { "epoch": 1.3411038382720426, "grad_norm": 1.3833365398797508, "learning_rate": 1.4684007156062625e-06, "loss": 0.01724395751953125, "step": 155100 }, { "epoch": 1.3411470717935858, "grad_norm": 24.31894924107183, "learning_rate": 1.4682255375204744e-06, "loss": 0.06325721740722656, "step": 155105 }, { "epoch": 1.341190305315129, "grad_norm": 2.097629416661523, "learning_rate": 1.468050366499095e-06, "loss": 0.024582672119140624, "step": 155110 }, { "epoch": 1.3412335388366725, "grad_norm": 2.5448765508546467, "learning_rate": 1.4678752025429305e-06, "loss": 0.04485244750976562, "step": 155115 }, { "epoch": 1.3412767723582157, "grad_norm": 2.838870827939093, "learning_rate": 1.4677000456527916e-06, "loss": 0.050507736206054685, "step": 155120 }, { "epoch": 1.341320005879759, "grad_norm": 15.636562450606972, "learning_rate": 1.4675248958294842e-06, "loss": 0.030926513671875, "step": 155125 }, { "epoch": 1.3413632394013022, "grad_norm": 0.6911973897289487, "learning_rate": 1.467349753073817e-06, "loss": 0.018953323364257812, "step": 155130 }, { "epoch": 1.3414064729228454, "grad_norm": 1.882386337316868, "learning_rate": 1.4671746173865971e-06, "loss": 0.024182891845703124, "step": 155135 }, { "epoch": 1.3414497064443887, "grad_norm": 2.7644445790673102, "learning_rate": 1.4669994887686328e-06, "loss": 0.03501548767089844, "step": 155140 }, { "epoch": 1.3414929399659319, "grad_norm": 89.40187267493674, "learning_rate": 1.4668243672207311e-06, "loss": 0.23227767944335936, "step": 155145 }, { "epoch": 1.3415361734874751, "grad_norm": 7.51608551148216, "learning_rate": 1.466649252743699e-06, "loss": 0.041623687744140624, "step": 155150 }, { "epoch": 1.3415794070090186, "grad_norm": 9.1317315389785, "learning_rate": 1.4664741453383447e-06, "loss": 0.05605945587158203, "step": 155155 }, { "epoch": 1.3416226405305618, "grad_norm": 0.17164217603786674, "learning_rate": 1.4662990450054776e-06, "loss": 0.04854736328125, "step": 155160 }, { "epoch": 1.341665874052105, "grad_norm": 2.3058455177009773, "learning_rate": 1.4661239517459038e-06, "loss": 0.0311279296875, "step": 155165 }, { "epoch": 1.3417091075736483, "grad_norm": 3.869840716142461, "learning_rate": 1.4659488655604306e-06, "loss": 0.0367767333984375, "step": 155170 }, { "epoch": 1.3417523410951917, "grad_norm": 1.1712162822884182, "learning_rate": 1.4657737864498655e-06, "loss": 0.21886749267578126, "step": 155175 }, { "epoch": 1.341795574616735, "grad_norm": 1.4698403118906835, "learning_rate": 1.4655987144150154e-06, "loss": 0.04334564208984375, "step": 155180 }, { "epoch": 1.3418388081382782, "grad_norm": 15.1752249955005, "learning_rate": 1.4654236494566896e-06, "loss": 0.08425788879394532, "step": 155185 }, { "epoch": 1.3418820416598214, "grad_norm": 1.618020973506277, "learning_rate": 1.465248591575694e-06, "loss": 0.17403640747070312, "step": 155190 }, { "epoch": 1.3419252751813646, "grad_norm": 0.2639907845292632, "learning_rate": 1.4650735407728366e-06, "loss": 0.0048615455627441405, "step": 155195 }, { "epoch": 1.3419685087029078, "grad_norm": 0.7704087938480493, "learning_rate": 1.4648984970489243e-06, "loss": 0.08020782470703125, "step": 155200 }, { "epoch": 1.342011742224451, "grad_norm": 1.4898296067566246, "learning_rate": 1.464723460404763e-06, "loss": 0.0416534423828125, "step": 155205 }, { "epoch": 1.3420549757459943, "grad_norm": 5.500141014935733, "learning_rate": 1.464548430841163e-06, "loss": 0.0814849853515625, "step": 155210 }, { "epoch": 1.3420982092675378, "grad_norm": 1.7350605917759523, "learning_rate": 1.464373408358929e-06, "loss": 0.17657852172851562, "step": 155215 }, { "epoch": 1.342141442789081, "grad_norm": 0.8499588298641019, "learning_rate": 1.4641983929588699e-06, "loss": 0.006201553344726563, "step": 155220 }, { "epoch": 1.3421846763106242, "grad_norm": 0.06975850608791898, "learning_rate": 1.464023384641792e-06, "loss": 0.0041103363037109375, "step": 155225 }, { "epoch": 1.3422279098321674, "grad_norm": 4.274087501508649, "learning_rate": 1.4638483834085027e-06, "loss": 0.09263191223144532, "step": 155230 }, { "epoch": 1.3422711433537107, "grad_norm": 46.28836719404179, "learning_rate": 1.463673389259809e-06, "loss": 0.09174232482910157, "step": 155235 }, { "epoch": 1.3423143768752541, "grad_norm": 27.561995366180454, "learning_rate": 1.463498402196518e-06, "loss": 0.23924331665039061, "step": 155240 }, { "epoch": 1.3423576103967974, "grad_norm": 0.46561389083479227, "learning_rate": 1.463323422219435e-06, "loss": 0.20140495300292968, "step": 155245 }, { "epoch": 1.3424008439183406, "grad_norm": 5.141903179757771, "learning_rate": 1.4631484493293701e-06, "loss": 0.09100799560546875, "step": 155250 }, { "epoch": 1.3424440774398838, "grad_norm": 3.878661684830766, "learning_rate": 1.4629734835271286e-06, "loss": 0.05660552978515625, "step": 155255 }, { "epoch": 1.342487310961427, "grad_norm": 13.7264951070626, "learning_rate": 1.462798524813518e-06, "loss": 0.13433685302734374, "step": 155260 }, { "epoch": 1.3425305444829703, "grad_norm": 0.7215278315152592, "learning_rate": 1.462623573189343e-06, "loss": 0.02885284423828125, "step": 155265 }, { "epoch": 1.3425737780045135, "grad_norm": 9.223646708195586, "learning_rate": 1.462448628655414e-06, "loss": 0.165447998046875, "step": 155270 }, { "epoch": 1.3426170115260567, "grad_norm": 4.25449664112737, "learning_rate": 1.4622736912125343e-06, "loss": 0.07498741149902344, "step": 155275 }, { "epoch": 1.3426602450476002, "grad_norm": 2.2240200232882885, "learning_rate": 1.4620987608615141e-06, "loss": 0.07643814086914062, "step": 155280 }, { "epoch": 1.3427034785691434, "grad_norm": 38.24221042677467, "learning_rate": 1.4619238376031583e-06, "loss": 0.100244140625, "step": 155285 }, { "epoch": 1.3427467120906866, "grad_norm": 0.6358481838389552, "learning_rate": 1.4617489214382737e-06, "loss": 0.024917221069335936, "step": 155290 }, { "epoch": 1.3427899456122299, "grad_norm": 6.291315766851727, "learning_rate": 1.4615740123676674e-06, "loss": 0.15595664978027343, "step": 155295 }, { "epoch": 1.342833179133773, "grad_norm": 0.5856831284315649, "learning_rate": 1.4613991103921458e-06, "loss": 0.037969970703125, "step": 155300 }, { "epoch": 1.3428764126553165, "grad_norm": 3.811942382120248, "learning_rate": 1.4612242155125154e-06, "loss": 0.14951629638671876, "step": 155305 }, { "epoch": 1.3429196461768598, "grad_norm": 2.8181674913581163, "learning_rate": 1.461049327729582e-06, "loss": 0.09039306640625, "step": 155310 }, { "epoch": 1.342962879698403, "grad_norm": 1.5164693996871847, "learning_rate": 1.4608744470441541e-06, "loss": 0.08781204223632813, "step": 155315 }, { "epoch": 1.3430061132199462, "grad_norm": 8.826365100114325, "learning_rate": 1.4606995734570375e-06, "loss": 0.0931304931640625, "step": 155320 }, { "epoch": 1.3430493467414895, "grad_norm": 0.9536799413675406, "learning_rate": 1.4605247069690371e-06, "loss": 0.019961166381835937, "step": 155325 }, { "epoch": 1.3430925802630327, "grad_norm": 0.28509718891706715, "learning_rate": 1.4603498475809616e-06, "loss": 0.02768402099609375, "step": 155330 }, { "epoch": 1.343135813784576, "grad_norm": 3.002522632443125, "learning_rate": 1.460174995293617e-06, "loss": 0.2669258117675781, "step": 155335 }, { "epoch": 1.3431790473061191, "grad_norm": 0.9098209378739559, "learning_rate": 1.460000150107808e-06, "loss": 0.015524673461914062, "step": 155340 }, { "epoch": 1.3432222808276626, "grad_norm": 3.20238789299272, "learning_rate": 1.4598253120243434e-06, "loss": 0.029474258422851562, "step": 155345 }, { "epoch": 1.3432655143492058, "grad_norm": 0.2642009462357757, "learning_rate": 1.4596504810440282e-06, "loss": 0.05457305908203125, "step": 155350 }, { "epoch": 1.343308747870749, "grad_norm": 25.668576937153716, "learning_rate": 1.459475657167669e-06, "loss": 0.05272598266601562, "step": 155355 }, { "epoch": 1.3433519813922923, "grad_norm": 0.08090449483626234, "learning_rate": 1.459300840396072e-06, "loss": 0.03706226348876953, "step": 155360 }, { "epoch": 1.3433952149138355, "grad_norm": 1.3949018851732633, "learning_rate": 1.459126030730043e-06, "loss": 0.09282951354980469, "step": 155365 }, { "epoch": 1.343438448435379, "grad_norm": 3.062574713778476, "learning_rate": 1.458951228170388e-06, "loss": 0.09016265869140624, "step": 155370 }, { "epoch": 1.3434816819569222, "grad_norm": 11.680098108204406, "learning_rate": 1.4587764327179148e-06, "loss": 0.06045608520507813, "step": 155375 }, { "epoch": 1.3435249154784654, "grad_norm": 2.8101122955770097, "learning_rate": 1.4586016443734271e-06, "loss": 0.19049644470214844, "step": 155380 }, { "epoch": 1.3435681490000086, "grad_norm": 7.37246809084961, "learning_rate": 1.4584268631377335e-06, "loss": 0.0737466812133789, "step": 155385 }, { "epoch": 1.3436113825215519, "grad_norm": 65.433369108004, "learning_rate": 1.4582520890116393e-06, "loss": 0.11549606323242187, "step": 155390 }, { "epoch": 1.343654616043095, "grad_norm": 1.5060862818245961, "learning_rate": 1.45807732199595e-06, "loss": 0.0408233642578125, "step": 155395 }, { "epoch": 1.3436978495646383, "grad_norm": 0.3146672595664707, "learning_rate": 1.457902562091472e-06, "loss": 0.09134674072265625, "step": 155400 }, { "epoch": 1.3437410830861816, "grad_norm": 1.4161362446910717, "learning_rate": 1.45772780929901e-06, "loss": 0.012513351440429688, "step": 155405 }, { "epoch": 1.343784316607725, "grad_norm": 5.231137849243639, "learning_rate": 1.4575530636193722e-06, "loss": 0.052121734619140624, "step": 155410 }, { "epoch": 1.3438275501292682, "grad_norm": 22.02217996399098, "learning_rate": 1.4573783250533631e-06, "loss": 0.05687255859375, "step": 155415 }, { "epoch": 1.3438707836508115, "grad_norm": 5.699704509014391, "learning_rate": 1.4572035936017892e-06, "loss": 0.028211212158203124, "step": 155420 }, { "epoch": 1.3439140171723547, "grad_norm": 3.6742936836266087, "learning_rate": 1.4570288692654559e-06, "loss": 0.0875762939453125, "step": 155425 }, { "epoch": 1.3439572506938982, "grad_norm": 1.5648883474005104, "learning_rate": 1.4568541520451681e-06, "loss": 0.018366241455078126, "step": 155430 }, { "epoch": 1.3440004842154414, "grad_norm": 2.409820871530532, "learning_rate": 1.4566794419417334e-06, "loss": 0.17462158203125, "step": 155435 }, { "epoch": 1.3440437177369846, "grad_norm": 51.55807003845047, "learning_rate": 1.4565047389559559e-06, "loss": 0.16931838989257814, "step": 155440 }, { "epoch": 1.3440869512585278, "grad_norm": 0.42379890727996394, "learning_rate": 1.456330043088643e-06, "loss": 0.008240890502929688, "step": 155445 }, { "epoch": 1.344130184780071, "grad_norm": 0.09942060396787686, "learning_rate": 1.4561553543406002e-06, "loss": 0.039612960815429685, "step": 155450 }, { "epoch": 1.3441734183016143, "grad_norm": 3.4963690449878935, "learning_rate": 1.4559806727126318e-06, "loss": 0.015898609161376955, "step": 155455 }, { "epoch": 1.3442166518231575, "grad_norm": 0.15932112561176454, "learning_rate": 1.4558059982055442e-06, "loss": 0.023184585571289062, "step": 155460 }, { "epoch": 1.3442598853447008, "grad_norm": 4.272000534620238, "learning_rate": 1.4556313308201418e-06, "loss": 0.0175628662109375, "step": 155465 }, { "epoch": 1.3443031188662442, "grad_norm": 11.093273097148076, "learning_rate": 1.4554566705572325e-06, "loss": 0.15036582946777344, "step": 155470 }, { "epoch": 1.3443463523877874, "grad_norm": 32.76716077670276, "learning_rate": 1.4552820174176209e-06, "loss": 0.06781902313232421, "step": 155475 }, { "epoch": 1.3443895859093307, "grad_norm": 6.412984637884578, "learning_rate": 1.4551073714021117e-06, "loss": 0.14253387451171876, "step": 155480 }, { "epoch": 1.344432819430874, "grad_norm": 4.203403127471222, "learning_rate": 1.4549327325115107e-06, "loss": 0.05191364288330078, "step": 155485 }, { "epoch": 1.3444760529524171, "grad_norm": 0.27607004507201105, "learning_rate": 1.4547581007466226e-06, "loss": 0.013033294677734375, "step": 155490 }, { "epoch": 1.3445192864739606, "grad_norm": 2.239229519184976, "learning_rate": 1.4545834761082544e-06, "loss": 0.10178413391113281, "step": 155495 }, { "epoch": 1.3445625199955038, "grad_norm": 1.0456619113846206, "learning_rate": 1.4544088585972098e-06, "loss": 0.020685577392578126, "step": 155500 }, { "epoch": 1.344605753517047, "grad_norm": 3.8550386822937557, "learning_rate": 1.4542342482142959e-06, "loss": 0.04527549743652344, "step": 155505 }, { "epoch": 1.3446489870385903, "grad_norm": 40.952960481825045, "learning_rate": 1.4540596449603172e-06, "loss": 0.2148153305053711, "step": 155510 }, { "epoch": 1.3446922205601335, "grad_norm": 0.5734992552606002, "learning_rate": 1.4538850488360788e-06, "loss": 0.07876510620117187, "step": 155515 }, { "epoch": 1.3447354540816767, "grad_norm": 0.3418406292562355, "learning_rate": 1.4537104598423858e-06, "loss": 0.07349853515625, "step": 155520 }, { "epoch": 1.34477868760322, "grad_norm": 0.5507373196150749, "learning_rate": 1.4535358779800438e-06, "loss": 0.13591327667236328, "step": 155525 }, { "epoch": 1.3448219211247632, "grad_norm": 0.14886457205943224, "learning_rate": 1.4533613032498564e-06, "loss": 0.04022674560546875, "step": 155530 }, { "epoch": 1.3448651546463066, "grad_norm": 20.616130972429644, "learning_rate": 1.453186735652631e-06, "loss": 0.048080825805664064, "step": 155535 }, { "epoch": 1.3449083881678499, "grad_norm": 7.023562409143462, "learning_rate": 1.4530121751891718e-06, "loss": 0.0372039794921875, "step": 155540 }, { "epoch": 1.344951621689393, "grad_norm": 6.518853041145882, "learning_rate": 1.4528376218602836e-06, "loss": 0.25177764892578125, "step": 155545 }, { "epoch": 1.3449948552109363, "grad_norm": 11.094484127605126, "learning_rate": 1.4526630756667709e-06, "loss": 0.01940498352050781, "step": 155550 }, { "epoch": 1.3450380887324795, "grad_norm": 12.375958317000205, "learning_rate": 1.4524885366094402e-06, "loss": 0.10800628662109375, "step": 155555 }, { "epoch": 1.345081322254023, "grad_norm": 2.1744865174026944, "learning_rate": 1.4523140046890956e-06, "loss": 0.03875274658203125, "step": 155560 }, { "epoch": 1.3451245557755662, "grad_norm": 13.435627441463376, "learning_rate": 1.452139479906541e-06, "loss": 0.1304485321044922, "step": 155565 }, { "epoch": 1.3451677892971095, "grad_norm": 9.984709129719889, "learning_rate": 1.4519649622625836e-06, "loss": 0.1001739501953125, "step": 155570 }, { "epoch": 1.3452110228186527, "grad_norm": 0.6325870168763176, "learning_rate": 1.4517904517580265e-06, "loss": 0.2294281005859375, "step": 155575 }, { "epoch": 1.345254256340196, "grad_norm": 1.185032022356395, "learning_rate": 1.4516159483936758e-06, "loss": 0.09781341552734375, "step": 155580 }, { "epoch": 1.3452974898617391, "grad_norm": 0.15562615981439742, "learning_rate": 1.451441452170335e-06, "loss": 0.06186370849609375, "step": 155585 }, { "epoch": 1.3453407233832824, "grad_norm": 0.6043314650241305, "learning_rate": 1.4512669630888094e-06, "loss": 0.01828346252441406, "step": 155590 }, { "epoch": 1.3453839569048256, "grad_norm": 3.4497182032028, "learning_rate": 1.451092481149903e-06, "loss": 0.027928924560546874, "step": 155595 }, { "epoch": 1.345427190426369, "grad_norm": 45.427994395325165, "learning_rate": 1.4509180063544217e-06, "loss": 0.17142410278320314, "step": 155600 }, { "epoch": 1.3454704239479123, "grad_norm": 0.7914321750974873, "learning_rate": 1.450743538703169e-06, "loss": 0.005176162719726563, "step": 155605 }, { "epoch": 1.3455136574694555, "grad_norm": 0.6921965299590708, "learning_rate": 1.450569078196951e-06, "loss": 0.0168701171875, "step": 155610 }, { "epoch": 1.3455568909909987, "grad_norm": 2.4677171073908095, "learning_rate": 1.4503946248365718e-06, "loss": 0.031258773803710935, "step": 155615 }, { "epoch": 1.345600124512542, "grad_norm": 50.54235413227002, "learning_rate": 1.4502201786228359e-06, "loss": 0.249993896484375, "step": 155620 }, { "epoch": 1.3456433580340854, "grad_norm": 0.2093924172742376, "learning_rate": 1.4500457395565458e-06, "loss": 0.03466639518737793, "step": 155625 }, { "epoch": 1.3456865915556286, "grad_norm": 0.5243464193049665, "learning_rate": 1.4498713076385094e-06, "loss": 0.057680511474609376, "step": 155630 }, { "epoch": 1.3457298250771719, "grad_norm": 51.51947660630902, "learning_rate": 1.4496968828695295e-06, "loss": 0.37370147705078127, "step": 155635 }, { "epoch": 1.345773058598715, "grad_norm": 0.4691650133849136, "learning_rate": 1.4495224652504106e-06, "loss": 0.0352020263671875, "step": 155640 }, { "epoch": 1.3458162921202583, "grad_norm": 1.8350342452040564, "learning_rate": 1.449348054781957e-06, "loss": 0.05872039794921875, "step": 155645 }, { "epoch": 1.3458595256418016, "grad_norm": 4.141987604785201, "learning_rate": 1.449173651464973e-06, "loss": 0.14155349731445313, "step": 155650 }, { "epoch": 1.3459027591633448, "grad_norm": 3.922186318859183, "learning_rate": 1.4489992553002621e-06, "loss": 0.089971923828125, "step": 155655 }, { "epoch": 1.3459459926848882, "grad_norm": 17.99389205610604, "learning_rate": 1.448824866288631e-06, "loss": 0.21168746948242187, "step": 155660 }, { "epoch": 1.3459892262064315, "grad_norm": 11.534692297166904, "learning_rate": 1.4486504844308812e-06, "loss": 0.03849334716796875, "step": 155665 }, { "epoch": 1.3460324597279747, "grad_norm": 1.1601552915295956, "learning_rate": 1.4484761097278192e-06, "loss": 0.0636138916015625, "step": 155670 }, { "epoch": 1.346075693249518, "grad_norm": 1.9186140748196103, "learning_rate": 1.4483017421802485e-06, "loss": 0.19377899169921875, "step": 155675 }, { "epoch": 1.3461189267710612, "grad_norm": 0.27302922348393915, "learning_rate": 1.448127381788973e-06, "loss": 0.021624183654785155, "step": 155680 }, { "epoch": 1.3461621602926046, "grad_norm": 2.6184396434320436, "learning_rate": 1.447953028554797e-06, "loss": 0.0657989501953125, "step": 155685 }, { "epoch": 1.3462053938141478, "grad_norm": 0.4805750673509976, "learning_rate": 1.4477786824785233e-06, "loss": 0.06151142120361328, "step": 155690 }, { "epoch": 1.346248627335691, "grad_norm": 0.597114535295981, "learning_rate": 1.4476043435609585e-06, "loss": 0.08788986206054687, "step": 155695 }, { "epoch": 1.3462918608572343, "grad_norm": 0.33923920671381863, "learning_rate": 1.4474300118029047e-06, "loss": 0.040996551513671875, "step": 155700 }, { "epoch": 1.3463350943787775, "grad_norm": 6.28658402707268, "learning_rate": 1.447255687205167e-06, "loss": 0.013574981689453125, "step": 155705 }, { "epoch": 1.3463783279003207, "grad_norm": 17.96507386828242, "learning_rate": 1.4470813697685487e-06, "loss": 0.1199981689453125, "step": 155710 }, { "epoch": 1.346421561421864, "grad_norm": 0.16167207802206532, "learning_rate": 1.4469070594938528e-06, "loss": 0.04875106811523437, "step": 155715 }, { "epoch": 1.3464647949434072, "grad_norm": 8.119179545090375, "learning_rate": 1.4467327563818855e-06, "loss": 0.02725067138671875, "step": 155720 }, { "epoch": 1.3465080284649507, "grad_norm": 3.0695590312022523, "learning_rate": 1.446558460433448e-06, "loss": 0.03483848571777344, "step": 155725 }, { "epoch": 1.3465512619864939, "grad_norm": 3.658270675059359, "learning_rate": 1.4463841716493472e-06, "loss": 0.08386955261230469, "step": 155730 }, { "epoch": 1.3465944955080371, "grad_norm": 1.1341624620170199, "learning_rate": 1.4462098900303853e-06, "loss": 0.04460906982421875, "step": 155735 }, { "epoch": 1.3466377290295803, "grad_norm": 13.590839116791141, "learning_rate": 1.446035615577366e-06, "loss": 0.09955291748046875, "step": 155740 }, { "epoch": 1.3466809625511236, "grad_norm": 14.890164230336802, "learning_rate": 1.4458613482910928e-06, "loss": 0.104583740234375, "step": 155745 }, { "epoch": 1.346724196072667, "grad_norm": 34.90014438334132, "learning_rate": 1.4456870881723703e-06, "loss": 0.09326629638671875, "step": 155750 }, { "epoch": 1.3467674295942103, "grad_norm": 0.5020729461167348, "learning_rate": 1.445512835222e-06, "loss": 0.01902456283569336, "step": 155755 }, { "epoch": 1.3468106631157535, "grad_norm": 9.09736419787464, "learning_rate": 1.4453385894407886e-06, "loss": 0.11192474365234376, "step": 155760 }, { "epoch": 1.3468538966372967, "grad_norm": 0.560618672889655, "learning_rate": 1.4451643508295381e-06, "loss": 0.072149658203125, "step": 155765 }, { "epoch": 1.34689713015884, "grad_norm": 0.8296029818202888, "learning_rate": 1.444990119389051e-06, "loss": 0.056597900390625, "step": 155770 }, { "epoch": 1.3469403636803832, "grad_norm": 2.8385566758135172, "learning_rate": 1.4448158951201333e-06, "loss": 0.02060546875, "step": 155775 }, { "epoch": 1.3469835972019264, "grad_norm": 17.057737730786435, "learning_rate": 1.4446416780235874e-06, "loss": 0.02996959686279297, "step": 155780 }, { "epoch": 1.3470268307234696, "grad_norm": 4.696915148224212, "learning_rate": 1.4444674681002155e-06, "loss": 0.06728363037109375, "step": 155785 }, { "epoch": 1.347070064245013, "grad_norm": 0.9149391876366149, "learning_rate": 1.4442932653508232e-06, "loss": 0.15898971557617186, "step": 155790 }, { "epoch": 1.3471132977665563, "grad_norm": 1.3553527215382537, "learning_rate": 1.444119069776213e-06, "loss": 0.03764076232910156, "step": 155795 }, { "epoch": 1.3471565312880995, "grad_norm": 0.3475334756883575, "learning_rate": 1.443944881377188e-06, "loss": 0.0135467529296875, "step": 155800 }, { "epoch": 1.3471997648096428, "grad_norm": 2.9072580749539805, "learning_rate": 1.4437707001545516e-06, "loss": 0.016819000244140625, "step": 155805 }, { "epoch": 1.347242998331186, "grad_norm": 9.524304229281299, "learning_rate": 1.4435965261091077e-06, "loss": 0.12653121948242188, "step": 155810 }, { "epoch": 1.3472862318527294, "grad_norm": 16.117640452349566, "learning_rate": 1.4434223592416588e-06, "loss": 0.04639854431152344, "step": 155815 }, { "epoch": 1.3473294653742727, "grad_norm": 79.66034195832238, "learning_rate": 1.443248199553007e-06, "loss": 0.20740242004394532, "step": 155820 }, { "epoch": 1.347372698895816, "grad_norm": 0.2762947399093382, "learning_rate": 1.4430740470439587e-06, "loss": 0.08024959564208985, "step": 155825 }, { "epoch": 1.3474159324173591, "grad_norm": 0.6622698746185857, "learning_rate": 1.442899901715314e-06, "loss": 0.034225082397460936, "step": 155830 }, { "epoch": 1.3474591659389024, "grad_norm": 6.019624402212063, "learning_rate": 1.4427257635678781e-06, "loss": 0.0254974365234375, "step": 155835 }, { "epoch": 1.3475023994604456, "grad_norm": 62.310987036406296, "learning_rate": 1.442551632602454e-06, "loss": 0.18245887756347656, "step": 155840 }, { "epoch": 1.3475456329819888, "grad_norm": 30.027547847710423, "learning_rate": 1.4423775088198437e-06, "loss": 0.1178924560546875, "step": 155845 }, { "epoch": 1.347588866503532, "grad_norm": 2.8054866306901993, "learning_rate": 1.4422033922208496e-06, "loss": 0.049273681640625, "step": 155850 }, { "epoch": 1.3476321000250755, "grad_norm": 0.41406980702968266, "learning_rate": 1.442029282806277e-06, "loss": 0.02557029724121094, "step": 155855 }, { "epoch": 1.3476753335466187, "grad_norm": 0.18130596238076027, "learning_rate": 1.4418551805769279e-06, "loss": 0.046899032592773435, "step": 155860 }, { "epoch": 1.347718567068162, "grad_norm": 0.09370359717733, "learning_rate": 1.4416810855336047e-06, "loss": 0.012368011474609374, "step": 155865 }, { "epoch": 1.3477618005897052, "grad_norm": 1.5255988540437635, "learning_rate": 1.4415069976771107e-06, "loss": 0.101861572265625, "step": 155870 }, { "epoch": 1.3478050341112486, "grad_norm": 6.547077754076271, "learning_rate": 1.441332917008249e-06, "loss": 0.045925140380859375, "step": 155875 }, { "epoch": 1.3478482676327919, "grad_norm": 0.09211399370598072, "learning_rate": 1.4411588435278205e-06, "loss": 0.0266754150390625, "step": 155880 }, { "epoch": 1.347891501154335, "grad_norm": 1.301267109360711, "learning_rate": 1.4409847772366314e-06, "loss": 0.011910247802734374, "step": 155885 }, { "epoch": 1.3479347346758783, "grad_norm": 9.459984136554771, "learning_rate": 1.4408107181354814e-06, "loss": 0.03511123657226563, "step": 155890 }, { "epoch": 1.3479779681974215, "grad_norm": 4.109227099123729, "learning_rate": 1.4406366662251753e-06, "loss": 0.010574722290039062, "step": 155895 }, { "epoch": 1.3480212017189648, "grad_norm": 0.6511068493806015, "learning_rate": 1.4404626215065156e-06, "loss": 0.03953399658203125, "step": 155900 }, { "epoch": 1.348064435240508, "grad_norm": 4.2515362740884335, "learning_rate": 1.4402885839803045e-06, "loss": 0.09789581298828125, "step": 155905 }, { "epoch": 1.3481076687620512, "grad_norm": 6.333284998238092, "learning_rate": 1.4401145536473447e-06, "loss": 0.03280487060546875, "step": 155910 }, { "epoch": 1.3481509022835947, "grad_norm": 3.1426268067662857, "learning_rate": 1.439940530508437e-06, "loss": 0.035968780517578125, "step": 155915 }, { "epoch": 1.348194135805138, "grad_norm": 18.60251941868777, "learning_rate": 1.4397665145643874e-06, "loss": 0.08859710693359375, "step": 155920 }, { "epoch": 1.3482373693266811, "grad_norm": 19.219520191375544, "learning_rate": 1.4395925058159966e-06, "loss": 0.1812774658203125, "step": 155925 }, { "epoch": 1.3482806028482244, "grad_norm": 1.2855244045961767, "learning_rate": 1.4394185042640673e-06, "loss": 0.12817344665527344, "step": 155930 }, { "epoch": 1.3483238363697676, "grad_norm": 1.017998806106859, "learning_rate": 1.439244509909402e-06, "loss": 0.022312164306640625, "step": 155935 }, { "epoch": 1.348367069891311, "grad_norm": 0.6434971605331717, "learning_rate": 1.439070522752802e-06, "loss": 0.12423210144042969, "step": 155940 }, { "epoch": 1.3484103034128543, "grad_norm": 11.211404103636639, "learning_rate": 1.4388965427950707e-06, "loss": 0.046509933471679685, "step": 155945 }, { "epoch": 1.3484535369343975, "grad_norm": 1.1760913215529458, "learning_rate": 1.4387225700370118e-06, "loss": 0.013078022003173827, "step": 155950 }, { "epoch": 1.3484967704559407, "grad_norm": 4.337176186458729, "learning_rate": 1.4385486044794268e-06, "loss": 0.13049545288085937, "step": 155955 }, { "epoch": 1.348540003977484, "grad_norm": 15.295285653150009, "learning_rate": 1.4383746461231173e-06, "loss": 0.12444114685058594, "step": 155960 }, { "epoch": 1.3485832374990272, "grad_norm": 0.6047897315322013, "learning_rate": 1.4382006949688857e-06, "loss": 0.045825958251953125, "step": 155965 }, { "epoch": 1.3486264710205704, "grad_norm": 5.234806804589089, "learning_rate": 1.438026751017535e-06, "loss": 0.025272941589355467, "step": 155970 }, { "epoch": 1.3486697045421137, "grad_norm": 0.449081286752375, "learning_rate": 1.4378528142698653e-06, "loss": 0.06265449523925781, "step": 155975 }, { "epoch": 1.348712938063657, "grad_norm": 2.624713800056733, "learning_rate": 1.4376788847266817e-06, "loss": 0.1838043212890625, "step": 155980 }, { "epoch": 1.3487561715852003, "grad_norm": 1.0183986460076226, "learning_rate": 1.4375049623887852e-06, "loss": 0.008119583129882812, "step": 155985 }, { "epoch": 1.3487994051067436, "grad_norm": 20.706713050576273, "learning_rate": 1.4373310472569776e-06, "loss": 0.10962905883789062, "step": 155990 }, { "epoch": 1.3488426386282868, "grad_norm": 22.103340056861885, "learning_rate": 1.4371571393320602e-06, "loss": 0.1055419921875, "step": 155995 }, { "epoch": 1.34888587214983, "grad_norm": 3.095187044660056, "learning_rate": 1.4369832386148368e-06, "loss": 0.0277740478515625, "step": 156000 }, { "epoch": 1.3489291056713735, "grad_norm": 0.7246442610803548, "learning_rate": 1.436809345106109e-06, "loss": 0.24455223083496094, "step": 156005 }, { "epoch": 1.3489723391929167, "grad_norm": 1.3957538752855552, "learning_rate": 1.436635458806677e-06, "loss": 0.020180892944335938, "step": 156010 }, { "epoch": 1.34901557271446, "grad_norm": 12.17429538245212, "learning_rate": 1.436461579717345e-06, "loss": 0.04048576354980469, "step": 156015 }, { "epoch": 1.3490588062360032, "grad_norm": 0.10365904212589974, "learning_rate": 1.4362877078389145e-06, "loss": 0.02315177917480469, "step": 156020 }, { "epoch": 1.3491020397575464, "grad_norm": 1.4061819982520507, "learning_rate": 1.4361138431721866e-06, "loss": 0.05465621948242187, "step": 156025 }, { "epoch": 1.3491452732790896, "grad_norm": 0.16518540109598517, "learning_rate": 1.4359399857179635e-06, "loss": 0.03296737670898438, "step": 156030 }, { "epoch": 1.3491885068006328, "grad_norm": 1.0077283492952749, "learning_rate": 1.435766135477047e-06, "loss": 0.32077560424804685, "step": 156035 }, { "epoch": 1.349231740322176, "grad_norm": 6.977663365841081, "learning_rate": 1.435592292450238e-06, "loss": 0.06290817260742188, "step": 156040 }, { "epoch": 1.3492749738437195, "grad_norm": 0.7846862574873527, "learning_rate": 1.4354184566383398e-06, "loss": 0.05248565673828125, "step": 156045 }, { "epoch": 1.3493182073652628, "grad_norm": 0.5135638769824082, "learning_rate": 1.435244628042154e-06, "loss": 0.04709548950195312, "step": 156050 }, { "epoch": 1.349361440886806, "grad_norm": 1.9882040592869739, "learning_rate": 1.4350708066624801e-06, "loss": 0.18170700073242188, "step": 156055 }, { "epoch": 1.3494046744083492, "grad_norm": 1.390230936268437, "learning_rate": 1.4348969925001225e-06, "loss": 0.22628936767578126, "step": 156060 }, { "epoch": 1.3494479079298924, "grad_norm": 2.0646877620220216, "learning_rate": 1.4347231855558818e-06, "loss": 0.03731842041015625, "step": 156065 }, { "epoch": 1.349491141451436, "grad_norm": 1.9632639973579886, "learning_rate": 1.4345493858305582e-06, "loss": 0.030901336669921876, "step": 156070 }, { "epoch": 1.3495343749729791, "grad_norm": 17.182560086602816, "learning_rate": 1.434375593324956e-06, "loss": 0.09660682678222657, "step": 156075 }, { "epoch": 1.3495776084945224, "grad_norm": 10.214502954728648, "learning_rate": 1.434201808039875e-06, "loss": 0.013299942016601562, "step": 156080 }, { "epoch": 1.3496208420160656, "grad_norm": 2.7851423340791057, "learning_rate": 1.4340280299761168e-06, "loss": 0.07163925170898437, "step": 156085 }, { "epoch": 1.3496640755376088, "grad_norm": 127.1321267651293, "learning_rate": 1.4338542591344831e-06, "loss": 0.1955780029296875, "step": 156090 }, { "epoch": 1.349707309059152, "grad_norm": 5.177849383614127, "learning_rate": 1.4336804955157753e-06, "loss": 0.028354644775390625, "step": 156095 }, { "epoch": 1.3497505425806953, "grad_norm": 1.3123489812241576, "learning_rate": 1.4335067391207944e-06, "loss": 0.054169845581054685, "step": 156100 }, { "epoch": 1.3497937761022385, "grad_norm": 0.9595477149046756, "learning_rate": 1.433332989950341e-06, "loss": 0.0219879150390625, "step": 156105 }, { "epoch": 1.349837009623782, "grad_norm": 0.29914578366426564, "learning_rate": 1.4331592480052184e-06, "loss": 0.0030301094055175783, "step": 156110 }, { "epoch": 1.3498802431453252, "grad_norm": 0.4311105603506939, "learning_rate": 1.432985513286226e-06, "loss": 0.020229911804199217, "step": 156115 }, { "epoch": 1.3499234766668684, "grad_norm": 8.20111086851946, "learning_rate": 1.4328117857941668e-06, "loss": 0.06322021484375, "step": 156120 }, { "epoch": 1.3499667101884116, "grad_norm": 1.7845208197471545, "learning_rate": 1.4326380655298413e-06, "loss": 0.2333892822265625, "step": 156125 }, { "epoch": 1.350009943709955, "grad_norm": 0.7653662219221871, "learning_rate": 1.4324643524940507e-06, "loss": 0.02868499755859375, "step": 156130 }, { "epoch": 1.3500531772314983, "grad_norm": 0.5809370725035744, "learning_rate": 1.4322906466875946e-06, "loss": 0.03996429443359375, "step": 156135 }, { "epoch": 1.3500964107530415, "grad_norm": 17.148273485268213, "learning_rate": 1.432116948111277e-06, "loss": 0.16356658935546875, "step": 156140 }, { "epoch": 1.3501396442745848, "grad_norm": 15.494989532064771, "learning_rate": 1.4319432567658971e-06, "loss": 0.08202037811279297, "step": 156145 }, { "epoch": 1.350182877796128, "grad_norm": 3.8130670532031727, "learning_rate": 1.4317695726522564e-06, "loss": 0.10484619140625, "step": 156150 }, { "epoch": 1.3502261113176712, "grad_norm": 3.8746694425428645, "learning_rate": 1.4315958957711562e-06, "loss": 0.021345901489257812, "step": 156155 }, { "epoch": 1.3502693448392145, "grad_norm": 5.309368131788732, "learning_rate": 1.4314222261233967e-06, "loss": 0.031743621826171874, "step": 156160 }, { "epoch": 1.3503125783607577, "grad_norm": 3.4602732740644013, "learning_rate": 1.4312485637097785e-06, "loss": 0.04646987915039062, "step": 156165 }, { "epoch": 1.3503558118823011, "grad_norm": 4.0438685043163805, "learning_rate": 1.431074908531103e-06, "loss": 0.06691436767578125, "step": 156170 }, { "epoch": 1.3503990454038444, "grad_norm": 0.4888196110447014, "learning_rate": 1.4309012605881727e-06, "loss": 0.02147331237792969, "step": 156175 }, { "epoch": 1.3504422789253876, "grad_norm": 2.461190487196845, "learning_rate": 1.4307276198817874e-06, "loss": 0.0388824462890625, "step": 156180 }, { "epoch": 1.3504855124469308, "grad_norm": 3.574735326282843, "learning_rate": 1.4305539864127474e-06, "loss": 0.053453826904296876, "step": 156185 }, { "epoch": 1.350528745968474, "grad_norm": 0.20818751513164632, "learning_rate": 1.4303803601818535e-06, "loss": 0.010297203063964843, "step": 156190 }, { "epoch": 1.3505719794900175, "grad_norm": 3.7298259771105435, "learning_rate": 1.430206741189907e-06, "loss": 0.09401798248291016, "step": 156195 }, { "epoch": 1.3506152130115607, "grad_norm": 1.158717550473783, "learning_rate": 1.4300331294377073e-06, "loss": 0.033893585205078125, "step": 156200 }, { "epoch": 1.350658446533104, "grad_norm": 0.05559643821856018, "learning_rate": 1.4298595249260567e-06, "loss": 0.01275177001953125, "step": 156205 }, { "epoch": 1.3507016800546472, "grad_norm": 12.717195271109071, "learning_rate": 1.4296859276557555e-06, "loss": 0.07364730834960938, "step": 156210 }, { "epoch": 1.3507449135761904, "grad_norm": 0.6108941301839396, "learning_rate": 1.4295123376276044e-06, "loss": 0.03809967041015625, "step": 156215 }, { "epoch": 1.3507881470977336, "grad_norm": 0.4634816961501864, "learning_rate": 1.429338754842402e-06, "loss": 0.008055877685546876, "step": 156220 }, { "epoch": 1.3508313806192769, "grad_norm": 10.11836027484657, "learning_rate": 1.4291651793009517e-06, "loss": 0.042274856567382814, "step": 156225 }, { "epoch": 1.35087461414082, "grad_norm": 0.7138780465966997, "learning_rate": 1.428991611004052e-06, "loss": 0.015249252319335938, "step": 156230 }, { "epoch": 1.3509178476623636, "grad_norm": 3.1104916682412393, "learning_rate": 1.4288180499525048e-06, "loss": 0.015629959106445313, "step": 156235 }, { "epoch": 1.3509610811839068, "grad_norm": 2.02576008921516, "learning_rate": 1.4286444961471102e-06, "loss": 0.014938735961914062, "step": 156240 }, { "epoch": 1.35100431470545, "grad_norm": 0.7318644678121804, "learning_rate": 1.4284709495886683e-06, "loss": 0.009070587158203126, "step": 156245 }, { "epoch": 1.3510475482269932, "grad_norm": 1.0542383979909407, "learning_rate": 1.4282974102779794e-06, "loss": 0.04286956787109375, "step": 156250 }, { "epoch": 1.3510907817485365, "grad_norm": 8.774782489248835, "learning_rate": 1.4281238782158443e-06, "loss": 0.05399761199951172, "step": 156255 }, { "epoch": 1.35113401527008, "grad_norm": 50.32477701095313, "learning_rate": 1.4279503534030629e-06, "loss": 0.2510650634765625, "step": 156260 }, { "epoch": 1.3511772487916232, "grad_norm": 101.51261912887706, "learning_rate": 1.4277768358404342e-06, "loss": 0.30936737060546876, "step": 156265 }, { "epoch": 1.3512204823131664, "grad_norm": 5.111992157621282, "learning_rate": 1.4276033255287612e-06, "loss": 0.027533721923828126, "step": 156270 }, { "epoch": 1.3512637158347096, "grad_norm": 11.461414737626228, "learning_rate": 1.4274298224688425e-06, "loss": 0.10140914916992187, "step": 156275 }, { "epoch": 1.3513069493562528, "grad_norm": 5.592538217345249, "learning_rate": 1.4272563266614775e-06, "loss": 0.06583232879638672, "step": 156280 }, { "epoch": 1.351350182877796, "grad_norm": 3.018978920470273, "learning_rate": 1.4270828381074682e-06, "loss": 0.01836700439453125, "step": 156285 }, { "epoch": 1.3513934163993393, "grad_norm": 13.483002976315403, "learning_rate": 1.426909356807614e-06, "loss": 0.0728973388671875, "step": 156290 }, { "epoch": 1.3514366499208825, "grad_norm": 0.3577904825840476, "learning_rate": 1.426735882762714e-06, "loss": 0.19634170532226564, "step": 156295 }, { "epoch": 1.351479883442426, "grad_norm": 0.7226890311460595, "learning_rate": 1.4265624159735697e-06, "loss": 0.039997100830078125, "step": 156300 }, { "epoch": 1.3515231169639692, "grad_norm": 19.96570781448831, "learning_rate": 1.4263889564409808e-06, "loss": 0.11593780517578126, "step": 156305 }, { "epoch": 1.3515663504855124, "grad_norm": 1.7753025536149747, "learning_rate": 1.4262155041657471e-06, "loss": 0.020863151550292967, "step": 156310 }, { "epoch": 1.3516095840070557, "grad_norm": 0.6418090836679353, "learning_rate": 1.4260420591486685e-06, "loss": 0.1269052505493164, "step": 156315 }, { "epoch": 1.351652817528599, "grad_norm": 0.6634461939839068, "learning_rate": 1.4258686213905448e-06, "loss": 0.014154052734375, "step": 156320 }, { "epoch": 1.3516960510501423, "grad_norm": 44.52838744650398, "learning_rate": 1.4256951908921746e-06, "loss": 0.12683486938476562, "step": 156325 }, { "epoch": 1.3517392845716856, "grad_norm": 1.1265945183121109, "learning_rate": 1.4255217676543603e-06, "loss": 0.005716705322265625, "step": 156330 }, { "epoch": 1.3517825180932288, "grad_norm": 17.365686655810336, "learning_rate": 1.4253483516779005e-06, "loss": 0.074920654296875, "step": 156335 }, { "epoch": 1.351825751614772, "grad_norm": 1.2808366083573848, "learning_rate": 1.4251749429635939e-06, "loss": 0.0530792236328125, "step": 156340 }, { "epoch": 1.3518689851363153, "grad_norm": 1.313118918138762, "learning_rate": 1.4250015415122422e-06, "loss": 0.13763427734375, "step": 156345 }, { "epoch": 1.3519122186578585, "grad_norm": 17.324672140010662, "learning_rate": 1.4248281473246445e-06, "loss": 0.13486404418945314, "step": 156350 }, { "epoch": 1.3519554521794017, "grad_norm": 4.856346371513083, "learning_rate": 1.4246547604016e-06, "loss": 0.020794677734375, "step": 156355 }, { "epoch": 1.351998685700945, "grad_norm": 7.27765061433345, "learning_rate": 1.4244813807439074e-06, "loss": 0.044896697998046874, "step": 156360 }, { "epoch": 1.3520419192224884, "grad_norm": 2.23358335507175, "learning_rate": 1.4243080083523686e-06, "loss": 0.03744163513183594, "step": 156365 }, { "epoch": 1.3520851527440316, "grad_norm": 0.7428812520200881, "learning_rate": 1.4241346432277822e-06, "loss": 0.19693145751953126, "step": 156370 }, { "epoch": 1.3521283862655749, "grad_norm": 22.938468820550476, "learning_rate": 1.4239612853709476e-06, "loss": 0.07464218139648438, "step": 156375 }, { "epoch": 1.352171619787118, "grad_norm": 0.48769845073673634, "learning_rate": 1.4237879347826643e-06, "loss": 0.0935760498046875, "step": 156380 }, { "epoch": 1.3522148533086615, "grad_norm": 0.7126486817430325, "learning_rate": 1.4236145914637314e-06, "loss": 0.04668712615966797, "step": 156385 }, { "epoch": 1.3522580868302048, "grad_norm": 0.15650288972035017, "learning_rate": 1.4234412554149477e-06, "loss": 0.04797439575195313, "step": 156390 }, { "epoch": 1.352301320351748, "grad_norm": 7.921682466949649, "learning_rate": 1.423267926637114e-06, "loss": 0.0425506591796875, "step": 156395 }, { "epoch": 1.3523445538732912, "grad_norm": 6.027004384648272, "learning_rate": 1.4230946051310303e-06, "loss": 0.0213043212890625, "step": 156400 }, { "epoch": 1.3523877873948345, "grad_norm": 0.021023453616194225, "learning_rate": 1.4229212908974946e-06, "loss": 0.02660083770751953, "step": 156405 }, { "epoch": 1.3524310209163777, "grad_norm": 17.500989798308513, "learning_rate": 1.4227479839373068e-06, "loss": 0.04375267028808594, "step": 156410 }, { "epoch": 1.352474254437921, "grad_norm": 1.0320945151538439, "learning_rate": 1.422574684251266e-06, "loss": 0.01358795166015625, "step": 156415 }, { "epoch": 1.3525174879594641, "grad_norm": 1.4005430445770126, "learning_rate": 1.42240139184017e-06, "loss": 0.05147705078125, "step": 156420 }, { "epoch": 1.3525607214810076, "grad_norm": 0.152333618515783, "learning_rate": 1.4222281067048208e-06, "loss": 0.012314987182617188, "step": 156425 }, { "epoch": 1.3526039550025508, "grad_norm": 4.3374853373022635, "learning_rate": 1.422054828846016e-06, "loss": 0.04682235717773438, "step": 156430 }, { "epoch": 1.352647188524094, "grad_norm": 0.5947476168843131, "learning_rate": 1.421881558264555e-06, "loss": 0.03673553466796875, "step": 156435 }, { "epoch": 1.3526904220456373, "grad_norm": 4.039995933840361, "learning_rate": 1.4217082949612366e-06, "loss": 0.05064125061035156, "step": 156440 }, { "epoch": 1.3527336555671805, "grad_norm": 26.36563474073407, "learning_rate": 1.4215350389368594e-06, "loss": 0.15758132934570312, "step": 156445 }, { "epoch": 1.352776889088724, "grad_norm": 0.41137836692555657, "learning_rate": 1.4213617901922235e-06, "loss": 0.021062469482421874, "step": 156450 }, { "epoch": 1.3528201226102672, "grad_norm": 19.60507456561361, "learning_rate": 1.421188548728127e-06, "loss": 0.05755691528320313, "step": 156455 }, { "epoch": 1.3528633561318104, "grad_norm": 0.8104923725484493, "learning_rate": 1.4210153145453705e-06, "loss": 0.082379150390625, "step": 156460 }, { "epoch": 1.3529065896533536, "grad_norm": 2.580912702059445, "learning_rate": 1.4208420876447519e-06, "loss": 0.022875213623046876, "step": 156465 }, { "epoch": 1.3529498231748969, "grad_norm": 6.363399040952443, "learning_rate": 1.4206688680270694e-06, "loss": 0.0944127082824707, "step": 156470 }, { "epoch": 1.35299305669644, "grad_norm": 38.66108586365609, "learning_rate": 1.4204956556931233e-06, "loss": 0.1041499137878418, "step": 156475 }, { "epoch": 1.3530362902179833, "grad_norm": 4.4166740075295925, "learning_rate": 1.4203224506437113e-06, "loss": 0.09234466552734374, "step": 156480 }, { "epoch": 1.3530795237395266, "grad_norm": 73.505577125458, "learning_rate": 1.4201492528796315e-06, "loss": 0.15580596923828124, "step": 156485 }, { "epoch": 1.35312275726107, "grad_norm": 4.527427521713847, "learning_rate": 1.419976062401685e-06, "loss": 0.02730560302734375, "step": 156490 }, { "epoch": 1.3531659907826132, "grad_norm": 3.5254800592835194, "learning_rate": 1.419802879210669e-06, "loss": 0.024057960510253905, "step": 156495 }, { "epoch": 1.3532092243041565, "grad_norm": 0.03908390707434053, "learning_rate": 1.419629703307383e-06, "loss": 0.07185783386230468, "step": 156500 }, { "epoch": 1.3532524578256997, "grad_norm": 3.2846309299226304, "learning_rate": 1.4194565346926235e-06, "loss": 0.08996429443359374, "step": 156505 }, { "epoch": 1.353295691347243, "grad_norm": 8.788441579084484, "learning_rate": 1.4192833733671924e-06, "loss": 0.1167633056640625, "step": 156510 }, { "epoch": 1.3533389248687864, "grad_norm": 18.59527332330989, "learning_rate": 1.4191102193318864e-06, "loss": 0.07008094787597656, "step": 156515 }, { "epoch": 1.3533821583903296, "grad_norm": 0.534295103683512, "learning_rate": 1.4189370725875032e-06, "loss": 0.08908615112304688, "step": 156520 }, { "epoch": 1.3534253919118728, "grad_norm": 27.020702609177913, "learning_rate": 1.4187639331348438e-06, "loss": 0.21623878479003905, "step": 156525 }, { "epoch": 1.353468625433416, "grad_norm": 6.377752621877169, "learning_rate": 1.4185908009747055e-06, "loss": 0.058626556396484376, "step": 156530 }, { "epoch": 1.3535118589549593, "grad_norm": 28.416084066753676, "learning_rate": 1.4184176761078864e-06, "loss": 0.07643508911132812, "step": 156535 }, { "epoch": 1.3535550924765025, "grad_norm": 37.978218999602895, "learning_rate": 1.4182445585351854e-06, "loss": 0.06900539398193359, "step": 156540 }, { "epoch": 1.3535983259980457, "grad_norm": 4.467416132260693, "learning_rate": 1.4180714482574008e-06, "loss": 0.030451583862304687, "step": 156545 }, { "epoch": 1.353641559519589, "grad_norm": 10.801732000412363, "learning_rate": 1.4178983452753294e-06, "loss": 0.07705230712890625, "step": 156550 }, { "epoch": 1.3536847930411324, "grad_norm": 0.43679682766210887, "learning_rate": 1.4177252495897727e-06, "loss": 0.14072723388671876, "step": 156555 }, { "epoch": 1.3537280265626757, "grad_norm": 21.900544990798274, "learning_rate": 1.4175521612015271e-06, "loss": 0.1931488037109375, "step": 156560 }, { "epoch": 1.3537712600842189, "grad_norm": 5.398613619718705, "learning_rate": 1.41737908011139e-06, "loss": 0.07624979019165039, "step": 156565 }, { "epoch": 1.3538144936057621, "grad_norm": 0.9564221084251039, "learning_rate": 1.4172060063201617e-06, "loss": 0.02889108657836914, "step": 156570 }, { "epoch": 1.3538577271273056, "grad_norm": 1.6231453129638322, "learning_rate": 1.4170329398286395e-06, "loss": 0.06462783813476562, "step": 156575 }, { "epoch": 1.3539009606488488, "grad_norm": 0.9429522858053223, "learning_rate": 1.4168598806376207e-06, "loss": 0.07285690307617188, "step": 156580 }, { "epoch": 1.353944194170392, "grad_norm": 0.07617088644008273, "learning_rate": 1.4166868287479053e-06, "loss": 0.14546279907226561, "step": 156585 }, { "epoch": 1.3539874276919353, "grad_norm": 112.72128574091951, "learning_rate": 1.4165137841602901e-06, "loss": 0.2412811279296875, "step": 156590 }, { "epoch": 1.3540306612134785, "grad_norm": 0.5811473861123058, "learning_rate": 1.4163407468755732e-06, "loss": 0.1019287109375, "step": 156595 }, { "epoch": 1.3540738947350217, "grad_norm": 0.5072456803197478, "learning_rate": 1.4161677168945533e-06, "loss": 0.026844024658203125, "step": 156600 }, { "epoch": 1.354117128256565, "grad_norm": 28.76627095086047, "learning_rate": 1.4159946942180279e-06, "loss": 0.39221649169921874, "step": 156605 }, { "epoch": 1.3541603617781082, "grad_norm": 9.648517652201635, "learning_rate": 1.4158216788467946e-06, "loss": 0.0627471923828125, "step": 156610 }, { "epoch": 1.3542035952996516, "grad_norm": 0.33357497794511076, "learning_rate": 1.4156486707816509e-06, "loss": 0.03920822143554688, "step": 156615 }, { "epoch": 1.3542468288211948, "grad_norm": 0.9511842688907053, "learning_rate": 1.4154756700233956e-06, "loss": 0.11006813049316407, "step": 156620 }, { "epoch": 1.354290062342738, "grad_norm": 0.8645859944110287, "learning_rate": 1.4153026765728274e-06, "loss": 0.064404296875, "step": 156625 }, { "epoch": 1.3543332958642813, "grad_norm": 15.251080779916608, "learning_rate": 1.4151296904307433e-06, "loss": 0.064239501953125, "step": 156630 }, { "epoch": 1.3543765293858245, "grad_norm": 0.9475532048994819, "learning_rate": 1.414956711597941e-06, "loss": 0.2152679443359375, "step": 156635 }, { "epoch": 1.354419762907368, "grad_norm": 13.41331808127437, "learning_rate": 1.4147837400752185e-06, "loss": 0.04793472290039062, "step": 156640 }, { "epoch": 1.3544629964289112, "grad_norm": 6.961223140546635, "learning_rate": 1.414610775863372e-06, "loss": 0.05361480712890625, "step": 156645 }, { "epoch": 1.3545062299504544, "grad_norm": 3.689173852080939, "learning_rate": 1.414437818963202e-06, "loss": 0.01812744140625, "step": 156650 }, { "epoch": 1.3545494634719977, "grad_norm": 6.206696527722843, "learning_rate": 1.4142648693755044e-06, "loss": 0.07816162109375, "step": 156655 }, { "epoch": 1.354592696993541, "grad_norm": 0.8711266019176623, "learning_rate": 1.414091927101077e-06, "loss": 0.10722427368164063, "step": 156660 }, { "epoch": 1.3546359305150841, "grad_norm": 1.494032247842764, "learning_rate": 1.4139189921407176e-06, "loss": 0.0248321533203125, "step": 156665 }, { "epoch": 1.3546791640366274, "grad_norm": 9.197018573748538, "learning_rate": 1.4137460644952224e-06, "loss": 0.0697662353515625, "step": 156670 }, { "epoch": 1.3547223975581706, "grad_norm": 6.216319789261038, "learning_rate": 1.4135731441653918e-06, "loss": 0.10181961059570313, "step": 156675 }, { "epoch": 1.354765631079714, "grad_norm": 9.18537955989359, "learning_rate": 1.4134002311520202e-06, "loss": 0.04058914184570313, "step": 156680 }, { "epoch": 1.3548088646012573, "grad_norm": 28.027919962193412, "learning_rate": 1.4132273254559076e-06, "loss": 0.05154972076416016, "step": 156685 }, { "epoch": 1.3548520981228005, "grad_norm": 1.4646720622658076, "learning_rate": 1.4130544270778502e-06, "loss": 0.05284194946289063, "step": 156690 }, { "epoch": 1.3548953316443437, "grad_norm": 0.6214752427298879, "learning_rate": 1.4128815360186457e-06, "loss": 0.043242263793945315, "step": 156695 }, { "epoch": 1.354938565165887, "grad_norm": 0.49146557243654587, "learning_rate": 1.4127086522790915e-06, "loss": 0.01693115234375, "step": 156700 }, { "epoch": 1.3549817986874304, "grad_norm": 17.989418121483777, "learning_rate": 1.4125357758599848e-06, "loss": 0.05504341125488281, "step": 156705 }, { "epoch": 1.3550250322089736, "grad_norm": 0.9150774460928535, "learning_rate": 1.4123629067621214e-06, "loss": 0.056468963623046875, "step": 156710 }, { "epoch": 1.3550682657305169, "grad_norm": 11.720146926049377, "learning_rate": 1.4121900449863014e-06, "loss": 0.12618637084960938, "step": 156715 }, { "epoch": 1.35511149925206, "grad_norm": 0.5699876359134014, "learning_rate": 1.4120171905333206e-06, "loss": 0.008292770385742188, "step": 156720 }, { "epoch": 1.3551547327736033, "grad_norm": 0.22147739901871902, "learning_rate": 1.4118443434039758e-06, "loss": 0.022239112854003908, "step": 156725 }, { "epoch": 1.3551979662951465, "grad_norm": 14.519592662670082, "learning_rate": 1.4116715035990639e-06, "loss": 0.17014312744140625, "step": 156730 }, { "epoch": 1.3552411998166898, "grad_norm": 2.126180694519004, "learning_rate": 1.4114986711193835e-06, "loss": 0.019980621337890626, "step": 156735 }, { "epoch": 1.355284433338233, "grad_norm": 0.1426499325774764, "learning_rate": 1.4113258459657298e-06, "loss": 0.18765716552734374, "step": 156740 }, { "epoch": 1.3553276668597765, "grad_norm": 6.398663752357912, "learning_rate": 1.4111530281389022e-06, "loss": 0.15601844787597657, "step": 156745 }, { "epoch": 1.3553709003813197, "grad_norm": 10.991935549801463, "learning_rate": 1.4109802176396963e-06, "loss": 0.0283782958984375, "step": 156750 }, { "epoch": 1.355414133902863, "grad_norm": 6.420444102259172, "learning_rate": 1.4108074144689093e-06, "loss": 0.0382537841796875, "step": 156755 }, { "epoch": 1.3554573674244061, "grad_norm": 1.4542456665190897, "learning_rate": 1.410634618627338e-06, "loss": 0.02892723083496094, "step": 156760 }, { "epoch": 1.3555006009459494, "grad_norm": 4.929304271406574, "learning_rate": 1.4104618301157796e-06, "loss": 0.028566741943359376, "step": 156765 }, { "epoch": 1.3555438344674928, "grad_norm": 0.5464597739965933, "learning_rate": 1.4102890489350295e-06, "loss": 0.1414276123046875, "step": 156770 }, { "epoch": 1.355587067989036, "grad_norm": 0.41009106832487696, "learning_rate": 1.410116275085887e-06, "loss": 0.06279449462890625, "step": 156775 }, { "epoch": 1.3556303015105793, "grad_norm": 0.9898950618642689, "learning_rate": 1.4099435085691474e-06, "loss": 0.021640777587890625, "step": 156780 }, { "epoch": 1.3556735350321225, "grad_norm": 7.31486564038768, "learning_rate": 1.4097707493856083e-06, "loss": 0.03290290832519531, "step": 156785 }, { "epoch": 1.3557167685536657, "grad_norm": 0.18263395075327946, "learning_rate": 1.4095979975360648e-06, "loss": 0.287553596496582, "step": 156790 }, { "epoch": 1.355760002075209, "grad_norm": 2.1201720453226187, "learning_rate": 1.4094252530213158e-06, "loss": 0.023590087890625, "step": 156795 }, { "epoch": 1.3558032355967522, "grad_norm": 5.715476810000515, "learning_rate": 1.409252515842157e-06, "loss": 0.07593345642089844, "step": 156800 }, { "epoch": 1.3558464691182954, "grad_norm": 0.2913243161428976, "learning_rate": 1.4090797859993841e-06, "loss": 0.010237312316894532, "step": 156805 }, { "epoch": 1.3558897026398389, "grad_norm": 0.13609146482993623, "learning_rate": 1.4089070634937955e-06, "loss": 0.03696155548095703, "step": 156810 }, { "epoch": 1.355932936161382, "grad_norm": 2.003886130062237, "learning_rate": 1.4087343483261871e-06, "loss": 0.06438560485839843, "step": 156815 }, { "epoch": 1.3559761696829253, "grad_norm": 37.58397699609673, "learning_rate": 1.4085616404973554e-06, "loss": 0.056125640869140625, "step": 156820 }, { "epoch": 1.3560194032044686, "grad_norm": 0.5661909215386929, "learning_rate": 1.4083889400080964e-06, "loss": 0.06385955810546876, "step": 156825 }, { "epoch": 1.356062636726012, "grad_norm": 63.00167025179402, "learning_rate": 1.4082162468592075e-06, "loss": 0.022253799438476562, "step": 156830 }, { "epoch": 1.3561058702475552, "grad_norm": 1.4433027751820702, "learning_rate": 1.4080435610514831e-06, "loss": 0.03650035858154297, "step": 156835 }, { "epoch": 1.3561491037690985, "grad_norm": 17.42123409490907, "learning_rate": 1.4078708825857226e-06, "loss": 0.09466514587402344, "step": 156840 }, { "epoch": 1.3561923372906417, "grad_norm": 14.588277170570027, "learning_rate": 1.4076982114627193e-06, "loss": 0.03411178588867188, "step": 156845 }, { "epoch": 1.356235570812185, "grad_norm": 1.9751372133483627, "learning_rate": 1.4075255476832724e-06, "loss": 0.06951990127563476, "step": 156850 }, { "epoch": 1.3562788043337282, "grad_norm": 0.1856538745464009, "learning_rate": 1.407352891248177e-06, "loss": 0.04478530883789063, "step": 156855 }, { "epoch": 1.3563220378552714, "grad_norm": 6.896164780735114, "learning_rate": 1.4071802421582295e-06, "loss": 0.0358123779296875, "step": 156860 }, { "epoch": 1.3563652713768146, "grad_norm": 0.4835013166103519, "learning_rate": 1.4070076004142259e-06, "loss": 0.038514328002929685, "step": 156865 }, { "epoch": 1.356408504898358, "grad_norm": 0.13925847735478417, "learning_rate": 1.4068349660169614e-06, "loss": 0.08549919128417968, "step": 156870 }, { "epoch": 1.3564517384199013, "grad_norm": 5.376581797760437, "learning_rate": 1.4066623389672343e-06, "loss": 0.0429840087890625, "step": 156875 }, { "epoch": 1.3564949719414445, "grad_norm": 0.24509714009186537, "learning_rate": 1.4064897192658397e-06, "loss": 0.008436965942382812, "step": 156880 }, { "epoch": 1.3565382054629878, "grad_norm": 2.440660776746134, "learning_rate": 1.4063171069135737e-06, "loss": 0.033596038818359375, "step": 156885 }, { "epoch": 1.356581438984531, "grad_norm": 4.3289083497620044, "learning_rate": 1.4061445019112325e-06, "loss": 0.027765274047851562, "step": 156890 }, { "epoch": 1.3566246725060744, "grad_norm": 0.08527441722903545, "learning_rate": 1.4059719042596108e-06, "loss": 0.006499862670898438, "step": 156895 }, { "epoch": 1.3566679060276177, "grad_norm": 20.052722030738753, "learning_rate": 1.405799313959507e-06, "loss": 0.19648056030273436, "step": 156900 }, { "epoch": 1.356711139549161, "grad_norm": 3.0365934383396107, "learning_rate": 1.405626731011715e-06, "loss": 0.0835235595703125, "step": 156905 }, { "epoch": 1.3567543730707041, "grad_norm": 0.42431802477958136, "learning_rate": 1.4054541554170322e-06, "loss": 0.00798969268798828, "step": 156910 }, { "epoch": 1.3567976065922474, "grad_norm": 0.4375228460574905, "learning_rate": 1.4052815871762544e-06, "loss": 0.030194091796875, "step": 156915 }, { "epoch": 1.3568408401137906, "grad_norm": 12.350192715773092, "learning_rate": 1.4051090262901768e-06, "loss": 0.05306797027587891, "step": 156920 }, { "epoch": 1.3568840736353338, "grad_norm": 2.0353787365813383, "learning_rate": 1.4049364727595957e-06, "loss": 0.08223342895507812, "step": 156925 }, { "epoch": 1.356927307156877, "grad_norm": 2.5609239679583338, "learning_rate": 1.4047639265853052e-06, "loss": 0.17188549041748047, "step": 156930 }, { "epoch": 1.3569705406784205, "grad_norm": 2.3336163755053003, "learning_rate": 1.404591387768104e-06, "loss": 0.018389511108398437, "step": 156935 }, { "epoch": 1.3570137741999637, "grad_norm": 0.4108720929786738, "learning_rate": 1.404418856308786e-06, "loss": 0.021503448486328125, "step": 156940 }, { "epoch": 1.357057007721507, "grad_norm": 19.158877136209988, "learning_rate": 1.4042463322081473e-06, "loss": 0.04937744140625, "step": 156945 }, { "epoch": 1.3571002412430502, "grad_norm": 3.6574810854684543, "learning_rate": 1.4040738154669837e-06, "loss": 0.03221893310546875, "step": 156950 }, { "epoch": 1.3571434747645934, "grad_norm": 7.363274279275321, "learning_rate": 1.4039013060860891e-06, "loss": 0.03616905212402344, "step": 156955 }, { "epoch": 1.3571867082861369, "grad_norm": 1.2288330075699416, "learning_rate": 1.4037288040662622e-06, "loss": 0.04056625366210938, "step": 156960 }, { "epoch": 1.35722994180768, "grad_norm": 13.103653031197474, "learning_rate": 1.4035563094082957e-06, "loss": 0.14533462524414062, "step": 156965 }, { "epoch": 1.3572731753292233, "grad_norm": 0.43453255634224003, "learning_rate": 1.4033838221129876e-06, "loss": 0.045223236083984375, "step": 156970 }, { "epoch": 1.3573164088507665, "grad_norm": 5.112503254573854, "learning_rate": 1.4032113421811323e-06, "loss": 0.024484825134277344, "step": 156975 }, { "epoch": 1.3573596423723098, "grad_norm": 3.7039667085644976, "learning_rate": 1.4030388696135253e-06, "loss": 0.01149444580078125, "step": 156980 }, { "epoch": 1.357402875893853, "grad_norm": 62.18763255902824, "learning_rate": 1.4028664044109618e-06, "loss": 0.20282459259033203, "step": 156985 }, { "epoch": 1.3574461094153962, "grad_norm": 30.55001985255835, "learning_rate": 1.402693946574237e-06, "loss": 0.06386947631835938, "step": 156990 }, { "epoch": 1.3574893429369395, "grad_norm": 0.5730814278886216, "learning_rate": 1.4025214961041458e-06, "loss": 0.04558486938476562, "step": 156995 }, { "epoch": 1.357532576458483, "grad_norm": 1.998921140148945, "learning_rate": 1.4023490530014856e-06, "loss": 0.023646736145019533, "step": 157000 }, { "epoch": 1.3575758099800261, "grad_norm": 1.5617657381114645, "learning_rate": 1.4021766172670499e-06, "loss": 0.0407989501953125, "step": 157005 }, { "epoch": 1.3576190435015694, "grad_norm": 13.160636870971697, "learning_rate": 1.4020041889016346e-06, "loss": 0.10374908447265625, "step": 157010 }, { "epoch": 1.3576622770231126, "grad_norm": 3.732255701184321, "learning_rate": 1.4018317679060339e-06, "loss": 0.049333953857421876, "step": 157015 }, { "epoch": 1.3577055105446558, "grad_norm": 0.9021828126517567, "learning_rate": 1.401659354281045e-06, "loss": 0.01346282958984375, "step": 157020 }, { "epoch": 1.3577487440661993, "grad_norm": 0.6468688390326959, "learning_rate": 1.4014869480274607e-06, "loss": 0.06809539794921875, "step": 157025 }, { "epoch": 1.3577919775877425, "grad_norm": 0.7937222493733921, "learning_rate": 1.4013145491460784e-06, "loss": 0.00480499267578125, "step": 157030 }, { "epoch": 1.3578352111092857, "grad_norm": 23.95806465317631, "learning_rate": 1.401142157637692e-06, "loss": 0.06307754516601563, "step": 157035 }, { "epoch": 1.357878444630829, "grad_norm": 0.2932502007890648, "learning_rate": 1.4009697735030973e-06, "loss": 0.007022857666015625, "step": 157040 }, { "epoch": 1.3579216781523722, "grad_norm": 117.26780311660748, "learning_rate": 1.400797396743088e-06, "loss": 0.0771575927734375, "step": 157045 }, { "epoch": 1.3579649116739154, "grad_norm": 1.153416602710059, "learning_rate": 1.40062502735846e-06, "loss": 0.044859695434570315, "step": 157050 }, { "epoch": 1.3580081451954586, "grad_norm": 3.5830402144913776, "learning_rate": 1.4004526653500083e-06, "loss": 0.03412322998046875, "step": 157055 }, { "epoch": 1.3580513787170019, "grad_norm": 13.697086025288574, "learning_rate": 1.400280310718526e-06, "loss": 0.050449371337890625, "step": 157060 }, { "epoch": 1.3580946122385453, "grad_norm": 9.274209555882697, "learning_rate": 1.4001079634648114e-06, "loss": 0.113421630859375, "step": 157065 }, { "epoch": 1.3581378457600886, "grad_norm": 0.3160207764463575, "learning_rate": 1.3999356235896555e-06, "loss": 0.07327346801757813, "step": 157070 }, { "epoch": 1.3581810792816318, "grad_norm": 3.829228114051776, "learning_rate": 1.3997632910938568e-06, "loss": 0.056249237060546874, "step": 157075 }, { "epoch": 1.358224312803175, "grad_norm": 7.762272489895046, "learning_rate": 1.3995909659782086e-06, "loss": 0.03199615478515625, "step": 157080 }, { "epoch": 1.3582675463247185, "grad_norm": 1.5002139746596799, "learning_rate": 1.3994186482435048e-06, "loss": 0.01232757568359375, "step": 157085 }, { "epoch": 1.3583107798462617, "grad_norm": 8.5118449148085, "learning_rate": 1.39924633789054e-06, "loss": 0.08828277587890625, "step": 157090 }, { "epoch": 1.358354013367805, "grad_norm": 6.850982380278806, "learning_rate": 1.3990740349201106e-06, "loss": 0.0252410888671875, "step": 157095 }, { "epoch": 1.3583972468893482, "grad_norm": 15.733321180498939, "learning_rate": 1.3989017393330105e-06, "loss": 0.04411106109619141, "step": 157100 }, { "epoch": 1.3584404804108914, "grad_norm": 21.380458815973945, "learning_rate": 1.3987294511300342e-06, "loss": 0.20155181884765624, "step": 157105 }, { "epoch": 1.3584837139324346, "grad_norm": 3.7092004567939223, "learning_rate": 1.3985571703119761e-06, "loss": 0.05424957275390625, "step": 157110 }, { "epoch": 1.3585269474539778, "grad_norm": 10.726079140548668, "learning_rate": 1.3983848968796305e-06, "loss": 0.029289627075195314, "step": 157115 }, { "epoch": 1.358570180975521, "grad_norm": 3.3073275160894116, "learning_rate": 1.3982126308337914e-06, "loss": 0.03963623046875, "step": 157120 }, { "epoch": 1.3586134144970645, "grad_norm": 1.1375811271505913, "learning_rate": 1.398040372175255e-06, "loss": 0.03126029968261719, "step": 157125 }, { "epoch": 1.3586566480186077, "grad_norm": 27.03535942614486, "learning_rate": 1.3978681209048143e-06, "loss": 0.06309051513671875, "step": 157130 }, { "epoch": 1.358699881540151, "grad_norm": 0.15895173974139046, "learning_rate": 1.397695877023265e-06, "loss": 0.3937530517578125, "step": 157135 }, { "epoch": 1.3587431150616942, "grad_norm": 6.16866075378716, "learning_rate": 1.3975236405314005e-06, "loss": 0.074261474609375, "step": 157140 }, { "epoch": 1.3587863485832374, "grad_norm": 1.1371487214602993, "learning_rate": 1.3973514114300155e-06, "loss": 0.031678962707519534, "step": 157145 }, { "epoch": 1.3588295821047809, "grad_norm": 6.4993803084891395, "learning_rate": 1.3971791897199045e-06, "loss": 0.016182327270507814, "step": 157150 }, { "epoch": 1.3588728156263241, "grad_norm": 2.707937632439858, "learning_rate": 1.39700697540186e-06, "loss": 0.054656600952148436, "step": 157155 }, { "epoch": 1.3589160491478673, "grad_norm": 0.012207506202885455, "learning_rate": 1.396834768476679e-06, "loss": 0.008085918426513673, "step": 157160 }, { "epoch": 1.3589592826694106, "grad_norm": 0.7619402236978561, "learning_rate": 1.3966625689451544e-06, "loss": 0.03720550537109375, "step": 157165 }, { "epoch": 1.3590025161909538, "grad_norm": 14.93113607438345, "learning_rate": 1.3964903768080801e-06, "loss": 0.05225296020507812, "step": 157170 }, { "epoch": 1.359045749712497, "grad_norm": 0.6162091527070833, "learning_rate": 1.3963181920662505e-06, "loss": 0.2757392883300781, "step": 157175 }, { "epoch": 1.3590889832340403, "grad_norm": 20.5525279901679, "learning_rate": 1.3961460147204588e-06, "loss": 0.05630035400390625, "step": 157180 }, { "epoch": 1.3591322167555835, "grad_norm": 4.474136547370204, "learning_rate": 1.3959738447715012e-06, "loss": 0.07784576416015625, "step": 157185 }, { "epoch": 1.359175450277127, "grad_norm": 12.702933581788917, "learning_rate": 1.395801682220169e-06, "loss": 0.0858978271484375, "step": 157190 }, { "epoch": 1.3592186837986702, "grad_norm": 4.492713837933464, "learning_rate": 1.3956295270672592e-06, "loss": 0.32870941162109374, "step": 157195 }, { "epoch": 1.3592619173202134, "grad_norm": 22.224464914804013, "learning_rate": 1.395457379313564e-06, "loss": 0.10921707153320312, "step": 157200 }, { "epoch": 1.3593051508417566, "grad_norm": 0.6450801662502618, "learning_rate": 1.3952852389598777e-06, "loss": 0.05809974670410156, "step": 157205 }, { "epoch": 1.3593483843632999, "grad_norm": 4.7872099573476365, "learning_rate": 1.3951131060069938e-06, "loss": 0.05695610046386719, "step": 157210 }, { "epoch": 1.3593916178848433, "grad_norm": 5.760650054432404, "learning_rate": 1.3949409804557069e-06, "loss": 0.0544830322265625, "step": 157215 }, { "epoch": 1.3594348514063865, "grad_norm": 13.32028995544274, "learning_rate": 1.3947688623068092e-06, "loss": 0.05243988037109375, "step": 157220 }, { "epoch": 1.3594780849279298, "grad_norm": 59.06184395063685, "learning_rate": 1.3945967515610964e-06, "loss": 0.13175201416015625, "step": 157225 }, { "epoch": 1.359521318449473, "grad_norm": 1.3656351860097513, "learning_rate": 1.3944246482193618e-06, "loss": 0.016565704345703126, "step": 157230 }, { "epoch": 1.3595645519710162, "grad_norm": 1.2185075247534183, "learning_rate": 1.3942525522823989e-06, "loss": 0.275274658203125, "step": 157235 }, { "epoch": 1.3596077854925595, "grad_norm": 0.3023480254073363, "learning_rate": 1.3940804637509998e-06, "loss": 0.060002899169921874, "step": 157240 }, { "epoch": 1.3596510190141027, "grad_norm": 1.696515524604246, "learning_rate": 1.3939083826259612e-06, "loss": 0.0798126220703125, "step": 157245 }, { "epoch": 1.359694252535646, "grad_norm": 0.5647603053968763, "learning_rate": 1.3937363089080743e-06, "loss": 0.13186187744140626, "step": 157250 }, { "epoch": 1.3597374860571894, "grad_norm": 0.32955841868709074, "learning_rate": 1.3935642425981344e-06, "loss": 0.051720046997070314, "step": 157255 }, { "epoch": 1.3597807195787326, "grad_norm": 17.723315815327343, "learning_rate": 1.3933921836969343e-06, "loss": 0.04311676025390625, "step": 157260 }, { "epoch": 1.3598239531002758, "grad_norm": 0.06168095055097102, "learning_rate": 1.3932201322052672e-06, "loss": 0.19025650024414062, "step": 157265 }, { "epoch": 1.359867186621819, "grad_norm": 5.158073602899367, "learning_rate": 1.3930480881239273e-06, "loss": 0.050739097595214847, "step": 157270 }, { "epoch": 1.3599104201433623, "grad_norm": 26.774335694462717, "learning_rate": 1.3928760514537078e-06, "loss": 0.2742462158203125, "step": 157275 }, { "epoch": 1.3599536536649057, "grad_norm": 4.958495672877267, "learning_rate": 1.3927040221954003e-06, "loss": 0.2827434539794922, "step": 157280 }, { "epoch": 1.359996887186449, "grad_norm": 2.9922759848268483, "learning_rate": 1.3925320003498011e-06, "loss": 0.104022216796875, "step": 157285 }, { "epoch": 1.3600401207079922, "grad_norm": 1.000480099837662, "learning_rate": 1.3923599859177026e-06, "loss": 0.10476341247558593, "step": 157290 }, { "epoch": 1.3600833542295354, "grad_norm": 0.7160472909296346, "learning_rate": 1.3921879788998963e-06, "loss": 0.073150634765625, "step": 157295 }, { "epoch": 1.3601265877510786, "grad_norm": 3.818715658055302, "learning_rate": 1.3920159792971784e-06, "loss": 0.04156646728515625, "step": 157300 }, { "epoch": 1.3601698212726219, "grad_norm": 0.37615963017465714, "learning_rate": 1.3918439871103404e-06, "loss": 0.03946342468261719, "step": 157305 }, { "epoch": 1.360213054794165, "grad_norm": 12.127578360238111, "learning_rate": 1.391672002340176e-06, "loss": 0.11124420166015625, "step": 157310 }, { "epoch": 1.3602562883157086, "grad_norm": 5.0245090018439456, "learning_rate": 1.3915000249874772e-06, "loss": 0.0399017333984375, "step": 157315 }, { "epoch": 1.3602995218372518, "grad_norm": 7.801119363099413, "learning_rate": 1.391328055053039e-06, "loss": 0.11884880065917969, "step": 157320 }, { "epoch": 1.360342755358795, "grad_norm": 5.967406377118007, "learning_rate": 1.3911560925376538e-06, "loss": 0.052534866333007815, "step": 157325 }, { "epoch": 1.3603859888803382, "grad_norm": 2.468652800820133, "learning_rate": 1.3909841374421144e-06, "loss": 0.04042129516601563, "step": 157330 }, { "epoch": 1.3604292224018815, "grad_norm": 3.975210782464851, "learning_rate": 1.3908121897672144e-06, "loss": 0.11008071899414062, "step": 157335 }, { "epoch": 1.360472455923425, "grad_norm": 1.8165735240358685, "learning_rate": 1.390640249513746e-06, "loss": 0.010822296142578125, "step": 157340 }, { "epoch": 1.3605156894449681, "grad_norm": 0.48373410076678647, "learning_rate": 1.3904683166825016e-06, "loss": 0.0499176025390625, "step": 157345 }, { "epoch": 1.3605589229665114, "grad_norm": 1.580348662444265, "learning_rate": 1.3902963912742762e-06, "loss": 0.0275054931640625, "step": 157350 }, { "epoch": 1.3606021564880546, "grad_norm": 10.285954594697643, "learning_rate": 1.3901244732898604e-06, "loss": 0.027581024169921874, "step": 157355 }, { "epoch": 1.3606453900095978, "grad_norm": 1.696336273919987, "learning_rate": 1.3899525627300498e-06, "loss": 0.030309677124023438, "step": 157360 }, { "epoch": 1.360688623531141, "grad_norm": 0.11612609120969809, "learning_rate": 1.3897806595956351e-06, "loss": 0.025732421875, "step": 157365 }, { "epoch": 1.3607318570526843, "grad_norm": 1.3386444491050102, "learning_rate": 1.38960876388741e-06, "loss": 0.017439651489257812, "step": 157370 }, { "epoch": 1.3607750905742275, "grad_norm": 4.611534112975828, "learning_rate": 1.3894368756061658e-06, "loss": 0.037401580810546876, "step": 157375 }, { "epoch": 1.360818324095771, "grad_norm": 2.30748983038104, "learning_rate": 1.3892649947526977e-06, "loss": 0.07128562927246093, "step": 157380 }, { "epoch": 1.3608615576173142, "grad_norm": 5.056330737556388, "learning_rate": 1.389093121327797e-06, "loss": 0.07474784851074219, "step": 157385 }, { "epoch": 1.3609047911388574, "grad_norm": 14.90221115703736, "learning_rate": 1.3889212553322566e-06, "loss": 0.1910778045654297, "step": 157390 }, { "epoch": 1.3609480246604007, "grad_norm": 0.1870539049217402, "learning_rate": 1.3887493967668689e-06, "loss": 0.16163387298583984, "step": 157395 }, { "epoch": 1.3609912581819439, "grad_norm": 5.355997973235915, "learning_rate": 1.3885775456324267e-06, "loss": 0.028863906860351562, "step": 157400 }, { "epoch": 1.3610344917034873, "grad_norm": 2.7552012769766847, "learning_rate": 1.3884057019297213e-06, "loss": 0.105670166015625, "step": 157405 }, { "epoch": 1.3610777252250306, "grad_norm": 4.374404061858212, "learning_rate": 1.3882338656595477e-06, "loss": 0.04835186004638672, "step": 157410 }, { "epoch": 1.3611209587465738, "grad_norm": 1.3738833503244474, "learning_rate": 1.3880620368226956e-06, "loss": 0.01099700927734375, "step": 157415 }, { "epoch": 1.361164192268117, "grad_norm": 8.842372683065658, "learning_rate": 1.3878902154199603e-06, "loss": 0.11256561279296876, "step": 157420 }, { "epoch": 1.3612074257896603, "grad_norm": 1.165725380911932, "learning_rate": 1.3877184014521329e-06, "loss": 0.11601486206054687, "step": 157425 }, { "epoch": 1.3612506593112035, "grad_norm": 1.57288147037892, "learning_rate": 1.3875465949200056e-06, "loss": 0.008576202392578124, "step": 157430 }, { "epoch": 1.3612938928327467, "grad_norm": 0.5901091052749283, "learning_rate": 1.3873747958243708e-06, "loss": 0.0199005126953125, "step": 157435 }, { "epoch": 1.36133712635429, "grad_norm": 11.419543484519384, "learning_rate": 1.3872030041660205e-06, "loss": 0.030029296875, "step": 157440 }, { "epoch": 1.3613803598758334, "grad_norm": 7.711299351086576, "learning_rate": 1.3870312199457477e-06, "loss": 0.0295989990234375, "step": 157445 }, { "epoch": 1.3614235933973766, "grad_norm": 15.525277514232265, "learning_rate": 1.3868594431643449e-06, "loss": 0.05389556884765625, "step": 157450 }, { "epoch": 1.3614668269189198, "grad_norm": 13.66437834485074, "learning_rate": 1.3866876738226037e-06, "loss": 0.0596588134765625, "step": 157455 }, { "epoch": 1.361510060440463, "grad_norm": 11.497714114171687, "learning_rate": 1.3865159119213165e-06, "loss": 0.13172607421875, "step": 157460 }, { "epoch": 1.3615532939620063, "grad_norm": 3.832725506155735, "learning_rate": 1.386344157461274e-06, "loss": 0.03236713409423828, "step": 157465 }, { "epoch": 1.3615965274835498, "grad_norm": 30.803813028183047, "learning_rate": 1.386172410443271e-06, "loss": 0.06912384033203126, "step": 157470 }, { "epoch": 1.361639761005093, "grad_norm": 1.2153986816226212, "learning_rate": 1.386000670868097e-06, "loss": 0.08626174926757812, "step": 157475 }, { "epoch": 1.3616829945266362, "grad_norm": 0.8219990028138768, "learning_rate": 1.3858289387365467e-06, "loss": 0.02758941650390625, "step": 157480 }, { "epoch": 1.3617262280481794, "grad_norm": 2.1149363963822245, "learning_rate": 1.3856572140494101e-06, "loss": 0.04078598022460937, "step": 157485 }, { "epoch": 1.3617694615697227, "grad_norm": 4.095228993693015, "learning_rate": 1.3854854968074803e-06, "loss": 0.1513885498046875, "step": 157490 }, { "epoch": 1.361812695091266, "grad_norm": 15.697105281887099, "learning_rate": 1.3853137870115487e-06, "loss": 0.1571685791015625, "step": 157495 }, { "epoch": 1.3618559286128091, "grad_norm": 0.1866107040537911, "learning_rate": 1.385142084662407e-06, "loss": 0.012372398376464843, "step": 157500 }, { "epoch": 1.3618991621343524, "grad_norm": 4.16872080183594, "learning_rate": 1.3849703897608464e-06, "loss": 0.05964508056640625, "step": 157505 }, { "epoch": 1.3619423956558958, "grad_norm": 8.020623146366226, "learning_rate": 1.3847987023076604e-06, "loss": 0.0353057861328125, "step": 157510 }, { "epoch": 1.361985629177439, "grad_norm": 11.251926081065113, "learning_rate": 1.3846270223036404e-06, "loss": 0.06321754455566406, "step": 157515 }, { "epoch": 1.3620288626989823, "grad_norm": 2.0001082560961554, "learning_rate": 1.3844553497495767e-06, "loss": 0.038547515869140625, "step": 157520 }, { "epoch": 1.3620720962205255, "grad_norm": 29.819249369885014, "learning_rate": 1.3842836846462634e-06, "loss": 0.07532997131347656, "step": 157525 }, { "epoch": 1.362115329742069, "grad_norm": 2.3841175482523473, "learning_rate": 1.3841120269944908e-06, "loss": 0.029286956787109374, "step": 157530 }, { "epoch": 1.3621585632636122, "grad_norm": 1.9660152798960813, "learning_rate": 1.3839403767950496e-06, "loss": 0.14777641296386718, "step": 157535 }, { "epoch": 1.3622017967851554, "grad_norm": 30.756736177051966, "learning_rate": 1.383768734048734e-06, "loss": 0.21625213623046874, "step": 157540 }, { "epoch": 1.3622450303066986, "grad_norm": 0.05540310338368466, "learning_rate": 1.3835970987563335e-06, "loss": 0.06222972869873047, "step": 157545 }, { "epoch": 1.3622882638282419, "grad_norm": 1.90758136242865, "learning_rate": 1.3834254709186411e-06, "loss": 0.10593185424804688, "step": 157550 }, { "epoch": 1.362331497349785, "grad_norm": 0.2550359702229836, "learning_rate": 1.3832538505364472e-06, "loss": 0.02699127197265625, "step": 157555 }, { "epoch": 1.3623747308713283, "grad_norm": 0.15507612808523552, "learning_rate": 1.3830822376105437e-06, "loss": 0.03485870361328125, "step": 157560 }, { "epoch": 1.3624179643928715, "grad_norm": 0.36022829018040636, "learning_rate": 1.382910632141722e-06, "loss": 0.00802764892578125, "step": 157565 }, { "epoch": 1.362461197914415, "grad_norm": 2.87515210911316, "learning_rate": 1.3827390341307726e-06, "loss": 0.13695144653320312, "step": 157570 }, { "epoch": 1.3625044314359582, "grad_norm": 0.814179130735101, "learning_rate": 1.382567443578489e-06, "loss": 0.06000518798828125, "step": 157575 }, { "epoch": 1.3625476649575015, "grad_norm": 0.9428957850459653, "learning_rate": 1.3823958604856604e-06, "loss": 0.060207366943359375, "step": 157580 }, { "epoch": 1.3625908984790447, "grad_norm": 4.997901991577486, "learning_rate": 1.3822242848530802e-06, "loss": 0.024579429626464845, "step": 157585 }, { "epoch": 1.362634132000588, "grad_norm": 0.19328099086690767, "learning_rate": 1.3820527166815385e-06, "loss": 0.010103607177734375, "step": 157590 }, { "epoch": 1.3626773655221314, "grad_norm": 2.4314894433379166, "learning_rate": 1.381881155971827e-06, "loss": 0.01821746826171875, "step": 157595 }, { "epoch": 1.3627205990436746, "grad_norm": 1.138489862178894, "learning_rate": 1.381709602724735e-06, "loss": 0.012877655029296876, "step": 157600 }, { "epoch": 1.3627638325652178, "grad_norm": 0.6645201904704698, "learning_rate": 1.381538056941057e-06, "loss": 0.030574798583984375, "step": 157605 }, { "epoch": 1.362807066086761, "grad_norm": 0.28988623676258163, "learning_rate": 1.3813665186215821e-06, "loss": 0.09008331298828125, "step": 157610 }, { "epoch": 1.3628502996083043, "grad_norm": 0.16255662260910284, "learning_rate": 1.381194987767102e-06, "loss": 0.016776275634765626, "step": 157615 }, { "epoch": 1.3628935331298475, "grad_norm": 0.23234511496678836, "learning_rate": 1.3810234643784075e-06, "loss": 0.046441841125488284, "step": 157620 }, { "epoch": 1.3629367666513907, "grad_norm": 0.23151394459891733, "learning_rate": 1.3808519484562894e-06, "loss": 0.031069564819335937, "step": 157625 }, { "epoch": 1.362980000172934, "grad_norm": 1.1769319340035844, "learning_rate": 1.3806804400015386e-06, "loss": 0.011516952514648437, "step": 157630 }, { "epoch": 1.3630232336944774, "grad_norm": 1.593435675247795, "learning_rate": 1.380508939014947e-06, "loss": 0.3120269775390625, "step": 157635 }, { "epoch": 1.3630664672160206, "grad_norm": 1.2964435255703854, "learning_rate": 1.3803374454973044e-06, "loss": 0.09654817581176758, "step": 157640 }, { "epoch": 1.3631097007375639, "grad_norm": 4.967466113541895, "learning_rate": 1.3801659594494033e-06, "loss": 0.042024993896484376, "step": 157645 }, { "epoch": 1.363152934259107, "grad_norm": 1.4145554667533413, "learning_rate": 1.3799944808720336e-06, "loss": 0.09121551513671874, "step": 157650 }, { "epoch": 1.3631961677806503, "grad_norm": 19.398276349628244, "learning_rate": 1.3798230097659865e-06, "loss": 0.0467219352722168, "step": 157655 }, { "epoch": 1.3632394013021938, "grad_norm": 61.90615121386049, "learning_rate": 1.3796515461320523e-06, "loss": 0.24905242919921874, "step": 157660 }, { "epoch": 1.363282634823737, "grad_norm": 9.606593346318537, "learning_rate": 1.3794800899710208e-06, "loss": 0.020928955078125, "step": 157665 }, { "epoch": 1.3633258683452802, "grad_norm": 2.7746845185458495, "learning_rate": 1.3793086412836853e-06, "loss": 0.11880416870117187, "step": 157670 }, { "epoch": 1.3633691018668235, "grad_norm": 14.790929811998678, "learning_rate": 1.3791372000708351e-06, "loss": 0.06159534454345703, "step": 157675 }, { "epoch": 1.3634123353883667, "grad_norm": 9.870218146960468, "learning_rate": 1.3789657663332606e-06, "loss": 0.04526653289794922, "step": 157680 }, { "epoch": 1.36345556890991, "grad_norm": 0.9494782121357619, "learning_rate": 1.378794340071752e-06, "loss": 0.031682586669921874, "step": 157685 }, { "epoch": 1.3634988024314532, "grad_norm": 0.24976119005575195, "learning_rate": 1.3786229212871014e-06, "loss": 0.03353958129882813, "step": 157690 }, { "epoch": 1.3635420359529964, "grad_norm": 5.422087619052385, "learning_rate": 1.378451509980098e-06, "loss": 0.03882732391357422, "step": 157695 }, { "epoch": 1.3635852694745398, "grad_norm": 0.7112306165170559, "learning_rate": 1.3782801061515336e-06, "loss": 0.02481689453125, "step": 157700 }, { "epoch": 1.363628502996083, "grad_norm": 0.7842202200384035, "learning_rate": 1.3781087098021984e-06, "loss": 0.027007675170898436, "step": 157705 }, { "epoch": 1.3636717365176263, "grad_norm": 10.86427812593486, "learning_rate": 1.3779373209328823e-06, "loss": 0.05888938903808594, "step": 157710 }, { "epoch": 1.3637149700391695, "grad_norm": 0.39976675698981107, "learning_rate": 1.377765939544376e-06, "loss": 0.03223562240600586, "step": 157715 }, { "epoch": 1.3637582035607128, "grad_norm": 15.416143975332277, "learning_rate": 1.3775945656374698e-06, "loss": 0.2553285598754883, "step": 157720 }, { "epoch": 1.3638014370822562, "grad_norm": 1.0618618555623633, "learning_rate": 1.3774231992129533e-06, "loss": 0.01621589660644531, "step": 157725 }, { "epoch": 1.3638446706037994, "grad_norm": 40.10775233698186, "learning_rate": 1.3772518402716184e-06, "loss": 0.1811840057373047, "step": 157730 }, { "epoch": 1.3638879041253427, "grad_norm": 3.8400146444496213, "learning_rate": 1.3770804888142545e-06, "loss": 0.08682365417480468, "step": 157735 }, { "epoch": 1.363931137646886, "grad_norm": 1.6672977271699116, "learning_rate": 1.3769091448416522e-06, "loss": 0.01726245880126953, "step": 157740 }, { "epoch": 1.3639743711684291, "grad_norm": 1.6268088134690533, "learning_rate": 1.3767378083546004e-06, "loss": 0.19112892150878907, "step": 157745 }, { "epoch": 1.3640176046899724, "grad_norm": 1.7094375685658068, "learning_rate": 1.3765664793538914e-06, "loss": 0.06276779174804688, "step": 157750 }, { "epoch": 1.3640608382115156, "grad_norm": 14.442687098263, "learning_rate": 1.3763951578403147e-06, "loss": 0.04539031982421875, "step": 157755 }, { "epoch": 1.3641040717330588, "grad_norm": 0.2113313723194795, "learning_rate": 1.3762238438146588e-06, "loss": 0.21935768127441407, "step": 157760 }, { "epoch": 1.3641473052546023, "grad_norm": 31.933627605219506, "learning_rate": 1.376052537277716e-06, "loss": 0.12601394653320314, "step": 157765 }, { "epoch": 1.3641905387761455, "grad_norm": 3.9037415972264693, "learning_rate": 1.3758812382302752e-06, "loss": 0.016217422485351563, "step": 157770 }, { "epoch": 1.3642337722976887, "grad_norm": 1.6470471773910367, "learning_rate": 1.3757099466731272e-06, "loss": 0.03819198608398437, "step": 157775 }, { "epoch": 1.364277005819232, "grad_norm": 25.87000047315682, "learning_rate": 1.375538662607061e-06, "loss": 0.07169418334960938, "step": 157780 }, { "epoch": 1.3643202393407754, "grad_norm": 4.161621047920493, "learning_rate": 1.375367386032867e-06, "loss": 0.036895751953125, "step": 157785 }, { "epoch": 1.3643634728623186, "grad_norm": 8.714214512185128, "learning_rate": 1.375196116951334e-06, "loss": 0.3222625732421875, "step": 157790 }, { "epoch": 1.3644067063838619, "grad_norm": 1.1412232505456117, "learning_rate": 1.3750248553632541e-06, "loss": 0.07546348571777343, "step": 157795 }, { "epoch": 1.364449939905405, "grad_norm": 30.47774287446912, "learning_rate": 1.3748536012694159e-06, "loss": 0.0862457275390625, "step": 157800 }, { "epoch": 1.3644931734269483, "grad_norm": 1.8961734320189103, "learning_rate": 1.3746823546706081e-06, "loss": 0.06525535583496093, "step": 157805 }, { "epoch": 1.3645364069484915, "grad_norm": 4.937545731372457, "learning_rate": 1.374511115567623e-06, "loss": 0.03918914794921875, "step": 157810 }, { "epoch": 1.3645796404700348, "grad_norm": 0.612179260020173, "learning_rate": 1.3743398839612488e-06, "loss": 0.013194465637207031, "step": 157815 }, { "epoch": 1.364622873991578, "grad_norm": 4.42849134239654, "learning_rate": 1.3741686598522755e-06, "loss": 0.0511077880859375, "step": 157820 }, { "epoch": 1.3646661075131215, "grad_norm": 0.18349750503737106, "learning_rate": 1.3739974432414917e-06, "loss": 0.08977203369140625, "step": 157825 }, { "epoch": 1.3647093410346647, "grad_norm": 0.1186646725753824, "learning_rate": 1.373826234129689e-06, "loss": 0.030255889892578124, "step": 157830 }, { "epoch": 1.364752574556208, "grad_norm": 0.9761350643072517, "learning_rate": 1.373655032517656e-06, "loss": 0.18814988136291505, "step": 157835 }, { "epoch": 1.3647958080777511, "grad_norm": 8.18849399542558, "learning_rate": 1.373483838406182e-06, "loss": 0.18237724304199218, "step": 157840 }, { "epoch": 1.3648390415992944, "grad_norm": 0.36942285308718076, "learning_rate": 1.3733126517960571e-06, "loss": 0.059028244018554686, "step": 157845 }, { "epoch": 1.3648822751208378, "grad_norm": 7.649410333803453, "learning_rate": 1.3731414726880708e-06, "loss": 0.012659072875976562, "step": 157850 }, { "epoch": 1.364925508642381, "grad_norm": 1.941923673278642, "learning_rate": 1.3729703010830107e-06, "loss": 0.09442214965820313, "step": 157855 }, { "epoch": 1.3649687421639243, "grad_norm": 0.5021916879808621, "learning_rate": 1.3727991369816693e-06, "loss": 0.15762977600097655, "step": 157860 }, { "epoch": 1.3650119756854675, "grad_norm": 3.4433691850555816, "learning_rate": 1.3726279803848332e-06, "loss": 0.026181602478027345, "step": 157865 }, { "epoch": 1.3650552092070107, "grad_norm": 3.608040108235913, "learning_rate": 1.3724568312932938e-06, "loss": 0.05281562805175781, "step": 157870 }, { "epoch": 1.365098442728554, "grad_norm": 1.011367054540597, "learning_rate": 1.3722856897078403e-06, "loss": 0.15600814819335937, "step": 157875 }, { "epoch": 1.3651416762500972, "grad_norm": 0.5707339771084927, "learning_rate": 1.3721145556292611e-06, "loss": 0.023626708984375, "step": 157880 }, { "epoch": 1.3651849097716404, "grad_norm": 1.0046884021376095, "learning_rate": 1.3719434290583447e-06, "loss": 0.5217720031738281, "step": 157885 }, { "epoch": 1.3652281432931839, "grad_norm": 40.205449631972144, "learning_rate": 1.3717723099958822e-06, "loss": 0.10377655029296876, "step": 157890 }, { "epoch": 1.365271376814727, "grad_norm": 9.535165538788881, "learning_rate": 1.3716011984426622e-06, "loss": 0.03100433349609375, "step": 157895 }, { "epoch": 1.3653146103362703, "grad_norm": 0.40911730269101104, "learning_rate": 1.3714300943994732e-06, "loss": 0.05736846923828125, "step": 157900 }, { "epoch": 1.3653578438578136, "grad_norm": 8.466084152654554, "learning_rate": 1.3712589978671052e-06, "loss": 0.07572288513183593, "step": 157905 }, { "epoch": 1.3654010773793568, "grad_norm": 0.1413677304663297, "learning_rate": 1.3710879088463456e-06, "loss": 0.06518745422363281, "step": 157910 }, { "epoch": 1.3654443109009002, "grad_norm": 10.368969922191923, "learning_rate": 1.3709168273379857e-06, "loss": 0.08035964965820312, "step": 157915 }, { "epoch": 1.3654875444224435, "grad_norm": 0.40851058597240797, "learning_rate": 1.3707457533428121e-06, "loss": 0.029216194152832033, "step": 157920 }, { "epoch": 1.3655307779439867, "grad_norm": 8.233460549923757, "learning_rate": 1.3705746868616162e-06, "loss": 0.06313629150390625, "step": 157925 }, { "epoch": 1.36557401146553, "grad_norm": 0.1224273404904853, "learning_rate": 1.3704036278951863e-06, "loss": 0.005996036529541016, "step": 157930 }, { "epoch": 1.3656172449870732, "grad_norm": 61.38008103919972, "learning_rate": 1.3702325764443105e-06, "loss": 0.46502227783203126, "step": 157935 }, { "epoch": 1.3656604785086164, "grad_norm": 6.040470134646063, "learning_rate": 1.3700615325097781e-06, "loss": 0.08377208709716796, "step": 157940 }, { "epoch": 1.3657037120301596, "grad_norm": 1.0435647798154866, "learning_rate": 1.369890496092378e-06, "loss": 0.006362152099609375, "step": 157945 }, { "epoch": 1.3657469455517028, "grad_norm": 2.464908439579328, "learning_rate": 1.3697194671928978e-06, "loss": 0.11142940521240234, "step": 157950 }, { "epoch": 1.3657901790732463, "grad_norm": 1.3163397844106355, "learning_rate": 1.3695484458121283e-06, "loss": 0.05016021728515625, "step": 157955 }, { "epoch": 1.3658334125947895, "grad_norm": 24.1413362891499, "learning_rate": 1.3693774319508575e-06, "loss": 0.14711456298828124, "step": 157960 }, { "epoch": 1.3658766461163327, "grad_norm": 4.5022660025605745, "learning_rate": 1.369206425609874e-06, "loss": 0.0246826171875, "step": 157965 }, { "epoch": 1.365919879637876, "grad_norm": 0.8434519942725418, "learning_rate": 1.369035426789965e-06, "loss": 0.042665863037109376, "step": 157970 }, { "epoch": 1.3659631131594192, "grad_norm": 0.33017370301046756, "learning_rate": 1.3688644354919216e-06, "loss": 0.11911640167236329, "step": 157975 }, { "epoch": 1.3660063466809627, "grad_norm": 8.098703853487068, "learning_rate": 1.3686934517165304e-06, "loss": 0.09359283447265625, "step": 157980 }, { "epoch": 1.3660495802025059, "grad_norm": 0.06834819337251888, "learning_rate": 1.3685224754645817e-06, "loss": 0.09487228393554688, "step": 157985 }, { "epoch": 1.3660928137240491, "grad_norm": 23.999065320872138, "learning_rate": 1.3683515067368636e-06, "loss": 0.0899810791015625, "step": 157990 }, { "epoch": 1.3661360472455923, "grad_norm": 4.618155893296768, "learning_rate": 1.3681805455341637e-06, "loss": 0.08807849884033203, "step": 157995 }, { "epoch": 1.3661792807671356, "grad_norm": 2.9820885379088597, "learning_rate": 1.368009591857271e-06, "loss": 0.20302276611328124, "step": 158000 }, { "epoch": 1.3662225142886788, "grad_norm": 3.4602824745251177, "learning_rate": 1.367838645706974e-06, "loss": 0.02539215087890625, "step": 158005 }, { "epoch": 1.366265747810222, "grad_norm": 1.2884623398232131, "learning_rate": 1.367667707084061e-06, "loss": 0.010165786743164063, "step": 158010 }, { "epoch": 1.3663089813317653, "grad_norm": 0.5441485646014439, "learning_rate": 1.3674967759893191e-06, "loss": 0.020722579956054688, "step": 158015 }, { "epoch": 1.3663522148533087, "grad_norm": 5.419737277227587, "learning_rate": 1.367325852423539e-06, "loss": 0.06425933837890625, "step": 158020 }, { "epoch": 1.366395448374852, "grad_norm": 2.164912867017548, "learning_rate": 1.3671549363875079e-06, "loss": 0.06012458801269531, "step": 158025 }, { "epoch": 1.3664386818963952, "grad_norm": 0.6406618434937839, "learning_rate": 1.3669840278820124e-06, "loss": 0.08887176513671875, "step": 158030 }, { "epoch": 1.3664819154179384, "grad_norm": 15.921118063386858, "learning_rate": 1.3668131269078437e-06, "loss": 0.11873970031738282, "step": 158035 }, { "epoch": 1.3665251489394818, "grad_norm": 0.5781668089805804, "learning_rate": 1.3666422334657887e-06, "loss": 0.007846832275390625, "step": 158040 }, { "epoch": 1.366568382461025, "grad_norm": 4.99809419371283, "learning_rate": 1.3664713475566342e-06, "loss": 0.041253662109375, "step": 158045 }, { "epoch": 1.3666116159825683, "grad_norm": 21.510121643029155, "learning_rate": 1.3663004691811705e-06, "loss": 0.054794692993164064, "step": 158050 }, { "epoch": 1.3666548495041115, "grad_norm": 1.3242012624304114, "learning_rate": 1.3661295983401847e-06, "loss": 0.10000839233398437, "step": 158055 }, { "epoch": 1.3666980830256548, "grad_norm": 0.1546960849642121, "learning_rate": 1.3659587350344646e-06, "loss": 0.017761993408203124, "step": 158060 }, { "epoch": 1.366741316547198, "grad_norm": 1.6232936651558267, "learning_rate": 1.3657878792647987e-06, "loss": 0.0224365234375, "step": 158065 }, { "epoch": 1.3667845500687412, "grad_norm": 0.17252209354599013, "learning_rate": 1.3656170310319746e-06, "loss": 0.07231521606445312, "step": 158070 }, { "epoch": 1.3668277835902845, "grad_norm": 1.2565706747604588, "learning_rate": 1.3654461903367794e-06, "loss": 0.03483657836914063, "step": 158075 }, { "epoch": 1.366871017111828, "grad_norm": 0.6281416922410599, "learning_rate": 1.365275357180003e-06, "loss": 0.10078125, "step": 158080 }, { "epoch": 1.3669142506333711, "grad_norm": 6.742114429846426, "learning_rate": 1.3651045315624308e-06, "loss": 0.01538543701171875, "step": 158085 }, { "epoch": 1.3669574841549144, "grad_norm": 0.5022968932729022, "learning_rate": 1.3649337134848534e-06, "loss": 0.02297210693359375, "step": 158090 }, { "epoch": 1.3670007176764576, "grad_norm": 4.648732162109755, "learning_rate": 1.3647629029480572e-06, "loss": 0.059659385681152345, "step": 158095 }, { "epoch": 1.3670439511980008, "grad_norm": 0.11535526296171315, "learning_rate": 1.3645920999528302e-06, "loss": 0.08939971923828124, "step": 158100 }, { "epoch": 1.3670871847195443, "grad_norm": 9.651683385389854, "learning_rate": 1.3644213044999595e-06, "loss": 0.037880325317382814, "step": 158105 }, { "epoch": 1.3671304182410875, "grad_norm": 0.06573044381780956, "learning_rate": 1.3642505165902325e-06, "loss": 0.018128585815429688, "step": 158110 }, { "epoch": 1.3671736517626307, "grad_norm": 0.7019961462417398, "learning_rate": 1.3640797362244385e-06, "loss": 0.084832763671875, "step": 158115 }, { "epoch": 1.367216885284174, "grad_norm": 0.16810138854624337, "learning_rate": 1.3639089634033643e-06, "loss": 0.23518142700195313, "step": 158120 }, { "epoch": 1.3672601188057172, "grad_norm": 0.6683194704411969, "learning_rate": 1.3637381981277972e-06, "loss": 0.04434356689453125, "step": 158125 }, { "epoch": 1.3673033523272604, "grad_norm": 10.420013878920344, "learning_rate": 1.3635674403985252e-06, "loss": 0.04707260131835937, "step": 158130 }, { "epoch": 1.3673465858488036, "grad_norm": 1.0276858052097986, "learning_rate": 1.3633966902163343e-06, "loss": 0.048343658447265625, "step": 158135 }, { "epoch": 1.3673898193703469, "grad_norm": 0.7909927416489808, "learning_rate": 1.3632259475820145e-06, "loss": 0.068719482421875, "step": 158140 }, { "epoch": 1.3674330528918903, "grad_norm": 5.8589456722874, "learning_rate": 1.3630552124963506e-06, "loss": 0.027837753295898438, "step": 158145 }, { "epoch": 1.3674762864134336, "grad_norm": 0.26120613142012045, "learning_rate": 1.362884484960133e-06, "loss": 0.44858589172363283, "step": 158150 }, { "epoch": 1.3675195199349768, "grad_norm": 33.98398336002449, "learning_rate": 1.362713764974147e-06, "loss": 0.05988006591796875, "step": 158155 }, { "epoch": 1.36756275345652, "grad_norm": 1.9055433840582854, "learning_rate": 1.3625430525391805e-06, "loss": 0.0612030029296875, "step": 158160 }, { "epoch": 1.3676059869780632, "grad_norm": 0.09219770049499083, "learning_rate": 1.3623723476560213e-06, "loss": 0.06253433227539062, "step": 158165 }, { "epoch": 1.3676492204996067, "grad_norm": 4.908068389912188, "learning_rate": 1.3622016503254556e-06, "loss": 0.01535491943359375, "step": 158170 }, { "epoch": 1.36769245402115, "grad_norm": 0.4382792190456775, "learning_rate": 1.36203096054827e-06, "loss": 0.008538055419921874, "step": 158175 }, { "epoch": 1.3677356875426931, "grad_norm": 0.4205340292896141, "learning_rate": 1.3618602783252542e-06, "loss": 0.028952789306640626, "step": 158180 }, { "epoch": 1.3677789210642364, "grad_norm": 6.291836901341886, "learning_rate": 1.3616896036571943e-06, "loss": 0.048918533325195315, "step": 158185 }, { "epoch": 1.3678221545857796, "grad_norm": 1.790510119639229, "learning_rate": 1.361518936544877e-06, "loss": 0.028226852416992188, "step": 158190 }, { "epoch": 1.3678653881073228, "grad_norm": 6.3058640195249485, "learning_rate": 1.361348276989088e-06, "loss": 0.06832046508789062, "step": 158195 }, { "epoch": 1.367908621628866, "grad_norm": 0.5822738029274525, "learning_rate": 1.3611776249906177e-06, "loss": 0.06033649444580078, "step": 158200 }, { "epoch": 1.3679518551504093, "grad_norm": 1.7542038209664197, "learning_rate": 1.3610069805502498e-06, "loss": 0.19233016967773436, "step": 158205 }, { "epoch": 1.3679950886719527, "grad_norm": 8.75135251474119, "learning_rate": 1.3608363436687746e-06, "loss": 0.07019805908203125, "step": 158210 }, { "epoch": 1.368038322193496, "grad_norm": 8.455695426129283, "learning_rate": 1.3606657143469768e-06, "loss": 0.07031974792480469, "step": 158215 }, { "epoch": 1.3680815557150392, "grad_norm": 0.3996674881567622, "learning_rate": 1.3604950925856442e-06, "loss": 0.03418121337890625, "step": 158220 }, { "epoch": 1.3681247892365824, "grad_norm": 0.5510027210944046, "learning_rate": 1.360324478385563e-06, "loss": 0.0571746826171875, "step": 158225 }, { "epoch": 1.3681680227581259, "grad_norm": 0.44764088570210736, "learning_rate": 1.3601538717475207e-06, "loss": 0.011173248291015625, "step": 158230 }, { "epoch": 1.368211256279669, "grad_norm": 0.5005278887978376, "learning_rate": 1.3599832726723025e-06, "loss": 0.026160049438476562, "step": 158235 }, { "epoch": 1.3682544898012123, "grad_norm": 3.4016523711504143, "learning_rate": 1.359812681160698e-06, "loss": 0.012038612365722656, "step": 158240 }, { "epoch": 1.3682977233227556, "grad_norm": 0.5251323743087468, "learning_rate": 1.3596420972134923e-06, "loss": 0.013494873046875, "step": 158245 }, { "epoch": 1.3683409568442988, "grad_norm": 13.100907573362655, "learning_rate": 1.3594715208314724e-06, "loss": 0.11202392578125, "step": 158250 }, { "epoch": 1.368384190365842, "grad_norm": 0.447979967938239, "learning_rate": 1.3593009520154233e-06, "loss": 0.11002960205078124, "step": 158255 }, { "epoch": 1.3684274238873853, "grad_norm": 23.150234372059092, "learning_rate": 1.359130390766135e-06, "loss": 0.11695175170898438, "step": 158260 }, { "epoch": 1.3684706574089285, "grad_norm": 1.641477216572189, "learning_rate": 1.358959837084392e-06, "loss": 0.015198516845703124, "step": 158265 }, { "epoch": 1.368513890930472, "grad_norm": 1.9981387554474561, "learning_rate": 1.35878929097098e-06, "loss": 0.17994155883789062, "step": 158270 }, { "epoch": 1.3685571244520152, "grad_norm": 15.53572929896691, "learning_rate": 1.358618752426688e-06, "loss": 0.04352645874023438, "step": 158275 }, { "epoch": 1.3686003579735584, "grad_norm": 1.0371390084486043, "learning_rate": 1.3584482214523015e-06, "loss": 0.12860527038574218, "step": 158280 }, { "epoch": 1.3686435914951016, "grad_norm": 3.0428005659412714, "learning_rate": 1.3582776980486067e-06, "loss": 0.07480278015136718, "step": 158285 }, { "epoch": 1.3686868250166448, "grad_norm": 12.13687185142276, "learning_rate": 1.35810718221639e-06, "loss": 0.08579978942871094, "step": 158290 }, { "epoch": 1.3687300585381883, "grad_norm": 7.339934538274015, "learning_rate": 1.3579366739564377e-06, "loss": 0.25025405883789065, "step": 158295 }, { "epoch": 1.3687732920597315, "grad_norm": 1.2758006138780003, "learning_rate": 1.3577661732695356e-06, "loss": 0.007590484619140625, "step": 158300 }, { "epoch": 1.3688165255812748, "grad_norm": 43.58011137422001, "learning_rate": 1.357595680156472e-06, "loss": 0.17128868103027345, "step": 158305 }, { "epoch": 1.368859759102818, "grad_norm": 6.632013137248088, "learning_rate": 1.3574251946180302e-06, "loss": 0.0316864013671875, "step": 158310 }, { "epoch": 1.3689029926243612, "grad_norm": 28.014940173008874, "learning_rate": 1.357254716655e-06, "loss": 0.09543724060058593, "step": 158315 }, { "epoch": 1.3689462261459044, "grad_norm": 0.3489026532744066, "learning_rate": 1.3570842462681657e-06, "loss": 0.0038349151611328123, "step": 158320 }, { "epoch": 1.3689894596674477, "grad_norm": 0.43907695977594885, "learning_rate": 1.3569137834583134e-06, "loss": 0.13629226684570311, "step": 158325 }, { "epoch": 1.369032693188991, "grad_norm": 0.7766781032698421, "learning_rate": 1.356743328226229e-06, "loss": 0.04629249572753906, "step": 158330 }, { "epoch": 1.3690759267105344, "grad_norm": 1.2611330438027955, "learning_rate": 1.3565728805727e-06, "loss": 0.009910964965820312, "step": 158335 }, { "epoch": 1.3691191602320776, "grad_norm": 30.075909696496804, "learning_rate": 1.3564024404985117e-06, "loss": 0.104901123046875, "step": 158340 }, { "epoch": 1.3691623937536208, "grad_norm": 0.7992207285891357, "learning_rate": 1.3562320080044502e-06, "loss": 0.11518497467041015, "step": 158345 }, { "epoch": 1.369205627275164, "grad_norm": 0.5194382647169642, "learning_rate": 1.356061583091301e-06, "loss": 0.010573959350585938, "step": 158350 }, { "epoch": 1.3692488607967073, "grad_norm": 6.865522876742652, "learning_rate": 1.3558911657598509e-06, "loss": 0.020836639404296874, "step": 158355 }, { "epoch": 1.3692920943182507, "grad_norm": 2.4268636254277403, "learning_rate": 1.3557207560108843e-06, "loss": 0.05883636474609375, "step": 158360 }, { "epoch": 1.369335327839794, "grad_norm": 0.9174530027354197, "learning_rate": 1.3555503538451894e-06, "loss": 0.05976295471191406, "step": 158365 }, { "epoch": 1.3693785613613372, "grad_norm": 1.531942477679472, "learning_rate": 1.3553799592635499e-06, "loss": 0.049219512939453126, "step": 158370 }, { "epoch": 1.3694217948828804, "grad_norm": 2.7601304272823652, "learning_rate": 1.3552095722667535e-06, "loss": 0.23991851806640624, "step": 158375 }, { "epoch": 1.3694650284044236, "grad_norm": 4.679315906970474, "learning_rate": 1.3550391928555857e-06, "loss": 0.0556950569152832, "step": 158380 }, { "epoch": 1.3695082619259669, "grad_norm": 2.614309496796504, "learning_rate": 1.3548688210308316e-06, "loss": 0.035115814208984374, "step": 158385 }, { "epoch": 1.36955149544751, "grad_norm": 4.782641950330502, "learning_rate": 1.3546984567932769e-06, "loss": 0.026554107666015625, "step": 158390 }, { "epoch": 1.3695947289690533, "grad_norm": 3.116627169937544, "learning_rate": 1.3545281001437067e-06, "loss": 0.29320526123046875, "step": 158395 }, { "epoch": 1.3696379624905968, "grad_norm": 12.357034522545653, "learning_rate": 1.3543577510829087e-06, "loss": 0.027099227905273436, "step": 158400 }, { "epoch": 1.36968119601214, "grad_norm": 15.16231872263971, "learning_rate": 1.354187409611667e-06, "loss": 0.07026214599609375, "step": 158405 }, { "epoch": 1.3697244295336832, "grad_norm": 2.405227904050991, "learning_rate": 1.3540170757307678e-06, "loss": 0.07901115417480468, "step": 158410 }, { "epoch": 1.3697676630552265, "grad_norm": 6.322418561157022, "learning_rate": 1.3538467494409962e-06, "loss": 0.02590484619140625, "step": 158415 }, { "epoch": 1.3698108965767697, "grad_norm": 10.098407462012563, "learning_rate": 1.353676430743137e-06, "loss": 0.09794197082519532, "step": 158420 }, { "epoch": 1.3698541300983131, "grad_norm": 1.107248684700548, "learning_rate": 1.3535061196379775e-06, "loss": 0.08556671142578125, "step": 158425 }, { "epoch": 1.3698973636198564, "grad_norm": 2.3938727643754514, "learning_rate": 1.3533358161263012e-06, "loss": 0.10198860168457032, "step": 158430 }, { "epoch": 1.3699405971413996, "grad_norm": 21.397674030877592, "learning_rate": 1.3531655202088962e-06, "loss": 0.07205162048339844, "step": 158435 }, { "epoch": 1.3699838306629428, "grad_norm": 40.692297461557544, "learning_rate": 1.352995231886546e-06, "loss": 0.06568927764892578, "step": 158440 }, { "epoch": 1.370027064184486, "grad_norm": 4.91281872196945, "learning_rate": 1.352824951160036e-06, "loss": 0.200506591796875, "step": 158445 }, { "epoch": 1.3700702977060293, "grad_norm": 0.5654992999647684, "learning_rate": 1.3526546780301522e-06, "loss": 0.020446205139160158, "step": 158450 }, { "epoch": 1.3701135312275725, "grad_norm": 56.672429498899234, "learning_rate": 1.3524844124976795e-06, "loss": 0.08116168975830078, "step": 158455 }, { "epoch": 1.3701567647491157, "grad_norm": 2.3600607944378247, "learning_rate": 1.3523141545634017e-06, "loss": 0.0254486083984375, "step": 158460 }, { "epoch": 1.3701999982706592, "grad_norm": 60.321601212488815, "learning_rate": 1.3521439042281064e-06, "loss": 0.18553447723388672, "step": 158465 }, { "epoch": 1.3702432317922024, "grad_norm": 53.57014453499433, "learning_rate": 1.3519736614925782e-06, "loss": 0.18397445678710939, "step": 158470 }, { "epoch": 1.3702864653137456, "grad_norm": 2.009449001349762, "learning_rate": 1.3518034263576017e-06, "loss": 0.059153079986572266, "step": 158475 }, { "epoch": 1.3703296988352889, "grad_norm": 6.943470623351222, "learning_rate": 1.3516331988239609e-06, "loss": 0.04116134643554688, "step": 158480 }, { "epoch": 1.3703729323568323, "grad_norm": 1.1336012227670742, "learning_rate": 1.3514629788924435e-06, "loss": 0.07618179321289062, "step": 158485 }, { "epoch": 1.3704161658783756, "grad_norm": 8.819380140477213, "learning_rate": 1.3512927665638316e-06, "loss": 0.07305335998535156, "step": 158490 }, { "epoch": 1.3704593993999188, "grad_norm": 0.11504651203185559, "learning_rate": 1.3511225618389136e-06, "loss": 0.15417861938476562, "step": 158495 }, { "epoch": 1.370502632921462, "grad_norm": 0.06736998763651043, "learning_rate": 1.3509523647184723e-06, "loss": 0.03147315979003906, "step": 158500 }, { "epoch": 1.3705458664430052, "grad_norm": 7.375422946844913, "learning_rate": 1.3507821752032929e-06, "loss": 0.11700286865234374, "step": 158505 }, { "epoch": 1.3705890999645485, "grad_norm": 0.45589256948985324, "learning_rate": 1.3506119932941604e-06, "loss": 0.11722221374511718, "step": 158510 }, { "epoch": 1.3706323334860917, "grad_norm": 0.10110399510202336, "learning_rate": 1.3504418189918596e-06, "loss": 0.033869171142578126, "step": 158515 }, { "epoch": 1.370675567007635, "grad_norm": 1.8126381761122838, "learning_rate": 1.3502716522971755e-06, "loss": 0.03681106567382812, "step": 158520 }, { "epoch": 1.3707188005291784, "grad_norm": 0.1825459908531586, "learning_rate": 1.3501014932108917e-06, "loss": 0.014183425903320312, "step": 158525 }, { "epoch": 1.3707620340507216, "grad_norm": 1.2752995016232234, "learning_rate": 1.349931341733795e-06, "loss": 0.0828826904296875, "step": 158530 }, { "epoch": 1.3708052675722648, "grad_norm": 24.978422442318582, "learning_rate": 1.3497611978666682e-06, "loss": 0.13024444580078126, "step": 158535 }, { "epoch": 1.370848501093808, "grad_norm": 0.22297159822247606, "learning_rate": 1.3495910616102979e-06, "loss": 0.13276138305664062, "step": 158540 }, { "epoch": 1.3708917346153513, "grad_norm": 0.5843989919429242, "learning_rate": 1.3494209329654677e-06, "loss": 0.027513885498046876, "step": 158545 }, { "epoch": 1.3709349681368947, "grad_norm": 0.4506622902069121, "learning_rate": 1.3492508119329624e-06, "loss": 0.021868133544921876, "step": 158550 }, { "epoch": 1.370978201658438, "grad_norm": 5.162876617569043, "learning_rate": 1.3490806985135656e-06, "loss": 0.15288925170898438, "step": 158555 }, { "epoch": 1.3710214351799812, "grad_norm": 0.13510846712786, "learning_rate": 1.3489105927080636e-06, "loss": 0.0591583251953125, "step": 158560 }, { "epoch": 1.3710646687015244, "grad_norm": 0.3100971582550239, "learning_rate": 1.34874049451724e-06, "loss": 0.28934173583984374, "step": 158565 }, { "epoch": 1.3711079022230677, "grad_norm": 6.715211234040585, "learning_rate": 1.3485704039418799e-06, "loss": 0.17873930931091309, "step": 158570 }, { "epoch": 1.371151135744611, "grad_norm": 8.403828037838478, "learning_rate": 1.3484003209827664e-06, "loss": 0.05074920654296875, "step": 158575 }, { "epoch": 1.3711943692661541, "grad_norm": 5.8834962682300915, "learning_rate": 1.3482302456406851e-06, "loss": 0.20370063781738282, "step": 158580 }, { "epoch": 1.3712376027876974, "grad_norm": 4.335296865013212, "learning_rate": 1.3480601779164188e-06, "loss": 0.06322784423828125, "step": 158585 }, { "epoch": 1.3712808363092408, "grad_norm": 15.142501938960242, "learning_rate": 1.347890117810754e-06, "loss": 0.03762054443359375, "step": 158590 }, { "epoch": 1.371324069830784, "grad_norm": 3.0537112495534484, "learning_rate": 1.3477200653244732e-06, "loss": 0.12778472900390625, "step": 158595 }, { "epoch": 1.3713673033523273, "grad_norm": 11.548337896483964, "learning_rate": 1.3475500204583621e-06, "loss": 0.28623046875, "step": 158600 }, { "epoch": 1.3714105368738705, "grad_norm": 2.001101652340949, "learning_rate": 1.3473799832132045e-06, "loss": 0.11125965118408203, "step": 158605 }, { "epoch": 1.3714537703954137, "grad_norm": 6.472302117330893, "learning_rate": 1.3472099535897844e-06, "loss": 0.2211151123046875, "step": 158610 }, { "epoch": 1.3714970039169572, "grad_norm": 0.013507602712238023, "learning_rate": 1.347039931588886e-06, "loss": 0.025943756103515625, "step": 158615 }, { "epoch": 1.3715402374385004, "grad_norm": 0.14400205728437127, "learning_rate": 1.3468699172112922e-06, "loss": 0.0151275634765625, "step": 158620 }, { "epoch": 1.3715834709600436, "grad_norm": 0.2079295344419875, "learning_rate": 1.3466999104577894e-06, "loss": 0.048462295532226564, "step": 158625 }, { "epoch": 1.3716267044815869, "grad_norm": 8.834515235877813, "learning_rate": 1.3465299113291605e-06, "loss": 0.04740333557128906, "step": 158630 }, { "epoch": 1.37166993800313, "grad_norm": 0.25913380767732713, "learning_rate": 1.3463599198261895e-06, "loss": 0.17261962890625, "step": 158635 }, { "epoch": 1.3717131715246733, "grad_norm": 0.40279148455760666, "learning_rate": 1.3461899359496605e-06, "loss": 0.3175617218017578, "step": 158640 }, { "epoch": 1.3717564050462165, "grad_norm": 0.7049150715934821, "learning_rate": 1.3460199597003565e-06, "loss": 0.08502578735351562, "step": 158645 }, { "epoch": 1.3717996385677598, "grad_norm": 1.788740017398703, "learning_rate": 1.345849991079063e-06, "loss": 0.04019355773925781, "step": 158650 }, { "epoch": 1.3718428720893032, "grad_norm": 1.2555131427739277, "learning_rate": 1.3456800300865624e-06, "loss": 0.1040557861328125, "step": 158655 }, { "epoch": 1.3718861056108465, "grad_norm": 0.8492320188239337, "learning_rate": 1.3455100767236401e-06, "loss": 0.2310558319091797, "step": 158660 }, { "epoch": 1.3719293391323897, "grad_norm": 1.0144131550724593, "learning_rate": 1.3453401309910793e-06, "loss": 0.005317115783691406, "step": 158665 }, { "epoch": 1.371972572653933, "grad_norm": 2.853272714692328, "learning_rate": 1.3451701928896638e-06, "loss": 0.07915496826171875, "step": 158670 }, { "epoch": 1.3720158061754761, "grad_norm": 5.65331370720894, "learning_rate": 1.345000262420177e-06, "loss": 0.05120086669921875, "step": 158675 }, { "epoch": 1.3720590396970196, "grad_norm": 16.831360244323868, "learning_rate": 1.3448303395834014e-06, "loss": 0.024574089050292968, "step": 158680 }, { "epoch": 1.3721022732185628, "grad_norm": 0.22794612692485217, "learning_rate": 1.3446604243801236e-06, "loss": 0.02763824462890625, "step": 158685 }, { "epoch": 1.372145506740106, "grad_norm": 6.43607374266512, "learning_rate": 1.3444905168111254e-06, "loss": 0.025228118896484374, "step": 158690 }, { "epoch": 1.3721887402616493, "grad_norm": 2.8490877757856374, "learning_rate": 1.3443206168771907e-06, "loss": 0.19144096374511718, "step": 158695 }, { "epoch": 1.3722319737831925, "grad_norm": 2.5768695950641725, "learning_rate": 1.3441507245791028e-06, "loss": 0.07293663024902344, "step": 158700 }, { "epoch": 1.3722752073047357, "grad_norm": 9.830130050106611, "learning_rate": 1.3439808399176446e-06, "loss": 0.01617889404296875, "step": 158705 }, { "epoch": 1.372318440826279, "grad_norm": 3.587346122742665, "learning_rate": 1.3438109628936015e-06, "loss": 0.04973907470703125, "step": 158710 }, { "epoch": 1.3723616743478222, "grad_norm": 0.9579892032985998, "learning_rate": 1.3436410935077546e-06, "loss": 0.03708343505859375, "step": 158715 }, { "epoch": 1.3724049078693656, "grad_norm": 9.697128975234934, "learning_rate": 1.34347123176089e-06, "loss": 0.026261520385742188, "step": 158720 }, { "epoch": 1.3724481413909089, "grad_norm": 10.945906360627538, "learning_rate": 1.3433013776537893e-06, "loss": 0.08042774200439454, "step": 158725 }, { "epoch": 1.372491374912452, "grad_norm": 0.20060728725305818, "learning_rate": 1.3431315311872369e-06, "loss": 0.06698417663574219, "step": 158730 }, { "epoch": 1.3725346084339953, "grad_norm": 2.7419246040073557, "learning_rate": 1.342961692362015e-06, "loss": 0.09625320434570313, "step": 158735 }, { "epoch": 1.3725778419555388, "grad_norm": 4.365901611589923, "learning_rate": 1.3427918611789072e-06, "loss": 0.094342041015625, "step": 158740 }, { "epoch": 1.372621075477082, "grad_norm": 1.3236103197741722, "learning_rate": 1.3426220376386964e-06, "loss": 0.02409210205078125, "step": 158745 }, { "epoch": 1.3726643089986252, "grad_norm": 0.1715633873894811, "learning_rate": 1.342452221742167e-06, "loss": 0.024899673461914063, "step": 158750 }, { "epoch": 1.3727075425201685, "grad_norm": 10.987990553580984, "learning_rate": 1.3422824134901014e-06, "loss": 0.0282867431640625, "step": 158755 }, { "epoch": 1.3727507760417117, "grad_norm": 0.27853387821944375, "learning_rate": 1.342112612883282e-06, "loss": 0.042516326904296874, "step": 158760 }, { "epoch": 1.372794009563255, "grad_norm": 0.9360614485090797, "learning_rate": 1.3419428199224936e-06, "loss": 0.05490875244140625, "step": 158765 }, { "epoch": 1.3728372430847982, "grad_norm": 11.440038685293581, "learning_rate": 1.3417730346085187e-06, "loss": 0.03519706726074219, "step": 158770 }, { "epoch": 1.3728804766063414, "grad_norm": 0.47815877946563506, "learning_rate": 1.3416032569421399e-06, "loss": 0.03696746826171875, "step": 158775 }, { "epoch": 1.3729237101278848, "grad_norm": 55.649676832250314, "learning_rate": 1.341433486924139e-06, "loss": 0.20856170654296874, "step": 158780 }, { "epoch": 1.372966943649428, "grad_norm": 10.372238708875972, "learning_rate": 1.3412637245553018e-06, "loss": 0.061248397827148436, "step": 158785 }, { "epoch": 1.3730101771709713, "grad_norm": 5.292358237652709, "learning_rate": 1.3410939698364095e-06, "loss": 0.022276687622070312, "step": 158790 }, { "epoch": 1.3730534106925145, "grad_norm": 1.7860258414098882, "learning_rate": 1.3409242227682455e-06, "loss": 0.04850616455078125, "step": 158795 }, { "epoch": 1.3730966442140577, "grad_norm": 0.7362590208899485, "learning_rate": 1.3407544833515918e-06, "loss": 0.0405792236328125, "step": 158800 }, { "epoch": 1.3731398777356012, "grad_norm": 0.189210668570189, "learning_rate": 1.3405847515872325e-06, "loss": 0.060986328125, "step": 158805 }, { "epoch": 1.3731831112571444, "grad_norm": 0.20779324598760812, "learning_rate": 1.3404150274759483e-06, "loss": 0.1712890625, "step": 158810 }, { "epoch": 1.3732263447786877, "grad_norm": 9.615381749069051, "learning_rate": 1.3402453110185242e-06, "loss": 0.08601722717285157, "step": 158815 }, { "epoch": 1.3732695783002309, "grad_norm": 0.20131668869474958, "learning_rate": 1.3400756022157415e-06, "loss": 0.03822784423828125, "step": 158820 }, { "epoch": 1.3733128118217741, "grad_norm": 74.3731059362338, "learning_rate": 1.339905901068384e-06, "loss": 0.15722427368164063, "step": 158825 }, { "epoch": 1.3733560453433173, "grad_norm": 7.452641662487066, "learning_rate": 1.3397362075772341e-06, "loss": 0.0572418212890625, "step": 158830 }, { "epoch": 1.3733992788648606, "grad_norm": 0.35419249561381927, "learning_rate": 1.3395665217430741e-06, "loss": 0.08096237182617187, "step": 158835 }, { "epoch": 1.3734425123864038, "grad_norm": 0.9839039313662745, "learning_rate": 1.3393968435666853e-06, "loss": 0.033667373657226565, "step": 158840 }, { "epoch": 1.3734857459079473, "grad_norm": 0.42177459618269647, "learning_rate": 1.3392271730488529e-06, "loss": 0.06389274597167968, "step": 158845 }, { "epoch": 1.3735289794294905, "grad_norm": 8.799557835653143, "learning_rate": 1.339057510190358e-06, "loss": 0.10272140502929687, "step": 158850 }, { "epoch": 1.3735722129510337, "grad_norm": 0.297750621793198, "learning_rate": 1.338887854991983e-06, "loss": 0.04362640380859375, "step": 158855 }, { "epoch": 1.373615446472577, "grad_norm": 52.48616107554594, "learning_rate": 1.3387182074545103e-06, "loss": 0.5010047912597656, "step": 158860 }, { "epoch": 1.3736586799941202, "grad_norm": 2.3954952853970872, "learning_rate": 1.3385485675787226e-06, "loss": 0.03415184020996094, "step": 158865 }, { "epoch": 1.3737019135156636, "grad_norm": 11.140567526840865, "learning_rate": 1.3383789353654008e-06, "loss": 0.10679130554199219, "step": 158870 }, { "epoch": 1.3737451470372068, "grad_norm": 13.534121068776187, "learning_rate": 1.3382093108153298e-06, "loss": 0.0895904541015625, "step": 158875 }, { "epoch": 1.37378838055875, "grad_norm": 3.6884392059116253, "learning_rate": 1.3380396939292896e-06, "loss": 0.16731338500976561, "step": 158880 }, { "epoch": 1.3738316140802933, "grad_norm": 0.502232515550214, "learning_rate": 1.3378700847080645e-06, "loss": 0.012085723876953124, "step": 158885 }, { "epoch": 1.3738748476018365, "grad_norm": 15.046546843517994, "learning_rate": 1.3377004831524357e-06, "loss": 0.031854248046875, "step": 158890 }, { "epoch": 1.3739180811233798, "grad_norm": 0.6617512408848578, "learning_rate": 1.3375308892631852e-06, "loss": 0.05250244140625, "step": 158895 }, { "epoch": 1.373961314644923, "grad_norm": 1.840626236919761, "learning_rate": 1.3373613030410957e-06, "loss": 0.11068344116210938, "step": 158900 }, { "epoch": 1.3740045481664662, "grad_norm": 5.236030825491459, "learning_rate": 1.3371917244869477e-06, "loss": 0.03101348876953125, "step": 158905 }, { "epoch": 1.3740477816880097, "grad_norm": 11.403538144925326, "learning_rate": 1.3370221536015256e-06, "loss": 0.0266265869140625, "step": 158910 }, { "epoch": 1.374091015209553, "grad_norm": 0.7777275031017802, "learning_rate": 1.3368525903856103e-06, "loss": 0.02562408447265625, "step": 158915 }, { "epoch": 1.3741342487310961, "grad_norm": 1.3948417975758682, "learning_rate": 1.3366830348399843e-06, "loss": 0.17923736572265625, "step": 158920 }, { "epoch": 1.3741774822526394, "grad_norm": 4.509269079832437, "learning_rate": 1.3365134869654285e-06, "loss": 0.06729278564453126, "step": 158925 }, { "epoch": 1.3742207157741826, "grad_norm": 6.114220955153033, "learning_rate": 1.336343946762725e-06, "loss": 0.038623809814453125, "step": 158930 }, { "epoch": 1.374263949295726, "grad_norm": 1.1748248039499496, "learning_rate": 1.336174414232656e-06, "loss": 0.0488800048828125, "step": 158935 }, { "epoch": 1.3743071828172693, "grad_norm": 9.042421112870983, "learning_rate": 1.3360048893760045e-06, "loss": 0.04809150695800781, "step": 158940 }, { "epoch": 1.3743504163388125, "grad_norm": 4.782597364763274, "learning_rate": 1.3358353721935517e-06, "loss": 0.07999916076660156, "step": 158945 }, { "epoch": 1.3743936498603557, "grad_norm": 0.09534940664427233, "learning_rate": 1.335665862686079e-06, "loss": 0.017885208129882812, "step": 158950 }, { "epoch": 1.374436883381899, "grad_norm": 49.84563042991477, "learning_rate": 1.3354963608543684e-06, "loss": 0.18864669799804687, "step": 158955 }, { "epoch": 1.3744801169034422, "grad_norm": 11.205933177937032, "learning_rate": 1.3353268666992011e-06, "loss": 0.040593719482421874, "step": 158960 }, { "epoch": 1.3745233504249854, "grad_norm": 7.206630512822025, "learning_rate": 1.3351573802213595e-06, "loss": 0.03863525390625, "step": 158965 }, { "epoch": 1.3745665839465289, "grad_norm": 1.9718807235834976, "learning_rate": 1.3349879014216242e-06, "loss": 0.011049652099609375, "step": 158970 }, { "epoch": 1.374609817468072, "grad_norm": 5.927232288790647, "learning_rate": 1.334818430300778e-06, "loss": 0.09609527587890625, "step": 158975 }, { "epoch": 1.3746530509896153, "grad_norm": 16.142588879930152, "learning_rate": 1.3346489668596023e-06, "loss": 0.08574771881103516, "step": 158980 }, { "epoch": 1.3746962845111586, "grad_norm": 1.2576756781245895, "learning_rate": 1.3344795110988775e-06, "loss": 0.13336257934570311, "step": 158985 }, { "epoch": 1.3747395180327018, "grad_norm": 0.9414962488048592, "learning_rate": 1.3343100630193872e-06, "loss": 0.13428726196289062, "step": 158990 }, { "epoch": 1.3747827515542452, "grad_norm": 14.64356167355408, "learning_rate": 1.3341406226219113e-06, "loss": 0.1303802490234375, "step": 158995 }, { "epoch": 1.3748259850757885, "grad_norm": 1.1670107948425081, "learning_rate": 1.3339711899072308e-06, "loss": 0.052976226806640624, "step": 159000 }, { "epoch": 1.3748692185973317, "grad_norm": 1.7801515002286765, "learning_rate": 1.3338017648761293e-06, "loss": 0.025783538818359375, "step": 159005 }, { "epoch": 1.374912452118875, "grad_norm": 10.585144688930503, "learning_rate": 1.3336323475293864e-06, "loss": 0.14262161254882813, "step": 159010 }, { "epoch": 1.3749556856404181, "grad_norm": 7.969084403311556, "learning_rate": 1.3334629378677841e-06, "loss": 0.1010467529296875, "step": 159015 }, { "epoch": 1.3749989191619614, "grad_norm": 0.9034184175681564, "learning_rate": 1.3332935358921035e-06, "loss": 0.3101973533630371, "step": 159020 }, { "epoch": 1.3750421526835046, "grad_norm": 0.6093397024555881, "learning_rate": 1.3331241416031256e-06, "loss": 0.010878372192382812, "step": 159025 }, { "epoch": 1.3750853862050478, "grad_norm": 13.371300786418043, "learning_rate": 1.3329547550016308e-06, "loss": 0.04014606475830078, "step": 159030 }, { "epoch": 1.3751286197265913, "grad_norm": 5.871003433762854, "learning_rate": 1.3327853760884028e-06, "loss": 0.104388427734375, "step": 159035 }, { "epoch": 1.3751718532481345, "grad_norm": 7.677716431937186, "learning_rate": 1.3326160048642211e-06, "loss": 0.03751716613769531, "step": 159040 }, { "epoch": 1.3752150867696777, "grad_norm": 6.340339213654821, "learning_rate": 1.3324466413298658e-06, "loss": 0.029974365234375, "step": 159045 }, { "epoch": 1.375258320291221, "grad_norm": 1.1733944620986916, "learning_rate": 1.3322772854861207e-06, "loss": 0.1832275390625, "step": 159050 }, { "epoch": 1.3753015538127642, "grad_norm": 43.65312264207127, "learning_rate": 1.332107937333765e-06, "loss": 0.13847732543945312, "step": 159055 }, { "epoch": 1.3753447873343077, "grad_norm": 52.26807303884481, "learning_rate": 1.3319385968735804e-06, "loss": 0.15625457763671874, "step": 159060 }, { "epoch": 1.3753880208558509, "grad_norm": 0.24973085755518182, "learning_rate": 1.3317692641063465e-06, "loss": 0.030457305908203124, "step": 159065 }, { "epoch": 1.375431254377394, "grad_norm": 0.8384353054015394, "learning_rate": 1.3315999390328462e-06, "loss": 0.00804595947265625, "step": 159070 }, { "epoch": 1.3754744878989373, "grad_norm": 0.24111459621447812, "learning_rate": 1.33143062165386e-06, "loss": 0.09496965408325195, "step": 159075 }, { "epoch": 1.3755177214204806, "grad_norm": 6.9470600401626, "learning_rate": 1.3312613119701678e-06, "loss": 0.04040260314941406, "step": 159080 }, { "epoch": 1.3755609549420238, "grad_norm": 3.4812194174241373, "learning_rate": 1.3310920099825514e-06, "loss": 0.06007881164550781, "step": 159085 }, { "epoch": 1.375604188463567, "grad_norm": 0.18428530389632447, "learning_rate": 1.3309227156917908e-06, "loss": 0.010599327087402344, "step": 159090 }, { "epoch": 1.3756474219851103, "grad_norm": 0.15728894336699795, "learning_rate": 1.3307534290986664e-06, "loss": 0.1353456497192383, "step": 159095 }, { "epoch": 1.3756906555066537, "grad_norm": 2.464696074065901, "learning_rate": 1.3305841502039608e-06, "loss": 0.024573898315429686, "step": 159100 }, { "epoch": 1.375733889028197, "grad_norm": 0.06832377686173235, "learning_rate": 1.3304148790084524e-06, "loss": 0.016043853759765626, "step": 159105 }, { "epoch": 1.3757771225497402, "grad_norm": 1.8135102706978672, "learning_rate": 1.330245615512924e-06, "loss": 0.06315765380859376, "step": 159110 }, { "epoch": 1.3758203560712834, "grad_norm": 4.346023557707889, "learning_rate": 1.3300763597181553e-06, "loss": 0.030278396606445313, "step": 159115 }, { "epoch": 1.3758635895928266, "grad_norm": 0.11720872567083175, "learning_rate": 1.329907111624927e-06, "loss": 0.0417724609375, "step": 159120 }, { "epoch": 1.37590682311437, "grad_norm": 4.995936164401895, "learning_rate": 1.3297378712340195e-06, "loss": 0.02154541015625, "step": 159125 }, { "epoch": 1.3759500566359133, "grad_norm": 0.16431508098462622, "learning_rate": 1.3295686385462123e-06, "loss": 0.010225296020507812, "step": 159130 }, { "epoch": 1.3759932901574565, "grad_norm": 0.49692763889266234, "learning_rate": 1.329399413562288e-06, "loss": 0.07579421997070312, "step": 159135 }, { "epoch": 1.3760365236789998, "grad_norm": 19.494995295217876, "learning_rate": 1.3292301962830259e-06, "loss": 0.18448333740234374, "step": 159140 }, { "epoch": 1.376079757200543, "grad_norm": 5.727832239125712, "learning_rate": 1.3290609867092065e-06, "loss": 0.052064132690429685, "step": 159145 }, { "epoch": 1.3761229907220862, "grad_norm": 5.7038081915242715, "learning_rate": 1.3288917848416104e-06, "loss": 0.034454345703125, "step": 159150 }, { "epoch": 1.3761662242436294, "grad_norm": 13.887524895015567, "learning_rate": 1.3287225906810165e-06, "loss": 0.0631103515625, "step": 159155 }, { "epoch": 1.3762094577651727, "grad_norm": 0.5702461533353815, "learning_rate": 1.3285534042282062e-06, "loss": 0.026395416259765624, "step": 159160 }, { "epoch": 1.3762526912867161, "grad_norm": 6.212830683730001, "learning_rate": 1.3283842254839613e-06, "loss": 0.09412097930908203, "step": 159165 }, { "epoch": 1.3762959248082594, "grad_norm": 0.06058683903604188, "learning_rate": 1.3282150544490603e-06, "loss": 0.007506370544433594, "step": 159170 }, { "epoch": 1.3763391583298026, "grad_norm": 0.9966415642123229, "learning_rate": 1.328045891124284e-06, "loss": 0.02718982696533203, "step": 159175 }, { "epoch": 1.3763823918513458, "grad_norm": 0.40395217505475217, "learning_rate": 1.3278767355104121e-06, "loss": 0.03544807434082031, "step": 159180 }, { "epoch": 1.3764256253728893, "grad_norm": 0.898169758649437, "learning_rate": 1.327707587608225e-06, "loss": 0.020726776123046874, "step": 159185 }, { "epoch": 1.3764688588944325, "grad_norm": 6.657036039562994, "learning_rate": 1.3275384474185018e-06, "loss": 0.07516288757324219, "step": 159190 }, { "epoch": 1.3765120924159757, "grad_norm": 1.6702779305585007, "learning_rate": 1.3273693149420246e-06, "loss": 0.02306709289550781, "step": 159195 }, { "epoch": 1.376555325937519, "grad_norm": 13.213793979337142, "learning_rate": 1.327200190179572e-06, "loss": 0.13285751342773439, "step": 159200 }, { "epoch": 1.3765985594590622, "grad_norm": 2.371494072817074, "learning_rate": 1.3270310731319247e-06, "loss": 0.1159637451171875, "step": 159205 }, { "epoch": 1.3766417929806054, "grad_norm": 3.804152606385273, "learning_rate": 1.326861963799861e-06, "loss": 0.017165756225585936, "step": 159210 }, { "epoch": 1.3766850265021486, "grad_norm": 14.142669761409172, "learning_rate": 1.3266928621841635e-06, "loss": 0.073553466796875, "step": 159215 }, { "epoch": 1.3767282600236919, "grad_norm": 6.877087880165153, "learning_rate": 1.3265237682856101e-06, "loss": 0.03160076141357422, "step": 159220 }, { "epoch": 1.3767714935452353, "grad_norm": 2.618620737368354, "learning_rate": 1.3263546821049803e-06, "loss": 0.032049560546875, "step": 159225 }, { "epoch": 1.3768147270667785, "grad_norm": 7.997962593873688, "learning_rate": 1.326185603643056e-06, "loss": 0.14639739990234374, "step": 159230 }, { "epoch": 1.3768579605883218, "grad_norm": 1.9505776832853008, "learning_rate": 1.3260165329006158e-06, "loss": 0.045438194274902345, "step": 159235 }, { "epoch": 1.376901194109865, "grad_norm": 1.2593165859950366, "learning_rate": 1.3258474698784397e-06, "loss": 0.02515544891357422, "step": 159240 }, { "epoch": 1.3769444276314082, "grad_norm": 15.67909012245382, "learning_rate": 1.3256784145773066e-06, "loss": 0.20590057373046874, "step": 159245 }, { "epoch": 1.3769876611529517, "grad_norm": 4.043423519071135, "learning_rate": 1.3255093669979968e-06, "loss": 0.1481304168701172, "step": 159250 }, { "epoch": 1.377030894674495, "grad_norm": 14.40304749708575, "learning_rate": 1.325340327141289e-06, "loss": 0.10048370361328125, "step": 159255 }, { "epoch": 1.3770741281960381, "grad_norm": 6.406796771404762, "learning_rate": 1.3251712950079646e-06, "loss": 0.0717193603515625, "step": 159260 }, { "epoch": 1.3771173617175814, "grad_norm": 4.452538791017032, "learning_rate": 1.3250022705988023e-06, "loss": 0.03527374267578125, "step": 159265 }, { "epoch": 1.3771605952391246, "grad_norm": 0.7652236124446372, "learning_rate": 1.3248332539145803e-06, "loss": 0.09379768371582031, "step": 159270 }, { "epoch": 1.3772038287606678, "grad_norm": 3.1494329680681794, "learning_rate": 1.3246642449560807e-06, "loss": 0.0271759033203125, "step": 159275 }, { "epoch": 1.377247062282211, "grad_norm": 16.56074238408745, "learning_rate": 1.3244952437240812e-06, "loss": 0.16802330017089845, "step": 159280 }, { "epoch": 1.3772902958037543, "grad_norm": 14.551905050485813, "learning_rate": 1.3243262502193607e-06, "loss": 0.0351654052734375, "step": 159285 }, { "epoch": 1.3773335293252977, "grad_norm": 4.541028547514014, "learning_rate": 1.3241572644427004e-06, "loss": 0.044959068298339844, "step": 159290 }, { "epoch": 1.377376762846841, "grad_norm": 5.714146955256165, "learning_rate": 1.323988286394879e-06, "loss": 0.01061553955078125, "step": 159295 }, { "epoch": 1.3774199963683842, "grad_norm": 0.11520046417591026, "learning_rate": 1.3238193160766752e-06, "loss": 0.18732471466064454, "step": 159300 }, { "epoch": 1.3774632298899274, "grad_norm": 19.711135007960877, "learning_rate": 1.3236503534888686e-06, "loss": 0.09923973083496093, "step": 159305 }, { "epoch": 1.3775064634114706, "grad_norm": 2.2268967703476332, "learning_rate": 1.3234813986322387e-06, "loss": 0.12952423095703125, "step": 159310 }, { "epoch": 1.377549696933014, "grad_norm": 3.064169650422368, "learning_rate": 1.3233124515075644e-06, "loss": 0.04690284729003906, "step": 159315 }, { "epoch": 1.3775929304545573, "grad_norm": 0.4503210276028591, "learning_rate": 1.3231435121156238e-06, "loss": 0.23010940551757814, "step": 159320 }, { "epoch": 1.3776361639761006, "grad_norm": 3.854745232768011, "learning_rate": 1.3229745804571983e-06, "loss": 0.074310302734375, "step": 159325 }, { "epoch": 1.3776793974976438, "grad_norm": 56.081944722777976, "learning_rate": 1.3228056565330646e-06, "loss": 0.162139892578125, "step": 159330 }, { "epoch": 1.377722631019187, "grad_norm": 9.299652562498514, "learning_rate": 1.3226367403440041e-06, "loss": 0.039983367919921874, "step": 159335 }, { "epoch": 1.3777658645407302, "grad_norm": 18.67941965130474, "learning_rate": 1.3224678318907948e-06, "loss": 0.27351722717285154, "step": 159340 }, { "epoch": 1.3778090980622735, "grad_norm": 2.9268130928672735, "learning_rate": 1.3222989311742157e-06, "loss": 0.080145263671875, "step": 159345 }, { "epoch": 1.3778523315838167, "grad_norm": 1.036602135707911, "learning_rate": 1.3221300381950447e-06, "loss": 0.07437744140625, "step": 159350 }, { "epoch": 1.3778955651053602, "grad_norm": 1.3926654154201377, "learning_rate": 1.3219611529540627e-06, "loss": 0.028302764892578124, "step": 159355 }, { "epoch": 1.3779387986269034, "grad_norm": 20.73632715754977, "learning_rate": 1.3217922754520473e-06, "loss": 0.05968780517578125, "step": 159360 }, { "epoch": 1.3779820321484466, "grad_norm": 4.701199985387431, "learning_rate": 1.3216234056897777e-06, "loss": 0.07747459411621094, "step": 159365 }, { "epoch": 1.3780252656699898, "grad_norm": 0.3595110424213008, "learning_rate": 1.3214545436680328e-06, "loss": 0.06030731201171875, "step": 159370 }, { "epoch": 1.378068499191533, "grad_norm": 1.2437221470702022, "learning_rate": 1.3212856893875913e-06, "loss": 0.06543769836425781, "step": 159375 }, { "epoch": 1.3781117327130765, "grad_norm": 0.42550116247651976, "learning_rate": 1.3211168428492308e-06, "loss": 0.013162994384765625, "step": 159380 }, { "epoch": 1.3781549662346197, "grad_norm": 3.982355474070888, "learning_rate": 1.3209480040537309e-06, "loss": 0.057661819458007815, "step": 159385 }, { "epoch": 1.378198199756163, "grad_norm": 9.631198132894012, "learning_rate": 1.3207791730018716e-06, "loss": 0.013869094848632812, "step": 159390 }, { "epoch": 1.3782414332777062, "grad_norm": 5.24438420156877, "learning_rate": 1.3206103496944307e-06, "loss": 0.04199676513671875, "step": 159395 }, { "epoch": 1.3782846667992494, "grad_norm": 2.048400327220306, "learning_rate": 1.320441534132186e-06, "loss": 0.033967208862304685, "step": 159400 }, { "epoch": 1.3783279003207927, "grad_norm": 7.413139093349057, "learning_rate": 1.3202727263159167e-06, "loss": 0.02301025390625, "step": 159405 }, { "epoch": 1.378371133842336, "grad_norm": 1.4477015955971138, "learning_rate": 1.3201039262464012e-06, "loss": 0.15075130462646485, "step": 159410 }, { "epoch": 1.3784143673638791, "grad_norm": 45.37072155789872, "learning_rate": 1.3199351339244167e-06, "loss": 0.21898651123046875, "step": 159415 }, { "epoch": 1.3784576008854226, "grad_norm": 0.890772026997948, "learning_rate": 1.3197663493507441e-06, "loss": 0.028192138671875, "step": 159420 }, { "epoch": 1.3785008344069658, "grad_norm": 6.185328775876612, "learning_rate": 1.3195975725261607e-06, "loss": 0.04618644714355469, "step": 159425 }, { "epoch": 1.378544067928509, "grad_norm": 1.53538574345127, "learning_rate": 1.3194288034514448e-06, "loss": 0.169293212890625, "step": 159430 }, { "epoch": 1.3785873014500523, "grad_norm": 1.5755143574661252, "learning_rate": 1.3192600421273735e-06, "loss": 0.181402587890625, "step": 159435 }, { "epoch": 1.3786305349715957, "grad_norm": 12.688438668438536, "learning_rate": 1.3190912885547278e-06, "loss": 0.131475830078125, "step": 159440 }, { "epoch": 1.378673768493139, "grad_norm": 16.165723589885072, "learning_rate": 1.3189225427342833e-06, "loss": 0.07613525390625, "step": 159445 }, { "epoch": 1.3787170020146822, "grad_norm": 0.050056215960009455, "learning_rate": 1.3187538046668207e-06, "loss": 0.007546043395996094, "step": 159450 }, { "epoch": 1.3787602355362254, "grad_norm": 19.82843155740657, "learning_rate": 1.3185850743531166e-06, "loss": 0.0894805908203125, "step": 159455 }, { "epoch": 1.3788034690577686, "grad_norm": 3.346677106790348, "learning_rate": 1.3184163517939499e-06, "loss": 0.05326900482177734, "step": 159460 }, { "epoch": 1.3788467025793119, "grad_norm": 0.3145415362241521, "learning_rate": 1.3182476369900982e-06, "loss": 0.05095863342285156, "step": 159465 }, { "epoch": 1.378889936100855, "grad_norm": 0.7201165711210366, "learning_rate": 1.3180789299423399e-06, "loss": 0.015885162353515624, "step": 159470 }, { "epoch": 1.3789331696223983, "grad_norm": 7.120089186561511, "learning_rate": 1.3179102306514532e-06, "loss": 0.06736831665039063, "step": 159475 }, { "epoch": 1.3789764031439418, "grad_norm": 0.4803298351970695, "learning_rate": 1.3177415391182144e-06, "loss": 0.01479930877685547, "step": 159480 }, { "epoch": 1.379019636665485, "grad_norm": 3.099115171012203, "learning_rate": 1.317572855343404e-06, "loss": 0.27622184753417967, "step": 159485 }, { "epoch": 1.3790628701870282, "grad_norm": 12.783443077341817, "learning_rate": 1.3174041793277993e-06, "loss": 0.1453948974609375, "step": 159490 }, { "epoch": 1.3791061037085715, "grad_norm": 0.3560117743884063, "learning_rate": 1.3172355110721766e-06, "loss": 0.14096603393554688, "step": 159495 }, { "epoch": 1.3791493372301147, "grad_norm": 7.842778566817809, "learning_rate": 1.317066850577316e-06, "loss": 0.0432647705078125, "step": 159500 }, { "epoch": 1.3791925707516581, "grad_norm": 5.428431628556406, "learning_rate": 1.3168981978439948e-06, "loss": 0.048066329956054685, "step": 159505 }, { "epoch": 1.3792358042732014, "grad_norm": 3.7230304080896426, "learning_rate": 1.3167295528729887e-06, "loss": 0.058446311950683595, "step": 159510 }, { "epoch": 1.3792790377947446, "grad_norm": 10.07977259212207, "learning_rate": 1.3165609156650789e-06, "loss": 0.0676919937133789, "step": 159515 }, { "epoch": 1.3793222713162878, "grad_norm": 3.4098255864416616, "learning_rate": 1.316392286221041e-06, "loss": 0.03165931701660156, "step": 159520 }, { "epoch": 1.379365504837831, "grad_norm": 4.006823729700269, "learning_rate": 1.3162236645416533e-06, "loss": 0.035823822021484375, "step": 159525 }, { "epoch": 1.3794087383593743, "grad_norm": 4.212001779033527, "learning_rate": 1.3160550506276931e-06, "loss": 0.05942459106445312, "step": 159530 }, { "epoch": 1.3794519718809175, "grad_norm": 6.809382549748102, "learning_rate": 1.3158864444799383e-06, "loss": 0.044001007080078126, "step": 159535 }, { "epoch": 1.3794952054024607, "grad_norm": 1.9505638069931301, "learning_rate": 1.3157178460991652e-06, "loss": 0.046779632568359375, "step": 159540 }, { "epoch": 1.3795384389240042, "grad_norm": 3.8259800355981195, "learning_rate": 1.3155492554861535e-06, "loss": 0.0403656005859375, "step": 159545 }, { "epoch": 1.3795816724455474, "grad_norm": 2.299475813908187, "learning_rate": 1.3153806726416803e-06, "loss": 0.022064971923828124, "step": 159550 }, { "epoch": 1.3796249059670906, "grad_norm": 15.836604182030094, "learning_rate": 1.315212097566521e-06, "loss": 0.052191162109375, "step": 159555 }, { "epoch": 1.3796681394886339, "grad_norm": 19.25944506945715, "learning_rate": 1.3150435302614562e-06, "loss": 0.09075813293457032, "step": 159560 }, { "epoch": 1.379711373010177, "grad_norm": 12.763581907328694, "learning_rate": 1.3148749707272614e-06, "loss": 0.09380950927734374, "step": 159565 }, { "epoch": 1.3797546065317206, "grad_norm": 14.890694326415545, "learning_rate": 1.3147064189647143e-06, "loss": 0.15849227905273439, "step": 159570 }, { "epoch": 1.3797978400532638, "grad_norm": 1.6481386239275255, "learning_rate": 1.3145378749745915e-06, "loss": 0.060783004760742186, "step": 159575 }, { "epoch": 1.379841073574807, "grad_norm": 2.4104980000467644, "learning_rate": 1.314369338757672e-06, "loss": 0.11544418334960938, "step": 159580 }, { "epoch": 1.3798843070963502, "grad_norm": 7.997426764155116, "learning_rate": 1.314200810314732e-06, "loss": 0.040020751953125, "step": 159585 }, { "epoch": 1.3799275406178935, "grad_norm": 12.3055666960364, "learning_rate": 1.314032289646549e-06, "loss": 0.07288131713867188, "step": 159590 }, { "epoch": 1.3799707741394367, "grad_norm": 7.039921324261148, "learning_rate": 1.3138637767539003e-06, "loss": 0.035977745056152345, "step": 159595 }, { "epoch": 1.38001400766098, "grad_norm": 1.7421904956034573, "learning_rate": 1.3136952716375629e-06, "loss": 0.07640590667724609, "step": 159600 }, { "epoch": 1.3800572411825232, "grad_norm": 7.251611319634603, "learning_rate": 1.3135267742983127e-06, "loss": 0.03808441162109375, "step": 159605 }, { "epoch": 1.3801004747040666, "grad_norm": 0.47717606257611106, "learning_rate": 1.3133582847369278e-06, "loss": 0.1676311492919922, "step": 159610 }, { "epoch": 1.3801437082256098, "grad_norm": 10.970608582232487, "learning_rate": 1.313189802954187e-06, "loss": 0.09243850708007813, "step": 159615 }, { "epoch": 1.380186941747153, "grad_norm": 9.404541816409873, "learning_rate": 1.3130213289508655e-06, "loss": 0.067041015625, "step": 159620 }, { "epoch": 1.3802301752686963, "grad_norm": 28.2259140761786, "learning_rate": 1.3128528627277405e-06, "loss": 0.09124298095703125, "step": 159625 }, { "epoch": 1.3802734087902395, "grad_norm": 15.559163942574507, "learning_rate": 1.3126844042855891e-06, "loss": 0.0506439208984375, "step": 159630 }, { "epoch": 1.380316642311783, "grad_norm": 1.1841984206169682, "learning_rate": 1.3125159536251869e-06, "loss": 0.014294910430908202, "step": 159635 }, { "epoch": 1.3803598758333262, "grad_norm": 0.5258095470538824, "learning_rate": 1.3123475107473131e-06, "loss": 0.0823211669921875, "step": 159640 }, { "epoch": 1.3804031093548694, "grad_norm": 29.596123323608573, "learning_rate": 1.3121790756527438e-06, "loss": 0.06581954956054688, "step": 159645 }, { "epoch": 1.3804463428764127, "grad_norm": 0.2018299934433699, "learning_rate": 1.312010648342255e-06, "loss": 0.05464324951171875, "step": 159650 }, { "epoch": 1.3804895763979559, "grad_norm": 4.52852363346308, "learning_rate": 1.3118422288166242e-06, "loss": 0.02959785461425781, "step": 159655 }, { "epoch": 1.3805328099194991, "grad_norm": 0.24532196978179138, "learning_rate": 1.3116738170766271e-06, "loss": 0.0096923828125, "step": 159660 }, { "epoch": 1.3805760434410423, "grad_norm": 0.34075933959941046, "learning_rate": 1.3115054131230416e-06, "loss": 0.08335762023925782, "step": 159665 }, { "epoch": 1.3806192769625856, "grad_norm": 0.11329781940059201, "learning_rate": 1.3113370169566435e-06, "loss": 0.028769683837890626, "step": 159670 }, { "epoch": 1.380662510484129, "grad_norm": 0.06844483326432416, "learning_rate": 1.3111686285782105e-06, "loss": 0.1636871337890625, "step": 159675 }, { "epoch": 1.3807057440056723, "grad_norm": 8.356842435629947, "learning_rate": 1.3110002479885185e-06, "loss": 0.046948814392089845, "step": 159680 }, { "epoch": 1.3807489775272155, "grad_norm": 8.510470897289292, "learning_rate": 1.3108318751883443e-06, "loss": 0.22559471130371095, "step": 159685 }, { "epoch": 1.3807922110487587, "grad_norm": 6.08904565464845, "learning_rate": 1.3106635101784645e-06, "loss": 0.0269287109375, "step": 159690 }, { "epoch": 1.3808354445703022, "grad_norm": 0.8069758832532916, "learning_rate": 1.310495152959655e-06, "loss": 0.06162128448486328, "step": 159695 }, { "epoch": 1.3808786780918454, "grad_norm": 0.5953716543823903, "learning_rate": 1.3103268035326917e-06, "loss": 0.034704971313476565, "step": 159700 }, { "epoch": 1.3809219116133886, "grad_norm": 2.439919936518466, "learning_rate": 1.3101584618983527e-06, "loss": 0.05953712463378906, "step": 159705 }, { "epoch": 1.3809651451349318, "grad_norm": 4.300488539007833, "learning_rate": 1.3099901280574135e-06, "loss": 0.05989151000976563, "step": 159710 }, { "epoch": 1.381008378656475, "grad_norm": 8.437143028017273, "learning_rate": 1.3098218020106508e-06, "loss": 0.06369476318359375, "step": 159715 }, { "epoch": 1.3810516121780183, "grad_norm": 1.0818242321911509, "learning_rate": 1.3096534837588395e-06, "loss": 0.15223464965820313, "step": 159720 }, { "epoch": 1.3810948456995615, "grad_norm": 1.173456621564503, "learning_rate": 1.3094851733027578e-06, "loss": 0.0486541748046875, "step": 159725 }, { "epoch": 1.3811380792211048, "grad_norm": 2.8164381730031063, "learning_rate": 1.3093168706431802e-06, "loss": 0.09335403442382813, "step": 159730 }, { "epoch": 1.3811813127426482, "grad_norm": 0.5483048378840176, "learning_rate": 1.3091485757808848e-06, "loss": 0.025816726684570312, "step": 159735 }, { "epoch": 1.3812245462641914, "grad_norm": 4.662161313132062, "learning_rate": 1.3089802887166468e-06, "loss": 0.09678306579589843, "step": 159740 }, { "epoch": 1.3812677797857347, "grad_norm": 1.1938053816668446, "learning_rate": 1.3088120094512423e-06, "loss": 0.0221099853515625, "step": 159745 }, { "epoch": 1.381311013307278, "grad_norm": 0.7371492507250682, "learning_rate": 1.3086437379854472e-06, "loss": 0.07946548461914063, "step": 159750 }, { "epoch": 1.3813542468288211, "grad_norm": 0.144553268124329, "learning_rate": 1.3084754743200383e-06, "loss": 0.08984298706054687, "step": 159755 }, { "epoch": 1.3813974803503646, "grad_norm": 0.2876331358971767, "learning_rate": 1.3083072184557905e-06, "loss": 0.13507461547851562, "step": 159760 }, { "epoch": 1.3814407138719078, "grad_norm": 22.680597200117084, "learning_rate": 1.3081389703934793e-06, "loss": 0.14390640258789061, "step": 159765 }, { "epoch": 1.381483947393451, "grad_norm": 2.8673969857357164, "learning_rate": 1.307970730133883e-06, "loss": 0.0268341064453125, "step": 159770 }, { "epoch": 1.3815271809149943, "grad_norm": 2.2554740758363474, "learning_rate": 1.307802497677776e-06, "loss": 0.009313583374023438, "step": 159775 }, { "epoch": 1.3815704144365375, "grad_norm": 1.7332736515183875, "learning_rate": 1.3076342730259333e-06, "loss": 0.165643310546875, "step": 159780 }, { "epoch": 1.3816136479580807, "grad_norm": 0.23483198617148718, "learning_rate": 1.307466056179133e-06, "loss": 0.01107025146484375, "step": 159785 }, { "epoch": 1.381656881479624, "grad_norm": 11.016372892510335, "learning_rate": 1.3072978471381495e-06, "loss": 0.11655502319335938, "step": 159790 }, { "epoch": 1.3817001150011672, "grad_norm": 9.773502115446723, "learning_rate": 1.3071296459037575e-06, "loss": 0.0565277099609375, "step": 159795 }, { "epoch": 1.3817433485227106, "grad_norm": 0.4425589621750117, "learning_rate": 1.3069614524767356e-06, "loss": 0.009210586547851562, "step": 159800 }, { "epoch": 1.3817865820442539, "grad_norm": 13.286647632355862, "learning_rate": 1.3067932668578575e-06, "loss": 0.099578857421875, "step": 159805 }, { "epoch": 1.381829815565797, "grad_norm": 3.432540893526164, "learning_rate": 1.3066250890478993e-06, "loss": 0.021803665161132812, "step": 159810 }, { "epoch": 1.3818730490873403, "grad_norm": 69.35514354007644, "learning_rate": 1.3064569190476365e-06, "loss": 0.2120513916015625, "step": 159815 }, { "epoch": 1.3819162826088836, "grad_norm": 0.16946767701264956, "learning_rate": 1.306288756857845e-06, "loss": 0.11754684448242188, "step": 159820 }, { "epoch": 1.381959516130427, "grad_norm": 10.985134865347058, "learning_rate": 1.3061206024792987e-06, "loss": 0.04627532958984375, "step": 159825 }, { "epoch": 1.3820027496519702, "grad_norm": 8.284725426495681, "learning_rate": 1.305952455912776e-06, "loss": 0.04828643798828125, "step": 159830 }, { "epoch": 1.3820459831735135, "grad_norm": 0.6307204604474492, "learning_rate": 1.3057843171590495e-06, "loss": 0.14177932739257812, "step": 159835 }, { "epoch": 1.3820892166950567, "grad_norm": 0.7869906064208414, "learning_rate": 1.305616186218897e-06, "loss": 0.14135208129882812, "step": 159840 }, { "epoch": 1.3821324502166, "grad_norm": 2.0297595417376217, "learning_rate": 1.3054480630930934e-06, "loss": 0.017196273803710936, "step": 159845 }, { "epoch": 1.3821756837381431, "grad_norm": 73.31430257563711, "learning_rate": 1.305279947782413e-06, "loss": 0.2669075012207031, "step": 159850 }, { "epoch": 1.3822189172596864, "grad_norm": 0.714077442834735, "learning_rate": 1.3051118402876322e-06, "loss": 0.24109916687011718, "step": 159855 }, { "epoch": 1.3822621507812296, "grad_norm": 1.6747610407786298, "learning_rate": 1.3049437406095245e-06, "loss": 0.07622318267822266, "step": 159860 }, { "epoch": 1.382305384302773, "grad_norm": 1.0597776280247264, "learning_rate": 1.3047756487488677e-06, "loss": 0.25160064697265627, "step": 159865 }, { "epoch": 1.3823486178243163, "grad_norm": 0.5458807353156306, "learning_rate": 1.3046075647064357e-06, "loss": 0.04935741424560547, "step": 159870 }, { "epoch": 1.3823918513458595, "grad_norm": 2.739623001197913, "learning_rate": 1.3044394884830039e-06, "loss": 0.033827590942382815, "step": 159875 }, { "epoch": 1.3824350848674027, "grad_norm": 2.129823132977197, "learning_rate": 1.3042714200793471e-06, "loss": 0.06484794616699219, "step": 159880 }, { "epoch": 1.382478318388946, "grad_norm": 2.145294671777527, "learning_rate": 1.3041033594962401e-06, "loss": 0.0422210693359375, "step": 159885 }, { "epoch": 1.3825215519104894, "grad_norm": 1.9222353320600352, "learning_rate": 1.3039353067344594e-06, "loss": 0.01450958251953125, "step": 159890 }, { "epoch": 1.3825647854320327, "grad_norm": 7.074921037570957, "learning_rate": 1.3037672617947778e-06, "loss": 0.056926727294921875, "step": 159895 }, { "epoch": 1.3826080189535759, "grad_norm": 15.185936882081608, "learning_rate": 1.303599224677973e-06, "loss": 0.024480819702148438, "step": 159900 }, { "epoch": 1.382651252475119, "grad_norm": 4.496142890194366, "learning_rate": 1.303431195384819e-06, "loss": 0.015772056579589844, "step": 159905 }, { "epoch": 1.3826944859966623, "grad_norm": 14.793088773061939, "learning_rate": 1.30326317391609e-06, "loss": 0.18913497924804687, "step": 159910 }, { "epoch": 1.3827377195182056, "grad_norm": 4.966409823436136, "learning_rate": 1.3030951602725615e-06, "loss": 0.049514389038085936, "step": 159915 }, { "epoch": 1.3827809530397488, "grad_norm": 0.42586344846848884, "learning_rate": 1.3029271544550084e-06, "loss": 0.020489883422851563, "step": 159920 }, { "epoch": 1.3828241865612922, "grad_norm": 0.6251355676401887, "learning_rate": 1.3027591564642036e-06, "loss": 0.036151885986328125, "step": 159925 }, { "epoch": 1.3828674200828355, "grad_norm": 0.03386357889005115, "learning_rate": 1.3025911663009251e-06, "loss": 0.008979988098144532, "step": 159930 }, { "epoch": 1.3829106536043787, "grad_norm": 0.328971338030499, "learning_rate": 1.3024231839659461e-06, "loss": 0.025787353515625, "step": 159935 }, { "epoch": 1.382953887125922, "grad_norm": 4.009244344327893, "learning_rate": 1.3022552094600414e-06, "loss": 0.02331695556640625, "step": 159940 }, { "epoch": 1.3829971206474652, "grad_norm": 1.8111688717362824, "learning_rate": 1.3020872427839846e-06, "loss": 0.029900169372558592, "step": 159945 }, { "epoch": 1.3830403541690086, "grad_norm": 43.686276261647016, "learning_rate": 1.3019192839385525e-06, "loss": 0.5469741821289062, "step": 159950 }, { "epoch": 1.3830835876905518, "grad_norm": 2.7505552241910602, "learning_rate": 1.3017513329245176e-06, "loss": 0.026381301879882812, "step": 159955 }, { "epoch": 1.383126821212095, "grad_norm": 4.08467450216329, "learning_rate": 1.3015833897426564e-06, "loss": 0.21733455657958983, "step": 159960 }, { "epoch": 1.3831700547336383, "grad_norm": 4.512904796841489, "learning_rate": 1.3014154543937427e-06, "loss": 0.22695083618164064, "step": 159965 }, { "epoch": 1.3832132882551815, "grad_norm": 0.10203910075324567, "learning_rate": 1.3012475268785506e-06, "loss": 0.00409698486328125, "step": 159970 }, { "epoch": 1.3832565217767248, "grad_norm": 0.5413809388555905, "learning_rate": 1.301079607197855e-06, "loss": 0.05012931823730469, "step": 159975 }, { "epoch": 1.383299755298268, "grad_norm": 1.1485615307731951, "learning_rate": 1.3009116953524302e-06, "loss": 0.003536224365234375, "step": 159980 }, { "epoch": 1.3833429888198112, "grad_norm": 32.530275704290325, "learning_rate": 1.300743791343049e-06, "loss": 0.2203968048095703, "step": 159985 }, { "epoch": 1.3833862223413547, "grad_norm": 0.6171682909505954, "learning_rate": 1.3005758951704889e-06, "loss": 0.019170379638671874, "step": 159990 }, { "epoch": 1.383429455862898, "grad_norm": 6.658972955276942, "learning_rate": 1.3004080068355223e-06, "loss": 0.1140289306640625, "step": 159995 }, { "epoch": 1.3834726893844411, "grad_norm": 0.629194708620014, "learning_rate": 1.300240126338924e-06, "loss": 0.1167572021484375, "step": 160000 }, { "epoch": 1.3834726893844411, "eval_loss": 0.1266646385192871, "eval_margin": 0.16131319105625153, "eval_mean_neg": -0.00310450978577137, "eval_mean_pos": 0.7200332880020142, "eval_runtime": 31.1721, "eval_samples_per_second": 7.41, "eval_steps_per_second": 3.721, "step": 160000 } ], "logging_steps": 5, "max_steps": 231302, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 40000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 38457055641600.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }